X-Git-Url: http://git.argeo.org/?a=blobdiff_plain;ds=sidebyside;f=cms%2Forg.argeo.slc.support%2Fsrc%2Forg%2Fargeo%2Fslc%2Fdiff%2FLineTokenizer.java;fp=cms%2Forg.argeo.slc.support%2Fsrc%2Forg%2Fargeo%2Fslc%2Fdiff%2FLineTokenizer.java;h=fed4c37b518aae1771aeabc2eddb801cde542178;hb=ecc22e604e47533c79de9cecdcdeacbc752cbff1;hp=0000000000000000000000000000000000000000;hpb=e07ded4632e53f8b8869763bc1f1f4091361e76e;p=gpl%2Fargeo-slc.git

diff --git a/cms/org.argeo.slc.support/src/org/argeo/slc/diff/LineTokenizer.java b/cms/org.argeo.slc.support/src/org/argeo/slc/diff/LineTokenizer.java
new file mode 100644
index 000000000..fed4c37b5
--- /dev/null
+++ b/cms/org.argeo.slc.support/src/org/argeo/slc/diff/LineTokenizer.java
@@ -0,0 +1,96 @@
+package org.argeo.slc.diff;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Splits a string into a list of tokens around a single-character separator,
+ * substituting a configurable "no value" string for every empty field (leading,
+ * trailing, or between two consecutive separators). Intended as a replacement
+ * for the standard StringTokenizer, which does not deal well with empty values.
+ * Contrary to StringTokenizer, the whole input is parsed eagerly in the
+ * constructor and the tokens are stored in a field, so this class should not be
+ * used on long strings. Tokens are fresh copies; the parsed string is not kept.
+ */
+public class LineTokenizer {
+	private final List<String> tokens;
+
+	/** Complete constructor: eagerly parses stringToParse via parse(); separator must not be null since it is unboxed to char. */
+	public LineTokenizer(String stringToParse, Character separator,
+			String noValueString) {
+		this.tokens = parse(stringToParse, separator, noValueString);
+	}
+
+	/**
+	 * Splits stringToParse around separator, adding noValueString for each empty field; always returns at least one token.
+	 * Can be overridden to provide another implementation; note that it is invoked from the constructor.
+	 */
+	protected List<String> parse(final String stringToParse,
+			final char separator, final String noValueString) {
+		// Init: NULL is the sentinel meaning "no separator seen yet"
+		final int NULL = -1;
+		List<String> res = new ArrayList<String>();
+		final char[] array = stringToParse.toCharArray();
+		int lastSeparatorIndex = NULL;
+
+		// Loop on chars: each separator closes the field that precedes it
+		for (int currIndex = 0; currIndex < array.length; currIndex++) {
+			char c = array[currIndex];
+			if (c == separator) {
+				if (currIndex == 0) {
+					// first char is a separator: the leading field is empty
+					res.add(new String(noValueString));
+					lastSeparatorIndex = 0;
+				} else if (lastSeparatorIndex == NULL) {
+					// first separator found: the token runs from the start of the string
+					res.add(new String(array, 0, currIndex));
+					lastSeparatorIndex = currIndex;
+				} else if (lastSeparatorIndex != NULL
+						&& (lastSeparatorIndex == (currIndex - 1))) {
+					// consecutive separators: the field in between is empty
+					res.add(new String(noValueString));
+					lastSeparatorIndex = currIndex;
+				} else {
+					// simple case: the token lies between the previous separator and this one
+					res.add(new String(array, lastSeparatorIndex + 1, currIndex
+							- lastSeparatorIndex - 1));
+					lastSeparatorIndex = currIndex;
+				}
+			}
+		}
+
+		// Finalize: add the field that follows the last separator (or the whole string)
+		if (lastSeparatorIndex == NULL) {
+			// no separator found: the whole string is the single token
+			res.add(new String(stringToParse));
+		} else if (lastSeparatorIndex == (array.length - 1)) {
+			// last char is a separator: the trailing field is empty
+			res.add(new String(noValueString));
+		} else {
+			// last token: everything after the last separator
+			res.add(new String(array, lastSeparatorIndex + 1, array.length
+					- lastSeparatorIndex - 1));
+		}
+		return res;
+	}
+
+	/** Returns the tokens computed at construction time (the internal list itself, not a copy). */
+	public List<String> getTokens() {
+		return tokens;
+	}
+
+	/** Convenience method: builds a LineTokenizer for the given arguments and returns its tokens. */
+	public static List<String> tokenize(String stringToParse,
+			Character separator, String noValueString) {
+		LineTokenizer lt = new LineTokenizer(stringToParse, separator,
+				noValueString);
+		return lt.getTokens();
+	}
+
+	/** Tokenizes stringToParse, using the empty string as the no-value string. */
+	public static List<String> tokenize(String stringToParse,
+			Character separator) {
+		return tokenize(stringToParse, separator, "");
+	}
+
+}