git.argeo.org Git - gpl/argeo-slc.git/blobdiff - cms/org.argeo.slc.support/src/org/argeo/slc/diff/LineTokenizer.java
Clarify SLC project structure.
[gpl/argeo-slc.git] / cms / org.argeo.slc.support / src / org / argeo / slc / diff / LineTokenizer.java
diff --git a/cms/org.argeo.slc.support/src/org/argeo/slc/diff/LineTokenizer.java b/cms/org.argeo.slc.support/src/org/argeo/slc/diff/LineTokenizer.java
new file mode 100644 (file)
index 0000000..fed4c37
--- /dev/null
@@ -0,0 +1,96 @@
+package org.argeo.slc.diff;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Splits a string into a list of tokens on a single separator character,
+ * substituting a caller-supplied "no value" string for every empty field
+ * (leading, trailing or between two consecutive separators). It is meant as a
+ * replacement for the standard StringTokenizer, which does not deal well with
+ * empty values. Unlike StringTokenizer, the whole string is parsed eagerly in
+ * the constructor and the resulting tokens are stored in a field, so this
+ * should not be used on long strings. The parsed string itself is not kept.
+ */
+public class LineTokenizer {
+       private final List<String> tokens;
+
+       /** Parses stringToParse at once through the overridable parse(); a null separator fails on unboxing. */
+       public LineTokenizer(String stringToParse, Character separator,
+                       String noValueString) {
+               this.tokens = parse(stringToParse, separator, noValueString);
+       }
+
+       /**
+        * Splits stringToParse on separator and returns the fields in order; each
+        * empty field becomes a copy of noValueString. Can be overridden, but note
+        * that it is called from the constructor. Always yields at least one token.
+        */
+       protected List<String> parse(final String stringToParse,
+                       final char separator, final String noValueString) {
+               // Init: NULL is the marker for "no separator seen yet"
+               final int NULL = -1;
+               List<String> res = new ArrayList<String>();
+               final char[] array = stringToParse.toCharArray();
+               int lastSeparatorIndex = NULL;
+
+               // Loop on chars: one token is emitted each time a separator is met
+               for (int currIndex = 0; currIndex < array.length; currIndex++) {
+                       char c = array[currIndex];
+                       if (c == separator) {
+                               if (currIndex == 0) {
+                                       // first char is a separator: the leading field is empty
+                                       res.add(new String(noValueString));
+                                       lastSeparatorIndex = 0;
+                               } else if (lastSeparatorIndex == NULL) {
+                                       // first separator found: token is everything before it
+                                       res.add(new String(array, 0, currIndex));
+                                       lastSeparatorIndex = currIndex;
+                               } else if (lastSeparatorIndex != NULL
+                                               && (lastSeparatorIndex == (currIndex - 1))) {
+                                       // consecutive separators: the field between them is empty
+                                       res.add(new String(noValueString));
+                                       lastSeparatorIndex = currIndex;
+                               } else {
+                                       // simple case: token lies strictly between the two separators
+                                       res.add(new String(array, lastSeparatorIndex + 1, currIndex
+                                                       - lastSeparatorIndex - 1));
+                                       lastSeparatorIndex = currIndex;
+                               }
+                       }
+               }
+
+               // Finalize: emit the field that follows the last separator
+               if (lastSeparatorIndex == NULL) {
+                       // no separator found: the whole input is the only token (also for "")
+                       res.add(new String(stringToParse));
+               } else if (lastSeparatorIndex == (array.length - 1)) {
+                       // last char is a separator: the trailing field is empty
+                       res.add(new String(noValueString));
+               } else {
+                       // last token: the remaining chars after the last separator
+                       res.add(new String(array, lastSeparatorIndex + 1, array.length
+                                       - lastSeparatorIndex - 1));
+               }
+               return res;
+       }
+
+       /** Returns the parsed tokens; this is the internal list itself, not a copy. */
+       public List<String> getTokens() {
+               return tokens;
+       }
+
+       /** Convenience shortcut: builds a LineTokenizer and returns its tokens. */
+       public static List<String> tokenize(String stringToParse,
+                       Character separator, String noValueString) {
+               LineTokenizer lt = new LineTokenizer(stringToParse, separator,
+                               noValueString);
+               return lt.getTokens();
+       }
+
+       /** Same as the three-argument tokenize, using the empty string as no value string. */
+       public static List<String> tokenize(String stringToParse,
+                       Character separator) {
+               return tokenize(stringToParse, separator, "");
+       }
+
+}