X-Git-Url: http://git.argeo.org/?a=blobdiff_plain;ds=sidebyside;f=cms%2Forg.argeo.slc.support%2Fsrc%2Forg%2Fargeo%2Fslc%2Fdiff%2FLineTokenizer.java;fp=cms%2Forg.argeo.slc.support%2Fsrc%2Forg%2Fargeo%2Fslc%2Fdiff%2FLineTokenizer.java;h=fed4c37b518aae1771aeabc2eddb801cde542178;hb=ecc22e604e47533c79de9cecdcdeacbc752cbff1;hp=0000000000000000000000000000000000000000;hpb=e07ded4632e53f8b8869763bc1f1f4091361e76e;p=gpl%2Fargeo-slc.git diff --git a/cms/org.argeo.slc.support/src/org/argeo/slc/diff/LineTokenizer.java b/cms/org.argeo.slc.support/src/org/argeo/slc/diff/LineTokenizer.java new file mode 100644 index 000000000..fed4c37b5 --- /dev/null +++ b/cms/org.argeo.slc.support/src/org/argeo/slc/diff/LineTokenizer.java @@ -0,0 +1,96 @@ +package org.argeo.slc.diff; + +import java.util.ArrayList; +import java.util.List; + +/** + * Parses a string as a vector of strings according to a separator, dealing + * properly with missing values. This is intended to be used instead of the + * standard StringTokenizer, which does not deal well with empty values. + * Contrary to the StringTokenizer the provided String is parsed in the + * constructor and the values stored as a property. This should therefore not be + * used to parse long strings. No reference to the argument passed in + * constructor is kept. + */ +public class LineTokenizer { + private final List tokens; + + /** Complete constructor. */ + public LineTokenizer(String stringToParse, Character separator, + String noValueString) { + this.tokens = parse(stringToParse, separator, noValueString); + } + + /** + * Parse the string as a vector of strings. Can be overridden in order to + * provide another implementation. + */ + protected List parse(final String stringToParse, + final char separator, final String noValueString) { + // Init + final int NULL = -1; + List res = new ArrayList(); + final char[] array = stringToParse.toCharArray(); + int lastSeparatorIndex = NULL; + + // Loop on chars + for (int currIndex = 0; currIndex < array.length; currIndex++) { + char c = array[currIndex]; + if (c == separator) { + if (currIndex == 0) { + // first char is a separator + res.add(new String(noValueString)); + lastSeparatorIndex = 0; + } else if (lastSeparatorIndex == NULL) { + // first separator found + res.add(new String(array, 0, currIndex)); + lastSeparatorIndex = currIndex; + } else if (lastSeparatorIndex != NULL + && (lastSeparatorIndex == (currIndex - 1))) { + // consecutive separators + res.add(new String(noValueString)); + lastSeparatorIndex = currIndex; + } else { + // simple case + res.add(new String(array, lastSeparatorIndex + 1, currIndex + - lastSeparatorIndex - 1)); + lastSeparatorIndex = currIndex; + } + } + } + + // Finalize + if (lastSeparatorIndex == NULL) { + // no separator found + res.add(new String(stringToParse)); + } else if (lastSeparatorIndex == (array.length - 1)) { + // last char is a separator + res.add(new String(noValueString)); + } else { + // last token + res.add(new String(array, lastSeparatorIndex + 1, array.length + - lastSeparatorIndex - 1)); + } + return res; + } + + /** The tokens. */ + public List getTokens() { + return tokens; + } + + /** Parse */ + public static List tokenize(String stringToParse, + Character separator, String noValueString) { + LineTokenizer lt = new LineTokenizer(stringToParse, separator, + noValueString); + return lt.getTokens(); + } + + /** Parse, using the empty string as no value string. */ + public static List tokenize(String stringToParse, + Character separator) { + return tokenize(stringToParse, separator, ""); + } + +}