package org.argeo.slc.diff;

import java.util.ArrayList;
import java.util.List;

/**
 * Parses a string as a list of strings according to a separator character,
 * dealing properly with missing values. This is intended to be used instead of
 * the standard StringTokenizer, which does not deal well with empty values.
 * <p>
 * Contrary to the StringTokenizer, the provided string is parsed entirely in
 * the constructor and the resulting values are stored as a property. This
 * should therefore not be used to parse long strings.
 * <p>
 * Tokens are built from a private copy of the characters of the parsed string,
 * so no reference to the parsed string itself is kept. The "no value" string is
 * stored as is for every missing value.
 */
public class LineTokenizer {
    private final List<String> tokens;

    /**
     * Complete constructor: parses the string immediately.
     * <p>
     * Note that this constructor calls the overridable
     * {@link #parse(String, char, String)}; an overriding implementation must
     * not rely on state initialized by the subclass constructor.
     *
     * @param stringToParse
     *            the string to split, must not be null
     * @param separator
     *            the separator character, must not be null (it is unboxed)
     * @param noValueString
     *            the value used for each missing (empty) token
     * @throws NullPointerException
     *             if stringToParse or separator is null
     */
    public LineTokenizer(String stringToParse, Character separator, String noValueString) {
        this.tokens = parse(stringToParse, separator, noValueString);
    }

    /**
     * Parses the string as a list of strings. Can be overridden in order to
     * provide another implementation.
     * <p>
     * The result always contains exactly one more token than the number of
     * separators found. Every empty token (before a leading separator, between
     * two consecutive separators, after a trailing separator, or an entirely
     * empty input) is replaced by noValueString.
     *
     * @param stringToParse
     *            the string to split, must not be null
     * @param separator
     *            the separator character
     * @param noValueString
     *            the value used for each missing (empty) token; it is added to
     *            the result as is, so null yields null entries
     * @return a new mutable list holding the tokens in order of appearance
     * @throws NullPointerException
     *             if stringToParse is null
     */
    protected List<String> parse(final String stringToParse, final char separator, final String noValueString) {
        final char[] array = stringToParse.toCharArray();
        final List<String> res = new ArrayList<String>();

        // Index of the first character of the token currently being read
        int tokenStart = 0;
        for (int currIndex = 0; currIndex < array.length; currIndex++) {
            if (array[currIndex] == separator) {
                res.add(extractToken(array, tokenStart, currIndex, noValueString));
                tokenStart = currIndex + 1;
            }
        }

        // Whatever follows the last separator (possibly nothing) is the last token
        res.add(extractToken(array, tokenStart, array.length, noValueString));
        return res;
    }

    /**
     * Builds the token made of the characters in [start, end), or returns
     * noValueString when that range is empty.
     */
    private static String extractToken(char[] array, int start, int end, String noValueString) {
        if (start == end) {
            return noValueString;
        }
        return new String(array, start, end - start);
    }

    /**
     * The tokens.
     *
     * @return the list produced by {@link #parse(String, char, String)}; it is
     *         returned directly, not copied
     */
    public List<String> getTokens() {
        return tokens;
    }

    /**
     * Parses the string and returns its tokens.
     *
     * @param stringToParse
     *            the string to split, must not be null
     * @param separator
     *            the separator character, must not be null
     * @param noValueString
     *            the value used for each missing (empty) token
     * @return the tokens in order of appearance
     */
    public static List<String> tokenize(String stringToParse, Character separator, String noValueString) {
        LineTokenizer lt = new LineTokenizer(stringToParse, separator, noValueString);
        return lt.getTokens();
    }

    /**
     * Parses the string, using the empty string as no value string.
     *
     * @param stringToParse
     *            the string to split, must not be null
     * @param separator
     *            the separator character, must not be null
     * @return the tokens in order of appearance
     */
    public static List<String> tokenize(String stringToParse, Character separator) {
        return tokenize(stringToParse, separator, "");
    }
}