]> git.argeo.org Git - gpl/argeo-slc.git/blob - org.argeo.slc.core/src/main/java/org/argeo/slc/diff/LineTokenizer.java
Update XML formats
[gpl/argeo-slc.git] / org.argeo.slc.core / src / main / java / org / argeo / slc / diff / LineTokenizer.java
1 package org.argeo.slc.diff;
2
3 import java.util.ArrayList;
4 import java.util.List;
5
/**
 * Splits a string into a list of tokens according to a single-character
 * separator, reporting every empty field (leading, trailing, between two
 * consecutive separators, or an entirely empty input) as a configurable
 * "no value" marker. This is intended to be used instead of the standard
 * StringTokenizer, which silently skips empty fields.
 * <p>
 * Contrary to the StringTokenizer, the provided string is parsed eagerly in
 * the constructor and the resulting tokens are stored. This should therefore
 * not be used to parse long strings. Non-empty tokens are fresh copies of the
 * relevant characters; the "no value" marker is stored as given.
 */
public class LineTokenizer {
	private final List<String> tokens;

	/**
	 * Complete constructor: parses the string immediately.
	 *
	 * @param stringToParse
	 *            the string to split, must not be null
	 * @param separator
	 *            the separator character, must not be null
	 * @param noValueString
	 *            the marker stored for each empty field, may be null
	 * @throws NullPointerException
	 *             if stringToParse or separator is null
	 */
	public LineTokenizer(String stringToParse, Character separator,
			String noValueString) {
		if (separator == null) {
			// Fail with an explicit message rather than an implicit
			// auto-unboxing failure.
			throw new NullPointerException("separator must not be null");
		}
		// parse() is an overridable extension point that is invoked before
		// any subclass constructor body has run: overriding implementations
		// must not depend on subclass instance state.
		this.tokens = parse(stringToParse, separator.charValue(),
				noValueString);
	}

	/**
	 * Parse the string as a list of strings. Can be overridden in order to
	 * provide another implementation.
	 * <p>
	 * The result always contains exactly one more element than there are
	 * separators in the input; each empty field is represented by
	 * noValueString.
	 *
	 * @param stringToParse
	 *            the string to split, must not be null
	 * @param separator
	 *            the separator character
	 * @param noValueString
	 *            the marker stored for each empty field, may be null
	 * @return a new mutable list holding the tokens in order
	 * @throws NullPointerException
	 *             if stringToParse is null
	 */
	protected List<String> parse(final String stringToParse,
			final char separator, final String noValueString) {
		if (stringToParse == null) {
			throw new NullPointerException("stringToParse must not be null");
		}

		final char[] array = stringToParse.toCharArray();
		final List<String> res = new ArrayList<String>();

		// Index of the first character of the field currently being read.
		int fieldStart = 0;
		for (int currIndex = 0; currIndex < array.length; currIndex++) {
			if (array[currIndex] == separator) {
				res.add(toToken(array, fieldStart, currIndex, noValueString));
				fieldStart = currIndex + 1;
			}
		}

		// Whatever follows the last separator (or the whole input when there
		// is no separator) is the final field, possibly empty.
		res.add(toToken(array, fieldStart, array.length, noValueString));
		return res;
	}

	/**
	 * Builds the token for the half-open range [start, end) of the array,
	 * substituting noValueString when the range is empty.
	 */
	private static String toToken(char[] array, int start, int end,
			String noValueString) {
		if (start == end) {
			return noValueString;
		}
		return new String(array, start, end - start);
	}

	/**
	 * The tokens.
	 *
	 * @return the list built by the parse method (the stored instance, not a
	 *         copy)
	 */
	public List<String> getTokens() {
		return tokens;
	}

	/**
	 * Parses the string and returns its tokens.
	 *
	 * @param stringToParse
	 *            the string to split, must not be null
	 * @param separator
	 *            the separator character, must not be null
	 * @param noValueString
	 *            the marker stored for each empty field, may be null
	 * @return the tokens in order
	 */
	public static List<String> tokenize(String stringToParse,
			Character separator, String noValueString) {
		LineTokenizer lt = new LineTokenizer(stringToParse, separator,
				noValueString);
		return lt.getTokens();
	}

	/**
	 * Parses the string and returns its tokens, using the empty string as no
	 * value string.
	 *
	 * @param stringToParse
	 *            the string to split, must not be null
	 * @param separator
	 *            the separator character, must not be null
	 * @return the tokens in order
	 */
	public static List<String> tokenize(String stringToParse,
			Character separator) {
		return tokenize(stringToParse, separator, "");
	}

}