]> git.argeo.org Git - gpl/argeo-slc.git/blob - runtime/org.argeo.slc.support.simple/src/main/java/org/argeo/slc/diff/LineTokenizer.java
Add license headers
[gpl/argeo-slc.git] / runtime / org.argeo.slc.support.simple / src / main / java / org / argeo / slc / diff / LineTokenizer.java
1 /*
2 * Copyright (C) 2010 Mathieu Baudier <mbaudier@argeo.org>
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package org.argeo.slc.diff;
18
19 import java.util.ArrayList;
20 import java.util.List;
21
/**
 * Splits a string into a list of tokens around a single-character separator,
 * keeping one entry for every field, including empty ones. It is meant as a
 * replacement for the standard StringTokenizer, which silently drops empty
 * fields.
 * <p>
 * A string containing <i>n</i> separators always yields <i>n + 1</i> tokens.
 * Every empty field (leading, trailing, between two consecutive separators, or
 * the whole input when it is the empty string) is reported as the configured
 * "no value" string.
 * <p>
 * Unlike StringTokenizer, the whole input is parsed eagerly in the constructor
 * and the resulting tokens are stored, so this class should not be used on
 * very long strings. No reference to the parsed string is kept.
 */
public class LineTokenizer {
    /** Tokens computed once at construction time. */
    private final List<String> tokens;

    /**
     * Complete constructor: parses the string immediately.
     * <p>
     * Note that this calls the overridable {@link #parse} method, so a
     * subclass implementation runs before the subclass's own fields are
     * initialized.
     *
     * @param stringToParse
     *            the string to split, must not be null
     * @param separator
     *            the separator character, must not be null (it is unboxed)
     * @param noValueString
     *            the value stored in place of each empty field
     */
    public LineTokenizer(String stringToParse, Character separator,
            String noValueString) {
        this.tokens = parse(stringToParse, separator, noValueString);
    }

    /**
     * Parse the string as a list of strings. Can be overridden in order to
     * provide another implementation.
     *
     * @param stringToParse
     *            the string to split, must not be null
     * @param separator
     *            the character delimiting the fields
     * @param noValueString
     *            the value added for each empty field; it is added as-is
     * @return a new list holding one token per field, in order of appearance
     */
    protected List<String> parse(final String stringToParse,
            final char separator, final String noValueString) {
        final List<String> res = new ArrayList<String>();
        final int length = stringToParse.length();

        // Index of the first character of the field currently being scanned.
        int fieldStart = 0;

        // The position just past the last character (index == length) is
        // treated as a virtual separator so that the last field is closed by
        // the same rule as all the others.
        for (int index = 0; index <= length; index++) {
            if (index == length || stringToParse.charAt(index) == separator) {
                if (index == fieldStart) {
                    // Empty field: leading/trailing/consecutive separators,
                    // or an empty input string.
                    res.add(noValueString);
                } else {
                    res.add(stringToParse.substring(fieldStart, index));
                }
                fieldStart = index + 1;
            }
        }
        return res;
    }

    /**
     * The tokens.
     *
     * @return the list built at construction time (the stored list itself,
     *         not a copy)
     */
    public List<String> getTokens() {
        return tokens;
    }

    /**
     * Parses the string and returns its tokens in one call.
     *
     * @param stringToParse
     *            the string to split, must not be null
     * @param separator
     *            the separator character, must not be null
     * @param noValueString
     *            the value stored in place of each empty field
     * @return the tokens, one per field
     */
    public static List<String> tokenize(String stringToParse,
            Character separator, String noValueString) {
        LineTokenizer lt = new LineTokenizer(stringToParse, separator,
                noValueString);
        return lt.getTokens();
    }

    /**
     * Parse, using the empty string as no value string.
     *
     * @param stringToParse
     *            the string to split, must not be null
     * @param separator
     *            the separator character, must not be null
     * @return the tokens, one per field
     */
    public static List<String> tokenize(String stringToParse,
            Character separator) {
        return tokenize(stringToParse, separator, "");
    }

}