// Source: git.argeo.org, gpl/argeo-slc.git, runtime/org.argeo.slc.support.simple/src/main/java/org/argeo/slc/diff/LineTokenizer.java
/*
 * Copyright (C) 2007-2012 Argeo GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.argeo.slc.diff;

import java.util.ArrayList;
import java.util.List;
/**
 * Parses a string as a list of strings according to a separator, dealing
 * properly with missing values. This is intended to be used instead of the
 * standard {@link java.util.StringTokenizer}, which does not deal well with
 * empty values.
 * <p>
 * Contrary to the {@code StringTokenizer}, the provided string is parsed in
 * the constructor and the resulting values are stored in the instance. This
 * should therefore not be used to parse long strings. Tokens are built from a
 * private copy of the characters of the parsed string.
 */
public class LineTokenizer {
    /** Tokens extracted from the parsed string, in order of appearance. */
    private final List<String> tokens;

    /**
     * Complete constructor. Parses {@code stringToParse} immediately by
     * delegating to {@link #parse(String, char, String)}.
     * <p>
     * Note for subclasses: {@code parse} is invoked from this constructor,
     * that is before the subclass' own fields are initialised.
     *
     * @param stringToParse
     *            the string to split
     * @param separator
     *            the character separating two values, must not be null
     * @param noValueString
     *            the value reported in place of an empty value
     * @throws NullPointerException
     *             if {@code separator} is null
     */
    public LineTokenizer(String stringToParse, Character separator,
            String noValueString) {
        // Fail with an explicit message rather than through silent
        // auto-unboxing of a null Character.
        if (separator == null) {
            throw new NullPointerException("separator must not be null");
        }
        this.tokens = parse(stringToParse, separator.charValue(),
                noValueString);
    }

    /**
     * Parse the string as a list of strings. Can be overridden in order to
     * provide another implementation.
     * <p>
     * A string containing n separators always yields n + 1 tokens; each empty
     * token (leading, trailing or between two consecutive separators) is
     * replaced by {@code noValueString}. A string containing no separator at
     * all is returned as a single token, unchanged, even when it is empty.
     *
     * @param stringToParse
     *            the string to split, must not be null
     * @param separator
     *            the character separating two values
     * @param noValueString
     *            the value reported in place of an empty value; it is added to
     *            the result as is (if null, empty values are reported as null
     *            elements)
     * @return a new list holding the tokens in order of appearance
     * @throws NullPointerException
     *             if {@code stringToParse} is null
     */
    protected List<String> parse(final String stringToParse,
            final char separator, final String noValueString) {
        if (stringToParse == null) {
            throw new NullPointerException("stringToParse must not be null");
        }

        final char[] chars = stringToParse.toCharArray();
        final List<String> res = new ArrayList<String>();

        // Index of the first character of the token currently being read.
        int tokenStart = 0;
        for (int currIndex = 0; currIndex < chars.length; currIndex++) {
            if (chars[currIndex] == separator) {
                res.add(tokenOrNoValue(chars, tokenStart, currIndex,
                        noValueString));
                tokenStart = currIndex + 1;
            }
        }

        if (res.isEmpty()) {
            // no separator found: the whole input is the single token
            res.add(new String(chars));
        } else {
            // last token (empty when the last char is a separator)
            res.add(tokenOrNoValue(chars, tokenStart, chars.length,
                    noValueString));
        }
        return res;
    }

    /**
     * Builds the token made of {@code chars[start..end)}, or returns
     * {@code noValueString} when that range is empty.
     */
    private static String tokenOrNoValue(char[] chars, int start, int end,
            String noValueString) {
        if (start == end) {
            return noValueString;
        }
        return new String(chars, start, end - start);
    }

    /**
     * The tokens.
     *
     * @return the list built when this instance was constructed (the internal
     *         list itself, not a copy)
     */
    public List<String> getTokens() {
        return tokens;
    }

    /**
     * Parse, using the provided no value string.
     *
     * @param stringToParse
     *            the string to split
     * @param separator
     *            the character separating two values, must not be null
     * @param noValueString
     *            the value reported in place of an empty value
     * @return the tokens, in order of appearance
     */
    public static List<String> tokenize(String stringToParse,
            Character separator, String noValueString) {
        LineTokenizer lt = new LineTokenizer(stringToParse, separator,
                noValueString);
        return lt.getTokens();
    }

    /**
     * Parse, using the empty string as no value string.
     *
     * @param stringToParse
     *            the string to split
     * @param separator
     *            the character separating two values, must not be null
     * @return the tokens, in order of appearance
     */
    public static List<String> tokenize(String stringToParse,
            Character separator) {
        return tokenize(stringToParse, separator, "");
    }
}