]>
git.argeo.org Git - gpl/argeo-slc.git/blob - runtime/org.argeo.slc.support.simple/src/main/java/org/argeo/slc/diff/LineTokenizer.java
/*
 * Copyright (C) 2010 Mathieu Baudier <mbaudier@argeo.org>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 package org
.argeo
.slc
.diff
;
19 import java
.util
.ArrayList
;
20 import java
.util
.List
;
/**
 * Parses a string as a vector of strings according to a separator, dealing
 * properly with missing values. This is intended to be used instead of the
 * standard StringTokenizer, which does not deal well with empty values.
 * Contrary to the StringTokenizer, the provided String is parsed eagerly in
 * the constructor and the resulting values are stored as a property. This
 * should therefore not be used to parse long strings.
 */
public class LineTokenizer {
    /** Sentinel for "no separator seen yet"; never a valid char index. */
    private static final int NO_SEPARATOR = -1;

    private final List<String> tokens;

    /**
     * Complete constructor.
     *
     * @param stringToParse
     *            the line to split
     * @param separator
     *            the separator character; it is unboxed, so it must not be
     *            null
     * @param noValueString
     *            the value stored in place of each missing (empty) token
     */
    public LineTokenizer(String stringToParse, Character separator,
            String noValueString) {
        // parse() is overridable and called from here: an overriding
        // implementation runs before the subclass constructor body.
        this.tokens = parse(stringToParse, separator, noValueString);
    }

    /**
     * Parse the string as a vector of strings. Can be overridden in order to
     * provide another implementation.
     *
     * @param stringToParse
     *            the line to split
     * @param separator
     *            the separator character
     * @param noValueString
     *            the value stored in place of each missing (empty) token
     * @return the tokens, in order; a string containing n separators always
     *         yields n + 1 tokens
     */
    protected List<String> parse(final String stringToParse,
            final char separator, final String noValueString) {
        List<String> res = new ArrayList<String>();
        final int length = stringToParse.length();
        int lastSeparatorIndex = NO_SEPARATOR;

        for (int currIndex = 0; currIndex < length; currIndex++) {
            if (stringToParse.charAt(currIndex) != separator) {
                continue;
            }
            // The token spans from just after the previous separator (or
            // from the start of the string) up to this separator. An empty
            // span covers both "first char is a separator" and
            // "consecutive separators".
            res.add(tokenOrNoValue(stringToParse, lastSeparatorIndex + 1,
                    currIndex, noValueString));
            lastSeparatorIndex = currIndex;
        }

        if (lastSeparatorIndex == NO_SEPARATOR) {
            // No separator found: the whole string is the single token. An
            // empty input is kept as "" rather than mapped to noValueString.
            res.add(stringToParse);
        } else {
            // Last token; an empty span means the last char is a separator.
            res.add(tokenOrNoValue(stringToParse, lastSeparatorIndex + 1,
                    length, noValueString));
        }
        return res;
    }

    /** Returns source[start, end), or noValueString when that span is empty. */
    private static String tokenOrNoValue(String source, int start, int end,
            String noValueString) {
        return start == end ? noValueString : source.substring(start, end);
    }

    /** The tokens computed by the constructor. */
    public List<String> getTokens() {
        return tokens;
    }

    /** Parse, using the provided string to mark missing values. */
    public static List<String> tokenize(String stringToParse,
            Character separator, String noValueString) {
        LineTokenizer lt = new LineTokenizer(stringToParse, separator,
                noValueString);
        return lt.getTokens();
    }

    /** Parse, using the empty string as no value string. */
    public static List<String> tokenize(String stringToParse,
            Character separator) {
        return tokenize(stringToParse, separator, "");
    }
}