git.argeo.org Git - lgpl/argeo-commons.git/blob - basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java
Fix .classpath files
[lgpl/argeo-commons.git] / basic / runtime / org.argeo.basic.nodeps / src / main / java / org / argeo / util / CsvParser.java
1 package org.argeo.util;
2
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.io.InputStreamReader;
7 import java.util.ArrayList;
8 import java.util.Collections;
9 import java.util.List;
10
11 import org.argeo.ArgeoException;
12
13 /**
14 * Parses a CSV file interpreting the first line as a header. The
15 * {@link #parse(InputStream)} method and the setters are synchronized so that
16 * the object cannot be modified when parsing.
17 */
18 public abstract class CsvParser {
19 private char separator = ',';
20 private char quote = '\"';
21
22 private Boolean noHeader = false;
23 private Boolean strictLineAsLongAsHeader = true;
24
25 /**
26 * Actually process a parsed line. If
27 * {@link #setStrictLineAsLongAsHeader(Boolean)} is true (default) the
28 * header and the tokens are guaranteed to have the same size.
29 *
30 * @param lineNumber
31 * the current line number, starts at 1 (the header, if header
32 * processing is enabled, the first line otherwise)
33 * @param header
34 * the read-only header or null if {@link #setNoHeader(Boolean)}
35 * is true (default is false)
36 * @param tokens
37 * the parsed tokens
38 */
39 protected abstract void processLine(Integer lineNumber,
40 List<String> header, List<String> tokens);
41
42 public synchronized void parse(InputStream in) {
43 BufferedReader reader = null;
44 Integer lineCount = 0;
45 try {
46 reader = new BufferedReader(new InputStreamReader(in));
47
48 List<String> header = null;
49 if (!noHeader) {
50 String headerStr = reader.readLine();
51 if (headerStr == null)// empty file
52 return;
53 lineCount++;
54 header = new ArrayList<String>();
55 StringBuffer currStr = new StringBuffer("");
56 Boolean wasInquote = false;
57 while (parseLine(headerStr, header, currStr, wasInquote)) {
58 headerStr = reader.readLine();
59 if (headerStr == null)
60 break;
61 wasInquote = true;
62 }
63 header = Collections.unmodifiableList(header);
64 }
65
66 String line = null;
67 lines: while ((line = reader.readLine()) != null) {
68 lineCount++;
69 List<String> tokens = new ArrayList<String>();
70 StringBuffer currStr = new StringBuffer("");
71 Boolean wasInquote = false;
72 while (parseLine(line, tokens, currStr, wasInquote)) {
73 line = reader.readLine();
74 if (line == null)
75 break;
76 wasInquote = true;
77 }
78 if (!noHeader && strictLineAsLongAsHeader) {
79 int headerSize = header.size();
80 int tokenSize = tokens.size();
81 if (tokenSize == 1 && line.trim().equals(""))
82 continue lines;// empty line
83 if (headerSize != tokenSize) {
84 throw new ArgeoException("Token size " + tokenSize
85 + " is different from header size "
86 + headerSize + " at line " + lineCount
87 + ", line: " + line + ", header: " + header
88 + ", tokens: " + tokens);
89 }
90 }
91 processLine(lineCount, header, tokens);
92 }
93 } catch (ArgeoException e) {
94 throw e;
95 } catch (IOException e) {
96 throw new ArgeoException("Cannot parse CSV file (line: "
97 + lineCount + ")", e);
98 } finally {
99 if (reader != null)
100 try {
101 reader.close();
102 } catch (Exception e2) {
103 // silent
104 }
105 }
106 }
107
108 /**
109 * Parses a line character by character for performance purpose
110 *
111 * @return whether to continue parsing this line
112 */
113 protected Boolean parseLine(String str, List<String> tokens,
114 StringBuffer currStr, Boolean wasInquote) {
115 // List<String> tokens = new ArrayList<String>();
116
117 // System.out.println("#LINE: " + str);
118
119 if (wasInquote)
120 currStr.append('\n');
121
122 char[] arr = str.toCharArray();
123 boolean inQuote = wasInquote;
124 // StringBuffer currStr = new StringBuffer("");
125 for (int i = 0; i < arr.length; i++) {
126 char c = arr[i];
127 if (c == separator) {
128 if (!inQuote) {
129 tokens.add(currStr.toString());
130 // System.out.println("# TOKEN: " + currStr);
131 currStr.delete(0, currStr.length());
132 } else {
133 // we don't remove separator that are in a quoted substring
134 // System.out
135 // .println("IN QUOTE, got a separator: [" + c + "]");
136 currStr.append(c);
137 }
138 } else if (c == quote) {
139 if (inQuote && (i + 1) < arr.length && arr[i + 1] == quote) {
140 // case of double quote
141 currStr.append(quote);
142 i++;
143 } else {// standard
144 inQuote = inQuote ? false : true;
145 }
146 } else {
147 currStr.append(c);
148 }
149 }
150
151 if (!inQuote) {
152 tokens.add(currStr.toString());
153 // System.out.println("# TOKEN: " + currStr);
154 }
155 // if (inQuote)
156 // throw new ArgeoException("Missing quote at the end of the line "
157 // + str + " (parsed: " + tokens + ")");
158 if (inQuote)
159 return true;
160 else
161 return false;
162 // return tokens;
163 }
164
165 public char getSeparator() {
166 return separator;
167 }
168
169 public synchronized void setSeparator(char separator) {
170 this.separator = separator;
171 }
172
173 public char getQuote() {
174 return quote;
175 }
176
177 public synchronized void setQuote(char quote) {
178 this.quote = quote;
179 }
180
181 public Boolean getNoHeader() {
182 return noHeader;
183 }
184
185 public synchronized void setNoHeader(Boolean noHeader) {
186 this.noHeader = noHeader;
187 }
188
189 public Boolean getStrictLineAsLongAsHeader() {
190 return strictLineAsLongAsHeader;
191 }
192
193 public synchronized void setStrictLineAsLongAsHeader(
194 Boolean strictLineAsLongAsHeader) {
195 this.strictLineAsLongAsHeader = strictLineAsLongAsHeader;
196 }
197
198 }