]> git.argeo.org Git - lgpl/argeo-commons.git/blob - org.argeo.util/src/org/argeo/util/CsvParser.java
Improve minimal web app.
[lgpl/argeo-commons.git] / org.argeo.util / src / org / argeo / util / CsvParser.java
1 package org.argeo.util;
2
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.io.InputStreamReader;
7 import java.util.ArrayList;
8 import java.util.Collections;
9 import java.util.List;
10
11 /**
12 * Parses a CSV file interpreting the first line as a header. The
13 * {@link #parse(InputStream)} method and the setters are synchronized so that
14 * the object cannot be modified when parsing.
15 */
16 public abstract class CsvParser {
17 private char separator = ',';
18 private char quote = '\"';
19
20 private Boolean noHeader = false;
21 private Boolean strictLineAsLongAsHeader = true;
22
23 /**
24 * Actually process a parsed line. If
25 * {@link #setStrictLineAsLongAsHeader(Boolean)} is true (default) the
26 * header and the tokens are guaranteed to have the same size.
27 *
28 * @param lineNumber
29 * the current line number, starts at 1 (the header, if header
30 * processing is enabled, the first line otherwise)
31 * @param header
32 * the read-only header or null if {@link #setNoHeader(Boolean)}
33 * is true (default is false)
34 * @param tokens
35 * the parsed tokens
36 */
37 protected abstract void processLine(Integer lineNumber,
38 List<String> header, List<String> tokens);
39
40 /**
41 * Parses the CSV file (stream is closed at the end)
42 */
43 public synchronized void parse(InputStream in) {
44 parse(in, null);
45 }
46
47 /**
48 * Parses the CSV file (stream is closed at the end)
49 */
50 public synchronized void parse(InputStream in, String encoding) {
51 BufferedReader reader = null;
52 Integer lineCount = 0;
53 try {
54 if (encoding == null)
55 reader = new BufferedReader(new InputStreamReader(in));
56 else
57 reader = new BufferedReader(new InputStreamReader(in, encoding));
58 List<String> header = null;
59 if (!noHeader) {
60 String headerStr = reader.readLine();
61 if (headerStr == null)// empty file
62 return;
63 lineCount++;
64 header = new ArrayList<String>();
65 StringBuffer currStr = new StringBuffer("");
66 Boolean wasInquote = false;
67 while (parseLine(headerStr, header, currStr, wasInquote)) {
68 headerStr = reader.readLine();
69 if (headerStr == null)
70 break;
71 wasInquote = true;
72 }
73 header = Collections.unmodifiableList(header);
74 }
75
76 String line = null;
77 lines: while ((line = reader.readLine()) != null) {
78 line = preProcessLine(line);
79 if (line == null) {
80 // skip line
81 continue lines;
82 }
83 lineCount++;
84 List<String> tokens = new ArrayList<String>();
85 StringBuffer currStr = new StringBuffer("");
86 Boolean wasInquote = false;
87 sublines: while (parseLine(line, tokens, currStr, wasInquote)) {
88 line = reader.readLine();
89 if (line == null)
90 break sublines;
91 wasInquote = true;
92 }
93 if (!noHeader && strictLineAsLongAsHeader) {
94 int headerSize = header.size();
95 int tokenSize = tokens.size();
96 if (tokenSize == 1 && line.trim().equals(""))
97 continue lines;// empty line
98 if (headerSize != tokenSize) {
99 throw new UtilsException("Token size " + tokenSize
100 + " is different from header size "
101 + headerSize + " at line " + lineCount
102 + ", line: " + line + ", header: " + header
103 + ", tokens: " + tokens);
104 }
105 }
106 processLine(lineCount, header, tokens);
107 }
108 } catch (UtilsException e) {
109 throw e;
110 } catch (IOException e) {
111 throw new UtilsException("Cannot parse CSV file (line: "
112 + lineCount + ")", e);
113 } finally {
114 StreamUtils.closeQuietly(reader);
115 }
116 }
117
118 /**
119 * Called before each (logical) line is processed, giving a change to modify
120 * it (typically for cleaning dirty files). To be overridden, return the
121 * line unchanged by default. Skip the line if 'null' is returned.
122 */
123 protected String preProcessLine(String line) {
124 return line;
125 }
126
127 /**
128 * Parses a line character by character for performance purpose
129 *
130 * @return whether to continue parsing this line
131 */
132 protected Boolean parseLine(String str, List<String> tokens,
133 StringBuffer currStr, Boolean wasInquote) {
134 // List<String> tokens = new ArrayList<String>();
135
136 // System.out.println("#LINE: " + str);
137
138 if (wasInquote)
139 currStr.append('\n');
140
141 char[] arr = str.toCharArray();
142 boolean inQuote = wasInquote;
143 // StringBuffer currStr = new StringBuffer("");
144 for (int i = 0; i < arr.length; i++) {
145 char c = arr[i];
146 if (c == separator) {
147 if (!inQuote) {
148 tokens.add(currStr.toString());
149 // System.out.println("# TOKEN: " + currStr);
150 currStr.delete(0, currStr.length());
151 } else {
152 // we don't remove separator that are in a quoted substring
153 // System.out
154 // .println("IN QUOTE, got a separator: [" + c + "]");
155 currStr.append(c);
156 }
157 } else if (c == quote) {
158 if (inQuote && (i + 1) < arr.length && arr[i + 1] == quote) {
159 // case of double quote
160 currStr.append(quote);
161 i++;
162 } else {// standard
163 inQuote = inQuote ? false : true;
164 }
165 } else {
166 currStr.append(c);
167 }
168 }
169
170 if (!inQuote) {
171 tokens.add(currStr.toString());
172 // System.out.println("# TOKEN: " + currStr);
173 }
174 // if (inQuote)
175 // throw new ArgeoException("Missing quote at the end of the line "
176 // + str + " (parsed: " + tokens + ")");
177 if (inQuote)
178 return true;
179 else
180 return false;
181 // return tokens;
182 }
183
184 public char getSeparator() {
185 return separator;
186 }
187
188 public synchronized void setSeparator(char separator) {
189 this.separator = separator;
190 }
191
192 public char getQuote() {
193 return quote;
194 }
195
196 public synchronized void setQuote(char quote) {
197 this.quote = quote;
198 }
199
200 public Boolean getNoHeader() {
201 return noHeader;
202 }
203
204 public synchronized void setNoHeader(Boolean noHeader) {
205 this.noHeader = noHeader;
206 }
207
208 public Boolean getStrictLineAsLongAsHeader() {
209 return strictLineAsLongAsHeader;
210 }
211
212 public synchronized void setStrictLineAsLongAsHeader(
213 Boolean strictLineAsLongAsHeader) {
214 this.strictLineAsLongAsHeader = strictLineAsLongAsHeader;
215 }
216
217 }