]> git.argeo.org Git - lgpl/argeo-commons.git/blob - util/CsvParser.java
Prepare next development cycle
[lgpl/argeo-commons.git] / util / CsvParser.java
1 package org.argeo.util;
2
3 import java.io.BufferedReader;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.io.InputStreamReader;
7 import java.util.ArrayList;
8 import java.util.Collections;
9 import java.util.List;
10
11 import org.argeo.ArgeoException;
12
13 /**
14 * Parses a CSV file interpreting the first line as a header. The
15 * {@link #parse(InputStream)} method and the setters are synchronized so that
16 * the object cannot be modified when parsing.
17 */
18 public abstract class CsvParser {
19 private char separator = ',';
20 private char quote = '\"';
21
22 private Boolean noHeader = false;
23 private Boolean strictLineAsLongAsHeader = true;
24
25 /**
26 * Actually process a parsed line. If
27 * {@link #setStrictLineAsLongAsHeader(Boolean)} is true (default) the
28 * header and the tokens are guaranteed to have the same size.
29 *
30 * @param lineNumber
31 * the current line number, starts at 1 (the header, if header
32 * processing is enabled, the first lien otherwise)
33 * @param header
34 * the read-only header or null if {@link #setNoHeader(Boolean)}
35 * is true (default is false)
36 * @param tokens
37 * the parse tokens
38 */
39 protected abstract void processLine(Integer lineNumber,
40 List<String> header, List<String> tokens);
41
42 public synchronized void parse(InputStream in) {
43 BufferedReader reader = null;
44 Integer lineCount = 0;
45 try {
46 reader = new BufferedReader(new InputStreamReader(in));
47
48 List<String> header = null;
49 if (!noHeader) {
50 String headerStr = reader.readLine();
51 if (headerStr == null)// empty file
52 return;
53 lineCount++;
54 header = Collections.unmodifiableList(parseLine(headerStr));
55 }
56
57 String line = null;
58 lines: while ((line = reader.readLine()) != null) {
59 lineCount++;
60 List<String> tokens = parseLine(line);
61 if (!noHeader && strictLineAsLongAsHeader) {
62 int headerSize = header.size();
63 int tokenSize = tokens.size();
64 if (tokenSize == 1 && line.trim().equals(""))
65 continue lines;// empty line
66 if (headerSize != tokenSize) {
67 throw new ArgeoException("Token size " + tokenSize
68 + " is different from header size "
69 + headerSize + " at line " + lineCount
70 + ", line: " + line + ", header: " + header
71 + ", tokens: " + tokens);
72 }
73 }
74 processLine(lineCount, header, tokens);
75 }
76 } catch (ArgeoException e) {
77 throw e;
78 } catch (IOException e) {
79 throw new ArgeoException("Cannot parse CSV file (line: "
80 + lineCount + ")", e);
81 } finally {
82 if (reader != null)
83 try {
84 reader.close();
85 } catch (Exception e2) {
86 // silent
87 }
88 }
89 }
90
91 /** Parses a line character by character for performance purpose */
92 protected List<String> parseLine(String str) {
93 List<String> tokens = new ArrayList<String>();
94
95 char[] arr = str.toCharArray();
96 boolean inQuote = false;
97 StringBuffer currStr = new StringBuffer("");
98 for (int i = 0; i < arr.length; i++) {
99 char c = arr[i];
100 if (c == separator) {
101 if (!inQuote) {
102 tokens.add(currStr.toString());
103 currStr = new StringBuffer("");
104 }
105 } else if (c == quote) {
106 if (inQuote && (i + 1) < arr.length && arr[i + 1] == quote) {
107 // case of double quote
108 currStr.append(quote);
109 i++;
110 } else {// standard
111 inQuote = inQuote ? false : true;
112 }
113 } else {
114 currStr.append(c);
115 }
116 }
117 tokens.add(currStr.toString());
118 if (inQuote)
119 throw new ArgeoException("Missing quote at the end of the line "
120 + str + " (parsed: " + tokens + ")");
121 return tokens;
122 }
123
124 public char getSeparator() {
125 return separator;
126 }
127
128 public synchronized void setSeparator(char separator) {
129 this.separator = separator;
130 }
131
132 public char getQuote() {
133 return quote;
134 }
135
136 public synchronized void setQuote(char quote) {
137 this.quote = quote;
138 }
139
140 public Boolean getNoHeader() {
141 return noHeader;
142 }
143
144 public synchronized void setNoHeader(Boolean noHeader) {
145 this.noHeader = noHeader;
146 }
147
148 public Boolean getStrictLineAsLongAsHeader() {
149 return strictLineAsLongAsHeader;
150 }
151
152 public synchronized void setStrictLineAsLongAsHeader(
153 Boolean strictLineAsLongAsHeader) {
154 this.strictLineAsLongAsHeader = strictLineAsLongAsHeader;
155 }
156
157 }