1 package org
.argeo
.util
;
3 import java
.io
.BufferedReader
;
4 import java
.io
.IOException
;
5 import java
.io
.InputStream
;
6 import java
.io
.InputStreamReader
;
7 import java
.util
.ArrayList
;
8 import java
.util
.Collections
;
11 import org
.argeo
.ArgeoException
;
14 * Parses a CSV file, interpreting the first line as a header. The
15 * {@link #parse(InputStream)} method and the setters are synchronized so that
16 * the object cannot be modified while parsing is in progress.
18 public abstract class CsvParser
{
19 private char separator
= ',';
20 private char quote
= '\"';
22 private Boolean noHeader
= false;
23 private Boolean strictLineAsLongAsHeader
= true;
26 * Actually process a parsed line. If
27 * {@link #setStrictLineAsLongAsHeader(Boolean)} is true (default) the
28 * header and the tokens are guaranteed to have the same size.
31 * the current line number, starts at 1 (the header, if header
32 * processing is enabled, the first line otherwise)
34 * the read-only header or null if {@link #setNoHeader(Boolean)}
35 * is true (default is false)
39 protected abstract void processLine(Integer lineNumber
,
40 List
<String
> header
, List
<String
> tokens
);
42 public synchronized void parse(InputStream in
) {
43 BufferedReader reader
= null;
44 Integer lineCount
= 0;
46 reader
= new BufferedReader(new InputStreamReader(in
));
48 List
<String
> header
= null;
50 String headerStr
= reader
.readLine();
51 if (headerStr
== null)// empty file
54 header
= new ArrayList
<String
>();
55 StringBuffer currStr
= new StringBuffer("");
56 Boolean wasInquote
= false;
57 while (parseLine(headerStr
, header
, currStr
, wasInquote
)) {
60 header
= Collections
.unmodifiableList(header
);
64 lines
: while ((line
= reader
.readLine()) != null) {
66 List
<String
> tokens
= new ArrayList
<String
>();
67 StringBuffer currStr
= new StringBuffer("");
68 Boolean wasInquote
= false;
69 while (parseLine(line
, tokens
, currStr
, wasInquote
)) {
70 line
= reader
.readLine();
75 if (!noHeader
&& strictLineAsLongAsHeader
) {
76 int headerSize
= header
.size();
77 int tokenSize
= tokens
.size();
78 if (tokenSize
== 1 && line
.trim().equals(""))
79 continue lines
;// empty line
80 if (headerSize
!= tokenSize
) {
81 throw new ArgeoException("Token size " + tokenSize
82 + " is different from header size "
83 + headerSize
+ " at line " + lineCount
84 + ", line: " + line
+ ", header: " + header
85 + ", tokens: " + tokens
);
88 processLine(lineCount
, header
, tokens
);
90 } catch (ArgeoException e
) {
92 } catch (IOException e
) {
93 throw new ArgeoException("Cannot parse CSV file (line: "
94 + lineCount
+ ")", e
);
99 } catch (Exception e2
) {
106 * Parses a line character by character for performance reasons
108 * @return whether to continue parsing this line
110 protected Boolean
parseLine(String str
, List
<String
> tokens
,
111 StringBuffer currStr
, Boolean wasInquote
) {
112 // List<String> tokens = new ArrayList<String>();
114 //System.out.println("#LINE: " + str);
117 currStr
.append('\n');
119 char[] arr
= str
.toCharArray();
120 boolean inQuote
= wasInquote
;
121 // StringBuffer currStr = new StringBuffer("");
122 for (int i
= 0; i
< arr
.length
; i
++) {
124 if (c
== separator
) {
126 tokens
.add(currStr
.toString());
127 //System.out.println("# TOKEN: " + currStr);
128 currStr
.delete(0, currStr
.length());
130 } else if (c
== quote
) {
131 if (inQuote
&& (i
+ 1) < arr
.length
&& arr
[i
+ 1] == quote
) {
132 // case of double quote
133 currStr
.append(quote
);
136 inQuote
= inQuote ?
false : true;
144 tokens
.add(currStr
.toString());
145 //System.out.println("# TOKEN: " + currStr);
148 // throw new ArgeoException("Missing quote at the end of the line "
149 // + str + " (parsed: " + tokens + ")");
157 public char getSeparator() {
161 public synchronized void setSeparator(char separator
) {
162 this.separator
= separator
;
165 public char getQuote() {
169 public synchronized void setQuote(char quote
) {
173 public Boolean
getNoHeader() {
177 public synchronized void setNoHeader(Boolean noHeader
) {
178 this.noHeader
= noHeader
;
181 public Boolean
getStrictLineAsLongAsHeader() {
182 return strictLineAsLongAsHeader
;
185 public synchronized void setStrictLineAsLongAsHeader(
186 Boolean strictLineAsLongAsHeader
) {
187 this.strictLineAsLongAsHeader
= strictLineAsLongAsHeader
;