--- /dev/null
+package org.argeo.util;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.argeo.ArgeoException;
+
+/**
+ * Parses a CSV file interpreting the first line as a header. The
+ * {@link #parse(InputStream)} method and the setters are synchronized so that
+ * the object cannot be modified when parsing.
+ */
+public abstract class CsvParser {
+ private char separator = ',';
+ private char quote = '\"';
+
+ private Boolean noHeader = false;
+ private Boolean strictLineAsLongAsHeader = true;
+
+ /**
+ * Actually process a parsed line. If
+ * {@link #setStrictLineAsLongAsHeader(Boolean)} is true (default) the
+ * header and the tokens are guaranteed to have the same size.
+ *
+ * @param lineNumber
+ * the current line number, starts at 1 (the header, if header
+ * processing is enabled, the first lien otherwise)
+ * @param header
+ * the read-only header or null if {@link #setNoHeader(Boolean)}
+ * is true (default is false)
+ * @param tokens
+ * the parse tokens
+ */
+ protected abstract void processLine(Integer lineNumber,
+ List<String> header, List<String> tokens);
+
+ public synchronized void parse(InputStream in) {
+ BufferedReader reader = null;
+ Integer lineCount = 0;
+ try {
+ reader = new BufferedReader(new InputStreamReader(in));
+
+ List<String> header = null;
+ if (!noHeader) {
+ String headerStr = reader.readLine();
+ if (headerStr == null)// empty file
+ return;
+ lineCount++;
+ header = Collections.unmodifiableList(parseLine(headerStr));
+ }
+
+ String line = null;
+ lines: while ((line = reader.readLine()) != null) {
+ lineCount++;
+ List<String> tokens = parseLine(line);
+ if (!noHeader && strictLineAsLongAsHeader) {
+ int headerSize = header.size();
+ int tokenSize = tokens.size();
+ if (tokenSize == 1 && line.trim().equals(""))
+ continue lines;// empty line
+ if (headerSize != tokenSize) {
+ throw new ArgeoException("Token size " + tokenSize
+ + " is different from header size "
+ + headerSize + " at line " + lineCount
+ + ", line: " + line + ", header: " + header
+ + ", tokens: " + tokens);
+ }
+ }
+ processLine(lineCount, header, tokens);
+ }
+ } catch (ArgeoException e) {
+ throw e;
+ } catch (IOException e) {
+ throw new ArgeoException("Cannot parse CSV file (line: "
+ + lineCount + ")", e);
+ } finally {
+ if (reader != null)
+ try {
+ reader.close();
+ } catch (Exception e2) {
+ // silent
+ }
+ }
+ }
+
+ /** Parses a line character by character for performance purpose */
+ protected List<String> parseLine(String str) {
+ List<String> tokens = new ArrayList<String>();
+
+ char[] arr = str.toCharArray();
+ boolean inQuote = false;
+ StringBuffer currStr = new StringBuffer("");
+ for (int i = 0; i < arr.length; i++) {
+ char c = arr[i];
+ if (c == separator) {
+ if (!inQuote) {
+ tokens.add(currStr.toString());
+ currStr = new StringBuffer("");
+ }
+ } else if (c == quote) {
+ if (inQuote && (i + 1) < arr.length && arr[i + 1] == quote) {
+ // case of double quote
+ currStr.append(quote);
+ i++;
+ } else {// standard
+ inQuote = inQuote ? false : true;
+ }
+ } else {
+ currStr.append(c);
+ }
+ }
+ tokens.add(currStr.toString());
+ if (inQuote)
+ throw new ArgeoException("Missing quote at the end of the line "
+ + str + " (parsed: " + tokens + ")");
+ return tokens;
+ }
+
+ public char getSeparator() {
+ return separator;
+ }
+
+ public synchronized void setSeparator(char separator) {
+ this.separator = separator;
+ }
+
+ public char getQuote() {
+ return quote;
+ }
+
+ public synchronized void setQuote(char quote) {
+ this.quote = quote;
+ }
+
+ public Boolean getNoHeader() {
+ return noHeader;
+ }
+
+ public synchronized void setNoHeader(Boolean noHeader) {
+ this.noHeader = noHeader;
+ }
+
+ public Boolean getStrictLineAsLongAsHeader() {
+ return strictLineAsLongAsHeader;
+ }
+
+ public synchronized void setStrictLineAsLongAsHeader(
+ Boolean strictLineAsLongAsHeader) {
+ this.strictLineAsLongAsHeader = strictLineAsLongAsHeader;
+ }
+
+}
--- /dev/null
+package org.argeo.util;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.argeo.ArgeoException;
+
+/**
+ * CSV parser allowing to process lines as maps whose keys are the header
+ * fields.
+ */
+public abstract class CsvParserWithLinesAsMap extends CsvParser {
+
+ /**
+ * Actually processes a line.
+ *
+ * @param lineNumber
+ * the current line number, starts at 1 (the header, if header
+ * processing is enabled, the first lien otherwise)
+ * @param line
+ * the parsed tokens as a map whose keys are the header fields
+ */
+ protected abstract void processLine(Integer lineNumber,
+ Map<String, String> line);
+
+ protected final void processLine(Integer lineNumber, List<String> header,
+ List<String> tokens) {
+ if (header == null)
+ throw new ArgeoException("Only CSV with header is supported");
+ Map<String, String> line = new HashMap<String, String>();
+ for (int i = 0; i < header.size(); i++) {
+ String key = header.get(i);
+ String value = null;
+ if (i < tokens.size())
+ value = tokens.get(i);
+ line.put(key, value);
+ }
+ processLine(lineNumber, line);
+ }
+
+}
--- /dev/null
+package org.argeo.util;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+/** Checks the parsing of a small CSV content with quoted fields. */
+public class CsvParserTestCase extends TestCase {
+    /**
+     * Parses a header followed by three identical lines mixing quoted and
+     * unquoted fields, and checks both the content of each line and the
+     * number of lines actually handed to the callback.
+     */
+    public void testParse() throws Exception {
+        String toParse = "Header1,\"Header2\",Header3,\"Header4\"\n"
+                + "Col1,\"Col2\",Col3,\"\"\"Col4\"\"\"\n"
+                + "Col1,\"Col2\",Col3,\"\"\"Col4\"\"\"\n"
+                + "Col1,\"Col2\",Col3,\"\"\"Col4\"\"\"\n";
+
+        InputStream in = new ByteArrayInputStream(toParse.getBytes());
+
+        // single-element array so that the anonymous class can update it
+        final int[] processedLines = { 0 };
+
+        CsvParser csvParser = new CsvParser() {
+            protected void processLine(Integer lineNumber, List<String> header,
+                    List<String> tokens) {
+                processedLines[0]++;
+                // the header is line 1, hence the first data line is line 2
+                assertEquals(processedLines[0] + 1, lineNumber.intValue());
+                assertEquals(header.size(), tokens.size());
+                assertEquals(4, tokens.size());
+                assertEquals("Col1", tokens.get(0));
+                assertEquals("Col2", tokens.get(1));
+                assertEquals("Col3", tokens.get(2));
+                assertEquals("\"Col4\"", tokens.get(3));
+            }
+        };
+
+        csvParser.parse(in);
+        in.close();
+
+        // without this check the test would pass even if no line was
+        // processed at all
+        assertEquals(3, processedLines[0]);
+    }
+}