From 24d2e8155618208393100b9ba41ec0e2c656e903 Mon Sep 17 00:00:00 2001 From: Mathieu Baudier Date: Sat, 25 Feb 2012 12:47:40 +0000 Subject: [PATCH] Improve CSV parser and writer git-svn-id: https://svn.argeo.org/commons/trunk@5112 4cfe0d0a-d680-48aa-b62c-e0a02a3f76cc --- .../main/java/org/argeo/util/CsvParser.java | 18 +++++++- .../main/java/org/argeo/util/CsvWriter.java | 41 +++++++++++++------ 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java index 3a429f4ad..7e218c55c 100644 --- a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java +++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java @@ -71,14 +71,19 @@ public abstract class CsvParser { String line = null; lines: while ((line = reader.readLine()) != null) { + line = preProcessLine(line); + if (line == null) { + // skip line + continue lines; + } lineCount++; List tokens = new ArrayList(); StringBuffer currStr = new StringBuffer(""); Boolean wasInquote = false; - while (parseLine(line, tokens, currStr, wasInquote)) { + sublines: while (parseLine(line, tokens, currStr, wasInquote)) { line = reader.readLine(); if (line == null) - break; + break sublines; wasInquote = true; } if (!noHeader && strictLineAsLongAsHeader) { @@ -111,6 +116,15 @@ public abstract class CsvParser { } } + /** + * Called before each (logical) line is processed, giving a chance to modify + * it (typically for cleaning dirty files). To be overridden, return the + * line unchanged by default. Skip the line if 'null' is returned. 
+ */ + protected String preProcessLine(String line) { + return line; + } + /** * Parses a line character by character for performance purpose * diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvWriter.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvWriter.java index 85356e4fe..2167af1ad 100644 --- a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvWriter.java +++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvWriter.java @@ -2,7 +2,9 @@ package org.argeo.util; import java.io.IOException; import java.io.OutputStream; -import java.io.PrintWriter; +import java.io.OutputStreamWriter; +import java.io.UnsupportedEncodingException; +import java.io.Writer; import java.util.Iterator; import java.util.List; @@ -10,21 +12,33 @@ import org.argeo.ArgeoException; /** Write in CSV format. */ public class CsvWriter { - private final PrintWriter out; + private final Writer out; private char separator = ','; private char quote = '\"'; /** - * Creates a CSV writer. The header will be written immediately to the - * stream. + * Creates a CSV writer. * * @param out * the stream to write to. Caller is responsible for closing it. */ public CsvWriter(OutputStream out) { - super(); - this.out = new PrintWriter(out); + this.out = new OutputStreamWriter(out); + } + + /** + * Creates a CSV writer. + * + * @param out + * the stream to write to. Caller is responsible for closing it. 
+ */ + public CsvWriter(OutputStream out, String encoding) { + try { + this.out = new OutputStreamWriter(out, encoding); + } catch (UnsupportedEncodingException e) { + throw new ArgeoException("Cannot initialize CSV writer", e); + } } /** @@ -38,9 +52,9 @@ public class CsvWriter { while (it.hasNext()) { writeToken(it.next().toString()); if (it.hasNext()) - out.print(separator); + out.write(separator); } - out.print('\n'); + out.write('\n'); out.flush(); } catch (IOException e) { throw new ArgeoException("Could not write " + tokens, e); @@ -57,9 +71,9 @@ public class CsvWriter { for (int i = 0; i < tokens.length; i++) { writeToken(tokens[i].toString()); if (i != (tokens.length - 1)) - out.print(separator); + out.write(separator); } - out.print('\n'); + out.write('\n'); out.flush(); } catch (IOException e) { throw new ArgeoException("Could not write " + tokens, e); @@ -70,6 +84,7 @@ public class CsvWriter { // +2 for possible quotes, another +2 assuming there would be an already // quoted string where quotes needs to be duplicated // another +2 for safety + // we don't want to increase buffer size while writing StringBuffer buf = new StringBuffer(token.length() + 6); char[] arr = token.toCharArray(); boolean shouldQuote = false; @@ -92,10 +107,10 @@ public class CsvWriter { } if (shouldQuote == true) - out.print(quote); - out.print(buf.toString()); + out.write(quote); + out.write(buf.toString()); if (shouldQuote == true) - out.print(quote); + out.write(quote); } public void setSeparator(char separator) { -- 2.30.2