From 7215a19b3ee12587e9d8298c9afd576bf087b017 Mon Sep 17 00:00:00 2001 From: Mathieu Baudier Date: Tue, 20 Sep 2011 10:16:27 +0000 Subject: [PATCH] Introduce CsvWriter Fix bug in CsvParser when an header contains a line break git-svn-id: https://svn.argeo.org/commons/trunk@4743 4cfe0d0a-d680-48aa-b62c-e0a02a3f76cc --- .../main/java/org/argeo/util/CsvParser.java | 9 ++- .../main/java/org/argeo/util/CsvWriter.java | 81 +++++++++++++++++++ .../util/tabular/TabularRowIterator.java | 6 +- .../org/argeo/util/CsvParserTestCase.java | 2 +- .../org/argeo/util/CsvWriterTestCase.java | 51 ++++++++++++ 5 files changed, 142 insertions(+), 7 deletions(-) create mode 100644 basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvWriter.java create mode 100644 basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvWriterTestCase.java diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java index 9acbfe246..c20f4e07c 100644 --- a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java +++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java @@ -29,12 +29,12 @@ public abstract class CsvParser { * * @param lineNumber * the current line number, starts at 1 (the header, if header - * processing is enabled, the first lien otherwise) + * processing is enabled, the first line otherwise) * @param header * the read-only header or null if {@link #setNoHeader(Boolean)} * is true (default is false) * @param tokens - * the parse tokens + * the parsed tokens */ protected abstract void processLine(Integer lineNumber, List header, List tokens); @@ -55,6 +55,9 @@ public abstract class CsvParser { StringBuffer currStr = new StringBuffer(""); Boolean wasInquote = false; while (parseLine(headerStr, header, currStr, wasInquote)) { + headerStr = reader.readLine(); + if (headerStr == null) + break; 
wasInquote = true; } header = Collections.unmodifiableList(header); @@ -124,7 +127,7 @@ public abstract class CsvParser { if (c == separator) { if (!inQuote) { tokens.add(currStr.toString()); - //System.out.println("# TOKEN: " + currStr); + // System.out.println("# TOKEN: " + currStr); currStr.delete(0, currStr.length()); } else { // we don't remove separator that are in a quoted substring diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvWriter.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvWriter.java new file mode 100644 index 000000000..19086d613 --- /dev/null +++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvWriter.java @@ -0,0 +1,81 @@ +package org.argeo.util; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintWriter; +import java.util.Iterator; +import java.util.List; + +import org.argeo.ArgeoException; + +/** Write in CSV format. */ +public class CsvWriter { + private final PrintWriter out; + + private char separator = ','; + private char quote = '\"'; + + /** + * Creates a CSV writer. Nothing is written to the stream until + * {@link #writeLine(List)} is called. + * + * @param out + * the stream to write to. Caller is responsible for closing it. + */ + public CsvWriter(OutputStream out) { + super(); + this.out = new PrintWriter(out); + } + + /** + * Write a CSV line. Also used to write a header if needed (this is + * transparent for the CSV writer): simply call it first, before writing the + * lines. 
+ */ + public void writeLine(List tokens) { + try { + Iterator it = tokens.iterator(); + while (it.hasNext()) { + writeToken(it.next().toString()); + if (it.hasNext()) + out.print(separator); + } + out.print('\n'); + out.flush(); + } catch (IOException e) { + throw new ArgeoException("Could not write " + tokens, e); + } + } + + protected void writeToken(String token) throws IOException { + // +2 for possible quotes, another +2 assuming there would be an already + // quoted string where quotes need to be duplicated + // another +2 for safety + StringBuffer buf = new StringBuffer(token.length() + 6); + char[] arr = token.toCharArray(); + boolean shouldQuote = false; + for (char c : arr) { + if (!shouldQuote) { + if (c == separator) + shouldQuote = true; + if (c == '\n') + shouldQuote = true; + } + + if (c == quote) { + shouldQuote = true; + // duplicate quote + buf.append(quote); + } + + // generic case + buf.append(c); + } + + if (shouldQuote == true) + out.print(quote); + out.print(buf.toString()); + if (shouldQuote == true) + out.print(quote); + } +} diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularRowIterator.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularRowIterator.java index e98a8fba3..e75cff8c1 100644 --- a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularRowIterator.java +++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularRowIterator.java @@ -5,8 +5,8 @@ import java.util.Iterator; /** Navigation of rows */ public interface TabularRowIterator extends Iterator { /** - * Current line number, incremented by each call to next(), starts at 0, but - * will therefore be 1 for the first row returned. + * Current row number, has to be incremented by each call to next(); starts at 0, will + * therefore be 1 for the first row returned. 
*/ - public Long getCurrentLineNumber(); + public Long getCurrentRowNumber(); } diff --git a/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserTestCase.java b/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserTestCase.java index 363de8bc5..1e032357a 100644 --- a/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserTestCase.java +++ b/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserTestCase.java @@ -8,7 +8,7 @@ import junit.framework.TestCase; public class CsvParserTestCase extends TestCase { public void testParse() throws Exception { - String toParse = "Header1,\"Header2\",Header3,\"Header4\"\n" + String toParse = "Header1,\"Header\n2\",Header3,\"Header4\"\n" + "Col1,\"Col\n2\",Col3,\"\"\"Col4\"\"\"\n" + "Col1,\"Col\n2\",Col3,\"\"\"Col4\"\"\"\n" + "Col1,\"Col\n2\",Col3,\"\"\"Col4\"\"\"\n"; diff --git a/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvWriterTestCase.java b/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvWriterTestCase.java new file mode 100644 index 000000000..f0c02e002 --- /dev/null +++ b/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvWriterTestCase.java @@ -0,0 +1,51 @@ +package org.argeo.util; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import junit.framework.TestCase; + +public class CsvWriterTestCase extends TestCase { + public void testWrite() throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + final CsvWriter csvWriter = new CsvWriter(out); + + String[] header = { "Header1", "Header 2", "Header,3", "Header\n4", + "Header\"5\"" }; + String[] line1 = { "Value1", "Value 2", "Value,3", "Value\n4", + "Value\"5\"" }; + csvWriter.writeLine(Arrays.asList(header)); + csvWriter.writeLine(Arrays.asList(line1)); + + String reference = "Header1,Header 
2,\"Header,3\",\"Header\n4\",\"Header\"\"5\"\"\"\n" + + "Value1,Value 2,\"Value,3\",\"Value\n4\",\"Value\"\"5\"\"\"\n"; + String written = new String(out.toByteArray()); + assertEquals(reference, written); + out.close(); + System.out.println(written); + + final List allTokens = new ArrayList(); + CsvParser csvParser = new CsvParser() { + protected void processLine(Integer lineNumber, List header, + List tokens) { + if (lineNumber == 2) + allTokens.addAll(header); + allTokens.addAll(tokens); + } + }; + ByteArrayInputStream in = new ByteArrayInputStream(written.getBytes()); + csvParser.parse(in); + in.close(); + List allTokensRef = new ArrayList(); + allTokensRef.addAll(Arrays.asList(header)); + allTokensRef.addAll(Arrays.asList(line1)); + + assertEquals(allTokensRef.size(), allTokens.size()); + for (int i = 0; i < allTokensRef.size(); i++) + assertEquals(allTokensRef.get(i), allTokens.get(i)); + } + +} -- 2.39.2