Introduce CsvWriter
author Mathieu Baudier <mbaudier@argeo.org>
Tue, 20 Sep 2011 10:16:27 +0000 (10:16 +0000)
committer Mathieu Baudier <mbaudier@argeo.org>
Tue, 20 Sep 2011 10:16:27 +0000 (10:16 +0000)
Fix bug in CsvParser when a header contains a line break

git-svn-id: https://svn.argeo.org/commons/trunk@4743 4cfe0d0a-d680-48aa-b62c-e0a02a3f76cc

basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java
basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvWriter.java [new file with mode: 0644]
basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularRowIterator.java
basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserTestCase.java
basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvWriterTestCase.java [new file with mode: 0644]

index 9acbfe246ce07ec67da94721fa25b7c23a5e7a9e..c20f4e07c684c90902cef9fd473e16410286d64f 100644 (file)
@@ -29,12 +29,12 @@ public abstract class CsvParser {
         * 
         * @param lineNumber
         *            the current line number, starts at 1 (the header, if header
-        *            processing is enabled, the first lien otherwise)
+        *            processing is enabled, the first line otherwise)
         * @param header
         *            the read-only header or null if {@link #setNoHeader(Boolean)}
         *            is true (default is false)
         * @param tokens
-        *            the parse tokens
+        *            the parsed tokens
         */
        protected abstract void processLine(Integer lineNumber,
                        List<String> header, List<String> tokens);
@@ -55,6 +55,9 @@ public abstract class CsvParser {
                                StringBuffer currStr = new StringBuffer("");
                                Boolean wasInquote = false;
                                while (parseLine(headerStr, header, currStr, wasInquote)) {
+                                       headerStr = reader.readLine();
+                                       if (headerStr == null)
+                                               break;
                                        wasInquote = true;
                                }
                                header = Collections.unmodifiableList(header);
@@ -124,7 +127,7 @@ public abstract class CsvParser {
                        if (c == separator) {
                                if (!inQuote) {
                                        tokens.add(currStr.toString());
-                                       //System.out.println("# TOKEN: " + currStr);
+                                       // System.out.println("# TOKEN: " + currStr);
                                        currStr.delete(0, currStr.length());
                                } else {
                                        // we don't remove separator that are in a quoted substring
diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvWriter.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvWriter.java
new file mode 100644 (file)
index 0000000..19086d6
--- /dev/null
@@ -0,0 +1,81 @@
+package org.argeo.util;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.util.Iterator;
+import java.util.List;
+
+import org.argeo.ArgeoException;
+
+/**
+ * Writes tokens in CSV format to an output stream, one line per call to
+ * {@link #writeLine(List)}. Tokens containing the separator, the quote
+ * character or a line break are enclosed in quotes, and quote characters
+ * inside a token are doubled.
+ */
+public class CsvWriter {
+       private final PrintWriter out;
+
+       private char separator = ',';
+       private char quote = '\"';
+
+       /**
+        * Creates a CSV writer. Nothing is written until
+        * {@link #writeLine(List)} is called. Characters are converted to bytes
+        * using the default character encoding, as done by
+        * {@link PrintWriter#PrintWriter(OutputStream)}.
+        * 
+        * @param out
+        *            the stream to write to. Caller is responsible for closing it.
+        */
+       public CsvWriter(OutputStream out) {
+               super();
+               this.out = new PrintWriter(out);
+       }
+
+       /**
+        * Write a CSV line. Also used to write a header if needed (this is
+        * transparent for the CSV writer): simply call it first, before writing the
+        * lines. The line is terminated with '\n' and the stream is flushed.
+        * 
+        * @param tokens
+        *            the values of the line, written via their toString(); a
+        *            null element is written as an empty field
+        * @throws ArgeoException
+        *             if a token could not be written or if the underlying
+        *             stream reported an error
+        */
+       public void writeLine(List<?> tokens) {
+               try {
+                       Iterator<?> it = tokens.iterator();
+                       while (it.hasNext()) {
+                               Object token = it.next();
+                               // calling toString() on a null element would throw a
+                               // NullPointerException: write an empty field instead
+                               writeToken(token == null ? "" : token.toString());
+                               if (it.hasNext())
+                                       out.print(separator);
+                       }
+                       out.print('\n');
+                       // PrintWriter never throws IOException itself: checkError()
+                       // flushes and reports whether a write to the stream failed
+                       if (out.checkError())
+                               throw new IOException("Underlying stream reported an error");
+               } catch (IOException e) {
+                       throw new ArgeoException("Could not write " + tokens, e);
+               }
+       }
+
+       /**
+        * Writes a single token, without any separator. The token is enclosed in
+        * quotes if it contains the separator, the quote character, '\n' or '\r';
+        * each quote character it contains is written twice.
+        * 
+        * @param token
+        *            the text to write, must not be null
+        * @throws IOException
+        *             declared so that overriding implementations can report
+        *             write failures
+        */
+       protected void writeToken(String token) throws IOException {
+               // a few extra characters so that duplicating some quotes does not
+               // force the buffer to grow
+               StringBuilder buf = new StringBuilder(token.length() + 6);
+               boolean shouldQuote = false;
+               for (char c : token.toCharArray()) {
+                       // '\r' must be quoted as well: line-based readers such as
+                       // BufferedReader.readLine() treat it as a line terminator
+                       if (c == separator || c == '\n' || c == '\r')
+                               shouldQuote = true;
+
+                       if (c == quote) {
+                               shouldQuote = true;
+                               // duplicate quote
+                               buf.append(quote);
+                       }
+
+                       // generic case
+                       buf.append(c);
+               }
+
+               if (shouldQuote)
+                       out.print(quote);
+               out.print(buf.toString());
+               if (shouldQuote)
+                       out.print(quote);
+       }
+}
index e98a8fba39f2c23c510140433342e3d3a1047c16..e75cff8c1cb32118f5c9d101202414bfe332d829 100644 (file)
@@ -5,8 +5,8 @@ import java.util.Iterator;
 /** Navigation of rows */
 public interface TabularRowIterator extends Iterator<TabularRow> {
        /**
-        * Current line number, incremented by each call to next(), starts at 0, but
-        * will therefore be 1 for the first row returned.
+        * Current row number; has to be incremented by each call to next(). Starts at 0,
+        * so it will be 1 for the first row returned.
         */
-       public Long getCurrentLineNumber();
+       public Long getCurrentRowNumber();
 }
index 363de8bc51683ab167db7e58c73d6b15178a268e..1e032357a2d6376e004fe04eef5596f810559d7c 100644 (file)
@@ -8,7 +8,7 @@ import junit.framework.TestCase;
 
 public class CsvParserTestCase extends TestCase {
        public void testParse() throws Exception {
-               String toParse = "Header1,\"Header2\",Header3,\"Header4\"\n"
+               String toParse = "Header1,\"Header\n2\",Header3,\"Header4\"\n"
                                + "Col1,\"Col\n2\",Col3,\"\"\"Col4\"\"\"\n"
                                + "Col1,\"Col\n2\",Col3,\"\"\"Col4\"\"\"\n"
                                + "Col1,\"Col\n2\",Col3,\"\"\"Col4\"\"\"\n";
diff --git a/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvWriterTestCase.java b/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvWriterTestCase.java
new file mode 100644 (file)
index 0000000..f0c02e0
--- /dev/null
@@ -0,0 +1,51 @@
+package org.argeo.util;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+public class CsvWriterTestCase extends TestCase {
+       /**
+        * Writes a header and one line whose tokens contain a space, a separator,
+        * a line break and quotes, compares the output with the expected CSV
+        * text, then parses that text back and checks that the original tokens
+        * are recovered in order.
+        */
+       public void testWrite() throws Exception {
+               String[] header = { "Header1", "Header 2", "Header,3", "Header\n4",
+                               "Header\"5\"" };
+               String[] line1 = { "Value1", "Value 2", "Value,3", "Value\n4",
+                               "Value\"5\"" };
+
+               // write both lines to an in-memory stream
+               ByteArrayOutputStream out = new ByteArrayOutputStream();
+               final CsvWriter csvWriter = new CsvWriter(out);
+               csvWriter.writeLine(Arrays.asList(header));
+               csvWriter.writeLine(Arrays.asList(line1));
+
+               // check the raw CSV text
+               String headerRef = "Header1,Header 2,\"Header,3\",\"Header\n4\",\"Header\"\"5\"\"\"\n";
+               String line1Ref = "Value1,Value 2,\"Value,3\",\"Value\n4\",\"Value\"\"5\"\"\"\n";
+               String reference = headerRef + line1Ref;
+               String written = new String(out.toByteArray());
+               assertEquals(reference, written);
+               out.close();
+               System.out.println(written);
+
+               // parse the written text back and collect every token
+               final List<String> parsedTokens = new ArrayList<String>();
+               CsvParser csvParser = new CsvParser() {
+                       protected void processLine(Integer lineNumber, List<String> header,
+                                       List<String> tokens) {
+                               // collect the header once, when line 2 is processed
+                               if (lineNumber == 2)
+                                       parsedTokens.addAll(header);
+                               parsedTokens.addAll(tokens);
+                       }
+               };
+               ByteArrayInputStream in = new ByteArrayInputStream(written.getBytes());
+               csvParser.parse(in);
+               in.close();
+
+               // the parsed tokens must match the written ones, in order
+               List<String> expectedTokens = new ArrayList<String>(
+                               Arrays.asList(header));
+               expectedTokens.addAll(Arrays.asList(line1));
+
+               assertEquals(expectedTokens.size(), parsedTokens.size());
+               int index = 0;
+               for (String expected : expectedTokens)
+                       assertEquals(expected, parsedTokens.get(index++));
+       }
+
+}