Improve CSV parser and writer
author Mathieu Baudier <mbaudier@argeo.org>
Sat, 25 Feb 2012 12:47:40 +0000 (12:47 +0000)
committer Mathieu Baudier <mbaudier@argeo.org>
Sat, 25 Feb 2012 12:47:40 +0000 (12:47 +0000)
git-svn-id: https://svn.argeo.org/commons/trunk@5112 4cfe0d0a-d680-48aa-b62c-e0a02a3f76cc

basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java
basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvWriter.java

index 3a429f4adc23a634dfcff402203a20e2c83e04d0..7e218c55ceaff4e9afa6811f0497a7ef796e1a8d 100644 (file)
@@ -71,14 +71,19 @@ public abstract class CsvParser {
 
                        String line = null;
                        lines: while ((line = reader.readLine()) != null) {
+                               line = preProcessLine(line);
+                               if (line == null) {
+                                       // skip line
+                                       continue lines;
+                               }
                                lineCount++;
                                List<String> tokens = new ArrayList<String>();
                                StringBuffer currStr = new StringBuffer("");
                                Boolean wasInquote = false;
-                               while (parseLine(line, tokens, currStr, wasInquote)) {
+                               sublines: while (parseLine(line, tokens, currStr, wasInquote)) {
                                        line = reader.readLine();
                                        if (line == null)
-                                               break;
+                                               break sublines;
                                        wasInquote = true;
                                }
                                if (!noHeader && strictLineAsLongAsHeader) {
@@ -111,6 +116,15 @@ public abstract class CsvParser {
                }
        }
 
+       /**
+        * Called before each (logical) line is processed, giving a chance to modify
+        * it (typically for cleaning dirty files). To be overridden, return the
+        * line unchanged by default. Skip the line if 'null' is returned.
+        */
+       protected String preProcessLine(String line) {
+               return line;
+       }
+
        /**
         * Parses a line character by character for performance purpose
         * 
index 85356e4fed07c4f24e9f980698a983c85d1a17d9..2167af1ad18a71dbf7fab3331a6e17eaefabb0a2 100644 (file)
@@ -2,7 +2,9 @@ package org.argeo.util;
 
 import java.io.IOException;
 import java.io.OutputStream;
-import java.io.PrintWriter;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
 import java.util.Iterator;
 import java.util.List;
 
@@ -10,21 +12,33 @@ import org.argeo.ArgeoException;
 
 /** Write in CSV format. */
 public class CsvWriter {
-       private final PrintWriter out;
+       private final Writer out;
 
        private char separator = ',';
        private char quote = '\"';
 
        /**
-        * Creates a CSV writer. The header will be written immediately to the
-        * stream.
+        * Creates a CSV writer.
         * 
         * @param out
         *            the stream to write to. Caller is responsible for closing it.
         */
        public CsvWriter(OutputStream out) {
-               super();
-               this.out = new PrintWriter(out);
+               this.out = new OutputStreamWriter(out);
+       }
+
+       /**
+        * Creates a CSV writer using the given character encoding.
+        * 
+        * @param out
+        *            the stream to write to. Caller is responsible for closing it.
+        */
+       public CsvWriter(OutputStream out, String encoding) {
+               try {
+                       this.out = new OutputStreamWriter(out, encoding);
+               } catch (UnsupportedEncodingException e) {
+                       throw new ArgeoException("Cannot initialize CSV writer", e);
+               }
        }
 
        /**
@@ -38,9 +52,9 @@ public class CsvWriter {
                        while (it.hasNext()) {
                                writeToken(it.next().toString());
                                if (it.hasNext())
-                                       out.print(separator);
+                                       out.write(separator);
                        }
-                       out.print('\n');
+                       out.write('\n');
                        out.flush();
                } catch (IOException e) {
                        throw new ArgeoException("Could not write " + tokens, e);
@@ -57,9 +71,9 @@ public class CsvWriter {
                        for (int i = 0; i < tokens.length; i++) {
                                writeToken(tokens[i].toString());
                                if (i != (tokens.length - 1))
-                                       out.print(separator);
+                                       out.write(separator);
                        }
-                       out.print('\n');
+                       out.write('\n');
                        out.flush();
                } catch (IOException e) {
                        throw new ArgeoException("Could not write " + tokens, e);
@@ -70,6 +84,7 @@ public class CsvWriter {
                // +2 for possible quotes, another +2 assuming there would be an already
                // quoted string where quotes needs to be duplicated
                // another +2 for safety
+               // we don't want to increase buffer size while writing
                StringBuffer buf = new StringBuffer(token.length() + 6);
                char[] arr = token.toCharArray();
                boolean shouldQuote = false;
@@ -92,10 +107,10 @@ public class CsvWriter {
                }
 
                if (shouldQuote == true)
-                       out.print(quote);
-               out.print(buf.toString());
+                       out.write(quote);
+               out.write(buf.toString());
                if (shouldQuote == true)
-                       out.print(quote);
+                       out.write(quote);
        }
 
        public void setSeparator(char separator) {