Make CsvParser deal with line breaks inside quoted fields
author Mathieu Baudier <mbaudier@argeo.org>
Wed, 2 Feb 2011 17:56:47 +0000 (17:56 +0000)
committer Mathieu Baudier <mbaudier@argeo.org>
Wed, 2 Feb 2011 17:56:47 +0000 (17:56 +0000)
git-svn-id: https://svn.argeo.org/commons/trunk@4119 4cfe0d0a-d680-48aa-b62c-e0a02a3f76cc

basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java
basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserTestCase.java

index 4b4d0c8762016320fc8054aa281f2993a24506ff..ca81bc98305700e3be83f5ebac44c02f8718f40b 100644 (file)
@@ -51,13 +51,27 @@ public abstract class CsvParser {
                                if (headerStr == null)// empty file
                                        return;
                                lineCount++;
-                               header = Collections.unmodifiableList(parseLine(headerStr));
+                               header = new ArrayList<String>();
+                               StringBuffer currStr = new StringBuffer("");
+                               Boolean wasInquote = false;
+                               while (parseLine(headerStr, header, currStr, wasInquote)) {
+                                       wasInquote = true;
+                               }
+                               header = Collections.unmodifiableList(header);
                        }
 
                        String line = null;
                        lines: while ((line = reader.readLine()) != null) {
                                lineCount++;
-                               List<String> tokens = parseLine(line);
+                               List<String> tokens = new ArrayList<String>();
+                               StringBuffer currStr = new StringBuffer("");
+                               Boolean wasInquote = false;
+                               while (parseLine(line, tokens, currStr, wasInquote)) {
+                                       line = reader.readLine();
+                                       if (line == null)
+                                               break;
+                                       wasInquote = true;
+                               }
                                if (!noHeader && strictLineAsLongAsHeader) {
                                        int headerSize = header.size();
                                        int tokenSize = tokens.size();
@@ -88,19 +102,30 @@ public abstract class CsvParser {
                }
        }
 
-       /** Parses a line character by character for performance purpose */
-       protected List<String> parseLine(String str) {
-               List<String> tokens = new ArrayList<String>();
+       /**
+        * Parses a line character by character for performance purpose
+        * 
+        * @return whether to continue parsing this line
+        */
+       protected Boolean parseLine(String str, List<String> tokens,
+                       StringBuffer currStr, Boolean wasInquote) {
+               // List<String> tokens = new ArrayList<String>();
+
+               //System.out.println("#LINE: " + str);
+
+               if (wasInquote)
+                       currStr.append('\n');
 
                char[] arr = str.toCharArray();
-               boolean inQuote = false;
-               StringBuffer currStr = new StringBuffer("");
+               boolean inQuote = wasInquote;
+               // StringBuffer currStr = new StringBuffer("");
                for (int i = 0; i < arr.length; i++) {
                        char c = arr[i];
                        if (c == separator) {
                                if (!inQuote) {
                                        tokens.add(currStr.toString());
-                                       currStr = new StringBuffer("");
+                                       //System.out.println("# TOKEN: " + currStr);
+                                       currStr.delete(0, currStr.length());
                                }
                        } else if (c == quote) {
                                if (inQuote && (i + 1) < arr.length && arr[i + 1] == quote) {
@@ -114,11 +139,19 @@ public abstract class CsvParser {
                                currStr.append(c);
                        }
                }
-               tokens.add(currStr.toString());
+
+               if (!inQuote) {
+                       tokens.add(currStr.toString());
+                       //System.out.println("# TOKEN: " + currStr);
+               }
+               // if (inQuote)
+               // throw new ArgeoException("Missing quote at the end of the line "
+               // + str + " (parsed: " + tokens + ")");
                if (inQuote)
-                       throw new ArgeoException("Missing quote at the end of the line "
-                                       + str + " (parsed: " + tokens + ")");
-               return tokens;
+                       return true;
+               else
+                       return false;
+               // return tokens;
        }
 
        public char getSeparator() {
index 0b17d67b562fcb59a57a535d3c0bc7bab016d2ef..5a8e4a8b8cd2bb8186e42a193f7e66761306455b 100644 (file)
@@ -9,9 +9,9 @@ import junit.framework.TestCase;
 public class CsvParserTestCase extends TestCase {
        public void testParse() throws Exception {
                String toParse = "Header1,\"Header2\",Header3,\"Header4\"\n"
-                               + "Col1,\"Col2\",Col3,\"\"\"Col4\"\"\"\n"
-                               + "Col1,\"Col2\",Col3,\"\"\"Col4\"\"\"\n"
-                               + "Col1,\"Col2\",Col3,\"\"\"Col4\"\"\"\n";
+                               + "Col1,\"Col\n2\",Col3,\"\"\"Col4\"\"\"\n"
+                               + "Col1,\"Col\n2\",Col3,\"\"\"Col4\"\"\"\n"
+                               + "Col1,\"Col\n2\",Col3,\"\"\"Col4\"\"\"\n";
 
                InputStream in = new ByteArrayInputStream(toParse.getBytes());
 
@@ -21,7 +21,7 @@ public class CsvParserTestCase extends TestCase {
                                assertEquals(header.size(), tokens.size());
                                assertEquals(4, tokens.size());
                                assertEquals("Col1", tokens.get(0));
-                               assertEquals("Col2", tokens.get(1));
+                               assertEquals("Col\n2", tokens.get(1));
                                assertEquals("Col3", tokens.get(2));
                                assertEquals("\"Col4\"", tokens.get(3));
                        }