From a6182afd9f9c3db2b995a6d5d2f65eeea628576c Mon Sep 17 00:00:00 2001 From: Mathieu Baudier Date: Wed, 2 Feb 2011 17:56:47 +0000 Subject: [PATCH] Have CsvParser dealing with line breaks git-svn-id: https://svn.argeo.org/commons/trunk@4119 4cfe0d0a-d680-48aa-b62c-e0a02a3f76cc --- .../main/java/org/argeo/util/CsvParser.java | 57 +++++++++++++++---- .../org/argeo/util/CsvParserTestCase.java | 8 +-- 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java index 4b4d0c876..ca81bc983 100644 --- a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java +++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java @@ -51,13 +51,27 @@ public abstract class CsvParser { if (headerStr == null)// empty file return; lineCount++; - header = Collections.unmodifiableList(parseLine(headerStr)); + header = new ArrayList(); + StringBuffer currStr = new StringBuffer(""); + Boolean wasInquote = false; + while (parseLine(headerStr, header, currStr, wasInquote)) { + wasInquote = true; + } + header = Collections.unmodifiableList(header); } String line = null; lines: while ((line = reader.readLine()) != null) { lineCount++; - List tokens = parseLine(line); + List tokens = new ArrayList(); + StringBuffer currStr = new StringBuffer(""); + Boolean wasInquote = false; + while (parseLine(line, tokens, currStr, wasInquote)) { + line = reader.readLine(); + if (line == null) + break; + wasInquote = true; + } if (!noHeader && strictLineAsLongAsHeader) { int headerSize = header.size(); int tokenSize = tokens.size(); @@ -88,19 +102,30 @@ public abstract class CsvParser { } } - /** Parses a line character by character for performance purpose */ - protected List parseLine(String str) { - List tokens = new ArrayList(); + /** + * Parses a line character by character for performance purpose + * + * @return whether to continue parsing this line + */ + protected Boolean parseLine(String str, List tokens, + StringBuffer currStr, Boolean wasInquote) { + // List tokens = new ArrayList(); + + //System.out.println("#LINE: " + str); + + if (wasInquote) + currStr.append('\n'); char[] arr = str.toCharArray(); - boolean inQuote = false; - StringBuffer currStr = new StringBuffer(""); + boolean inQuote = wasInquote; + // StringBuffer currStr = new StringBuffer(""); for (int i = 0; i < arr.length; i++) { char c = arr[i]; if (c == separator) { if (!inQuote) { tokens.add(currStr.toString()); - currStr = new StringBuffer(""); + //System.out.println("# TOKEN: " + currStr); + currStr.delete(0, currStr.length()); } } else if (c == quote) { if (inQuote && (i + 1) < arr.length && arr[i + 1] == quote) { @@ -114,11 +139,19 @@ public abstract class CsvParser { currStr.append(c); } } - tokens.add(currStr.toString()); + + if (!inQuote) { + tokens.add(currStr.toString()); + //System.out.println("# TOKEN: " + currStr); + } + // if (inQuote) + // throw new ArgeoException("Missing quote at the end of the line " + // + str + " (parsed: " + tokens + ")"); if (inQuote) - throw new ArgeoException("Missing quote at the end of the line " - + str + " (parsed: " + tokens + ")"); - return tokens; + return true; + else + return false; + // return tokens; } public char getSeparator() { diff --git a/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserTestCase.java b/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserTestCase.java index 0b17d67b5..5a8e4a8b8 100644 --- a/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserTestCase.java +++ b/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserTestCase.java @@ -9,9 +9,9 @@ import junit.framework.TestCase; public class CsvParserTestCase extends TestCase { public void testParse() throws Exception { String toParse = "Header1,\"Header2\",Header3,\"Header4\"\n" - + "Col1,\"Col2\",Col3,\"\"\"Col4\"\"\"\n" - + "Col1,\"Col2\",Col3,\"\"\"Col4\"\"\"\n" - + "Col1,\"Col2\",Col3,\"\"\"Col4\"\"\"\n"; + + "Col1,\"Col\n2\",Col3,\"\"\"Col4\"\"\"\n" + + "Col1,\"Col\n2\",Col3,\"\"\"Col4\"\"\"\n" + + "Col1,\"Col\n2\",Col3,\"\"\"Col4\"\"\"\n"; InputStream in = new ByteArrayInputStream(toParse.getBytes()); @@ -21,7 +21,7 @@ public class CsvParserTestCase extends TestCase { assertEquals(header.size(), tokens.size()); assertEquals(4, tokens.size()); assertEquals("Col1", tokens.get(0)); - assertEquals("Col2", tokens.get(1)); + assertEquals("Col\n2", tokens.get(1)); assertEquals("Col3", tokens.get(2)); assertEquals("\"Col4\"", tokens.get(3)); } -- 2.39.2