X-Git-Url: https://git.argeo.org/?a=blobdiff_plain;f=basic%2Fruntime%2Forg.argeo.basic.nodeps%2Fsrc%2Fmain%2Fjava%2Forg%2Fargeo%2Futil%2FCsvParser.java;h=c20f4e07c684c90902cef9fd473e16410286d64f;hb=66ad3dea875e1f8edcc65c02aca1ba7bcfa71a7d;hp=4b4d0c8762016320fc8054aa281f2993a24506ff;hpb=56472382695a908e322c711070116aa64ca53b85;p=lgpl%2Fargeo-commons.git diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java index 4b4d0c876..c20f4e07c 100644 --- a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java +++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java @@ -29,12 +29,12 @@ public abstract class CsvParser { * * @param lineNumber * the current line number, starts at 1 (the header, if header - * processing is enabled, the first lien otherwise) + * processing is enabled, the first line otherwise) * @param header * the read-only header or null if {@link #setNoHeader(Boolean)} * is true (default is false) * @param tokens - * the parse tokens + * the parsed tokens */ protected abstract void processLine(Integer lineNumber, List header, List tokens); @@ -51,13 +51,30 @@ public abstract class CsvParser { if (headerStr == null)// empty file return; lineCount++; - header = Collections.unmodifiableList(parseLine(headerStr)); + header = new ArrayList(); + StringBuffer currStr = new StringBuffer(""); + Boolean wasInquote = false; + while (parseLine(headerStr, header, currStr, wasInquote)) { + headerStr = reader.readLine(); + if (headerStr == null) + break; + wasInquote = true; + } + header = Collections.unmodifiableList(header); } String line = null; lines: while ((line = reader.readLine()) != null) { lineCount++; - List tokens = parseLine(line); + List tokens = new ArrayList(); + StringBuffer currStr = new StringBuffer(""); + Boolean wasInquote = false; + while (parseLine(line, tokens, currStr, wasInquote)) { + line = reader.readLine(); + if (line == null) + break; + wasInquote = true; + } if (!noHeader && strictLineAsLongAsHeader) { int headerSize = header.size(); int tokenSize = tokens.size(); @@ -88,19 +105,35 @@ public abstract class CsvParser { } } - /** Parses a line character by character for performance purpose */ - protected List parseLine(String str) { - List tokens = new ArrayList(); + /** + * Parses a line character by character for performance purpose + * + * @return whether to continue parsing this line + */ + protected Boolean parseLine(String str, List tokens, + StringBuffer currStr, Boolean wasInquote) { + // List tokens = new ArrayList(); + + // System.out.println("#LINE: " + str); + + if (wasInquote) + currStr.append('\n'); char[] arr = str.toCharArray(); - boolean inQuote = false; - StringBuffer currStr = new StringBuffer(""); + boolean inQuote = wasInquote; + // StringBuffer currStr = new StringBuffer(""); for (int i = 0; i < arr.length; i++) { char c = arr[i]; if (c == separator) { if (!inQuote) { tokens.add(currStr.toString()); - currStr = new StringBuffer(""); + // System.out.println("# TOKEN: " + currStr); + currStr.delete(0, currStr.length()); + } else { + // we don't remove separator that are in a quoted substring + // System.out + // .println("IN QUOTE, got a separator: [" + c + "]"); + currStr.append(c); } } else if (c == quote) { if (inQuote && (i + 1) < arr.length && arr[i + 1] == quote) { @@ -114,11 +147,19 @@ public abstract class CsvParser { currStr.append(c); } } - tokens.add(currStr.toString()); + + if (!inQuote) { + tokens.add(currStr.toString()); + // System.out.println("# TOKEN: " + currStr); + } + // if (inQuote) + // throw new ArgeoException("Missing quote at the end of the line " + // + str + " (parsed: " + tokens + ")"); if (inQuote) - throw new ArgeoException("Missing quote at the end of the line " - + str + " (parsed: " + tokens + ")"); - return tokens; + return true; + else + return false; + // return tokens; } public char getSeparator() {