X-Git-Url: https://git.argeo.org/?a=blobdiff_plain;f=basic%2Fruntime%2Forg.argeo.basic.nodeps%2Fsrc%2Fmain%2Fjava%2Forg%2Fargeo%2Futil%2FCsvParser.java;h=3a429f4adc23a634dfcff402203a20e2c83e04d0;hb=6bb0606505be3e99021c5ff9771c719eb1e1f2e7;hp=4b4d0c8762016320fc8054aa281f2993a24506ff;hpb=56472382695a908e322c711070116aa64ca53b85;p=lgpl%2Fargeo-commons.git diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java index 4b4d0c876..3a429f4ad 100644 --- a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java +++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java @@ -29,35 +29,58 @@ public abstract class CsvParser { * * @param lineNumber * the current line number, starts at 1 (the header, if header - * processing is enabled, the first lien otherwise) + * processing is enabled, the first line otherwise) * @param header * the read-only header or null if {@link #setNoHeader(Boolean)} * is true (default is false) * @param tokens - * the parse tokens + * the parsed tokens */ protected abstract void processLine(Integer lineNumber, List header, List tokens); public synchronized void parse(InputStream in) { + parse(in, null); + } + + public synchronized void parse(InputStream in, String encoding) { BufferedReader reader = null; Integer lineCount = 0; try { - reader = new BufferedReader(new InputStreamReader(in)); - + if (encoding == null) + reader = new BufferedReader(new InputStreamReader(in)); + else + reader = new BufferedReader(new InputStreamReader(in, encoding)); List header = null; if (!noHeader) { String headerStr = reader.readLine(); if (headerStr == null)// empty file return; lineCount++; - header = Collections.unmodifiableList(parseLine(headerStr)); + header = new ArrayList(); + StringBuffer currStr = new StringBuffer(""); + Boolean wasInquote = false; + while (parseLine(headerStr, header, currStr, wasInquote)) { + headerStr = reader.readLine(); + if (headerStr == null) + break; + wasInquote = true; + } + header = Collections.unmodifiableList(header); } String line = null; lines: while ((line = reader.readLine()) != null) { lineCount++; - List tokens = parseLine(line); + List tokens = new ArrayList(); + StringBuffer currStr = new StringBuffer(""); + Boolean wasInquote = false; + while (parseLine(line, tokens, currStr, wasInquote)) { + line = reader.readLine(); + if (line == null) + break; + wasInquote = true; + } if (!noHeader && strictLineAsLongAsHeader) { int headerSize = header.size(); int tokenSize = tokens.size(); @@ -88,19 +111,35 @@ public abstract class CsvParser { } } - /** Parses a line character by character for performance purpose */ - protected List parseLine(String str) { - List tokens = new ArrayList(); + /** + * Parses a line character by character for performance purpose + * + * @return whether to continue parsing this line + */ + protected Boolean parseLine(String str, List tokens, + StringBuffer currStr, Boolean wasInquote) { + // List tokens = new ArrayList(); + + // System.out.println("#LINE: " + str); + + if (wasInquote) + currStr.append('\n'); char[] arr = str.toCharArray(); - boolean inQuote = false; - StringBuffer currStr = new StringBuffer(""); + boolean inQuote = wasInquote; + // StringBuffer currStr = new StringBuffer(""); for (int i = 0; i < arr.length; i++) { char c = arr[i]; if (c == separator) { if (!inQuote) { tokens.add(currStr.toString()); - currStr = new StringBuffer(""); + // System.out.println("# TOKEN: " + currStr); + currStr.delete(0, currStr.length()); + } else { + // we don't remove separator that are in a quoted substring + // System.out + // .println("IN QUOTE, got a separator: [" + c + "]"); + currStr.append(c); } } else if (c == quote) { if (inQuote && (i + 1) < arr.length && arr[i + 1] == quote) { @@ -114,11 +153,19 @@ public abstract class CsvParser { currStr.append(c); } } - tokens.add(currStr.toString()); + + if (!inQuote) { + tokens.add(currStr.toString()); + // System.out.println("# TOKEN: " + currStr); + } + // if (inQuote) + // throw new ArgeoException("Missing quote at the end of the line " + // + str + " (parsed: " + tokens + ")"); if (inQuote) - throw new ArgeoException("Missing quote at the end of the line " - + str + " (parsed: " + tokens + ")"); - return tokens; + return true; + else + return false; + // return tokens; } public char getSeparator() {