X-Git-Url: https://git.argeo.org/?a=blobdiff_plain;f=org.argeo.util%2Fsrc%2Forg%2Fargeo%2Futil%2FCsvParser.java;h=b903f77226457bbab9ded55926899677e98d34f2;hb=3c1cdc594d954520b14646102b366290bdad58c7;hp=d133afdbc41f1d6d35fed0d1019aae5e9a31c289;hpb=088c1b517a543e935d8ab65c3b2fd2d0269b551d;p=lgpl%2Fargeo-commons.git diff --git a/org.argeo.util/src/org/argeo/util/CsvParser.java b/org.argeo.util/src/org/argeo/util/CsvParser.java index d133afdbc..b903f7722 100644 --- a/org.argeo.util/src/org/argeo/util/CsvParser.java +++ b/org.argeo.util/src/org/argeo/util/CsvParser.java @@ -1,24 +1,12 @@ -/* - * Copyright (C) 2007-2012 Argeo GmbH - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ package org.argeo.util; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.Reader; +import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -37,42 +25,77 @@ public abstract class CsvParser { /** * Actually process a parsed line. If - * {@link #setStrictLineAsLongAsHeader(Boolean)} is true (default) the - * header and the tokens are guaranteed to have the same size. + * {@link #setStrictLineAsLongAsHeader(Boolean)} is true (default) the header + * and the tokens are guaranteed to have the same size. * - * @param lineNumber - * the current line number, starts at 1 (the header, if header - * processing is enabled, the first line otherwise) - * @param header - * the read-only header or null if {@link #setNoHeader(Boolean)} - * is true (default is false) - * @param tokens - * the parsed tokens + * @param lineNumber the current line number, starts at 1 (the header, if header + * processing is enabled, the first line otherwise) + * @param header the read-only header or null if + * {@link #setNoHeader(Boolean)} is true (default is false) + * @param tokens the parsed tokens */ - protected abstract void processLine(Integer lineNumber, - List header, List tokens); + protected abstract void processLine(Integer lineNumber, List header, List tokens); /** * Parses the CSV file (stream is closed at the end) + * + * @param in the stream to parse + * + * @deprecated Use {@link #parse(InputStream, Charset)} instead. */ + @Deprecated public synchronized void parse(InputStream in) { - parse(in, null); + parse(in, (Charset) null); } /** * Parses the CSV file (stream is closed at the end) + * + * @param in the stream to parse + * @param encoding the encoding to use. + * + * @deprecated Use {@link #parse(InputStream, Charset)} instead. */ + @Deprecated public synchronized void parse(InputStream in, String encoding) { - BufferedReader reader = null; + Reader reader; + if (encoding == null) + reader = new InputStreamReader(in); + else + try { + reader = new InputStreamReader(in, encoding); + } catch (UnsupportedEncodingException e) { + throw new IllegalArgumentException(e); + } + parse(reader); + } + + /** + * Parses the CSV file (stream is closed at the end) + * + * @param in the stream to parse + * @param charset the charset to use + */ + public synchronized void parse(InputStream in, Charset charset) { + Reader reader; + if (charset == null) + reader = new InputStreamReader(in); + else + reader = new InputStreamReader(in, charset); + parse(reader); + } + + /** + * Parses the CSV file (stream is closed at the end) + * + * @param reader the reader to use (it will be buffered) + */ + public synchronized void parse(Reader reader) { Integer lineCount = 0; - try { - if (encoding == null) - reader = new BufferedReader(new InputStreamReader(in)); - else - reader = new BufferedReader(new InputStreamReader(in, encoding)); + try (BufferedReader bufferedReader = new BufferedReader(reader)) { List header = null; if (!noHeader) { - String headerStr = reader.readLine(); + String headerStr = bufferedReader.readLine(); if (headerStr == null)// empty file return; lineCount++; @@ -80,7 +103,7 @@ public abstract class CsvParser { StringBuffer currStr = new StringBuffer(""); Boolean wasInquote = false; while (parseLine(headerStr, header, currStr, wasInquote)) { - headerStr = reader.readLine(); + headerStr = bufferedReader.readLine(); if (headerStr == null) break; wasInquote = true; @@ -89,7 +112,7 @@ public abstract class CsvParser { } String line = null; - lines: while ((line = reader.readLine()) != null) { + lines: while ((line = bufferedReader.readLine()) != null) { line = preProcessLine(line); if (line == null) { // skip line @@ -100,7 +123,7 @@ public abstract class CsvParser { StringBuffer currStr = new StringBuffer(""); Boolean wasInquote = false; sublines: while (parseLine(line, tokens, currStr, wasInquote)) { - line = reader.readLine(); + line = bufferedReader.readLine(); if (line == null) break sublines; wasInquote = true; @@ -111,29 +134,22 @@ public abstract class CsvParser { if (tokenSize == 1 && line.trim().equals("")) continue lines;// empty line if (headerSize != tokenSize) { - throw new UtilsException("Token size " + tokenSize - + " is different from header size " - + headerSize + " at line " + lineCount - + ", line: " + line + ", header: " + header + throw new IllegalStateException("Token size " + tokenSize + " is different from header size " + + headerSize + " at line " + lineCount + ", line: " + line + ", header: " + header + ", tokens: " + tokens); } } processLine(lineCount, header, tokens); } - } catch (UtilsException e) { - throw e; } catch (IOException e) { - throw new UtilsException("Cannot parse CSV file (line: " - + lineCount + ")", e); - } finally { - StreamUtils.closeQuietly(reader); + throw new RuntimeException("Cannot parse CSV file (line: " + lineCount + ")", e); } } /** - * Called before each (logical) line is processed, giving a change to modify - * it (typically for cleaning dirty files). To be overridden, return the - * line unchanged by default. Skip the line if 'null' is returned. + * Called before each (logical) line is processed, giving a change to modify it + * (typically for cleaning dirty files). To be overridden, return the line + * unchanged by default. Skip the line if 'null' is returned. */ protected String preProcessLine(String line) { return line; @@ -144,25 +160,20 @@ public abstract class CsvParser { * * @return whether to continue parsing this line */ - protected Boolean parseLine(String str, List tokens, - StringBuffer currStr, Boolean wasInquote) { - // List tokens = new ArrayList(); - - // System.out.println("#LINE: " + str); - + protected Boolean parseLine(String str, List tokens, StringBuffer currStr, Boolean wasInquote) { if (wasInquote) currStr.append('\n'); char[] arr = str.toCharArray(); boolean inQuote = wasInquote; - // StringBuffer currStr = new StringBuffer(""); for (int i = 0; i < arr.length; i++) { char c = arr[i]; if (c == separator) { if (!inQuote) { tokens.add(currStr.toString()); - // System.out.println("# TOKEN: " + currStr); - currStr.delete(0, currStr.length()); +// currStr.delete(0, currStr.length()); + currStr.setLength(0); + currStr.trimToSize(); } else { // we don't remove separator that are in a quoted substring // System.out @@ -224,8 +235,7 @@ public abstract class CsvParser { return strictLineAsLongAsHeader; } - public synchronized void setStrictLineAsLongAsHeader( - Boolean strictLineAsLongAsHeader) { + public synchronized void setStrictLineAsLongAsHeader(Boolean strictLineAsLongAsHeader) { this.strictLineAsLongAsHeader = strictLineAsLongAsHeader; }