+++ /dev/null
-package org.argeo.util;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.io.UnsupportedEncodingException;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
/**
 * Parses a CSV file interpreting the first line as a header. The
 * {@code parse} methods and the setters are synchronized so that the object
 * cannot be modified when parsing (the getters are not synchronized).
 * <p>
 * A field enclosed in the quote character may contain the separator, a doubled
 * quote character (read as one literal quote) and line breaks; a line break
 * inside a quoted field is always restored as {@code '\n'}.
 */
public abstract class CsvParser {
    private char separator = ',';
    private char quote = '\"';

    private Boolean noHeader = false;
    private Boolean strictLineAsLongAsHeader = true;

    /**
     * Actually process a parsed line. If
     * {@link #setStrictLineAsLongAsHeader(Boolean)} is true (default) and header
     * processing is enabled, the header and the tokens are guaranteed to have the
     * same size.
     *
     * @param lineNumber the current line number, starts at 1 (the header, if header
     *                   processing is enabled, the first line otherwise). A record
     *                   spanning several physical lines counts once, and lines
     *                   dropped by {@link #preProcessLine(String)} are not counted.
     * @param header     the read-only header or null if
     *                   {@link #setNoHeader(Boolean)} is true (default is false)
     * @param tokens     the parsed tokens
     */
    protected abstract void processLine(Integer lineNumber, List<String> header, List<String> tokens);

    /**
     * Parses the CSV file using the platform default charset (stream is closed at
     * the end)
     *
     * @param in the stream to parse
     *
     * @deprecated Use {@link #parse(InputStream, Charset)} instead.
     */
    @Deprecated
    public synchronized void parse(InputStream in) {
        parse(in, (Charset) null);
    }

    /**
     * Parses the CSV file (stream is closed at the end)
     *
     * @param in       the stream to parse
     * @param encoding the name of the encoding to use, or null for the platform
     *                 default charset
     * @throws IllegalArgumentException if the named encoding is not supported
     *
     * @deprecated Use {@link #parse(InputStream, Charset)} instead.
     */
    @Deprecated
    public synchronized void parse(InputStream in, String encoding) {
        Reader reader;
        if (encoding == null) {
            reader = new InputStreamReader(in);
        } else {
            try {
                reader = new InputStreamReader(in, encoding);
            } catch (UnsupportedEncodingException e) {
                throw new IllegalArgumentException(e);
            }
        }
        parse(reader);
    }

    /**
     * Parses the CSV file (stream is closed at the end)
     *
     * @param in      the stream to parse
     * @param charset the charset to use, or null for the platform default charset
     */
    public synchronized void parse(InputStream in, Charset charset) {
        Reader reader = charset == null ? new InputStreamReader(in) : new InputStreamReader(in, charset);
        parse(reader);
    }

    /**
     * Parses the CSV file (the reader is closed at the end) and calls
     * {@link #processLine(Integer, List, List)} for each record. Returns without
     * calling it when header processing is enabled and the input is empty.
     *
     * @param reader the reader to use (it will be buffered)
     * @throws IllegalStateException if strict mode and header processing are both
     *                               enabled and a record does not have as many
     *                               tokens as the header
     * @throws RuntimeException      wrapping the {@link IOException} if reading
     *                               fails
     */
    public synchronized void parse(Reader reader) {
        int lineCount = 0;
        try (BufferedReader bufferedReader = new BufferedReader(reader)) {
            List<String> header = null;
            if (!noHeader) {
                String headerStr = bufferedReader.readLine();
                if (headerStr == null) {
                    return; // empty file
                }
                lineCount++;
                List<String> headerTokens = new ArrayList<>();
                parseLogicalLine(bufferedReader, headerStr, headerTokens);
                header = Collections.unmodifiableList(headerTokens);
            }

            String line;
            while ((line = bufferedReader.readLine()) != null) {
                line = preProcessLine(line);
                if (line == null) {
                    continue; // skipped on request of preProcessLine
                }
                lineCount++;
                List<String> tokens = new ArrayList<>();
                // from here on 'line' is the last physical line of the record; it is
                // never null, even when the input ends inside a quoted field
                line = parseLogicalLine(bufferedReader, line, tokens);
                if (!noHeader && strictLineAsLongAsHeader) {
                    int headerSize = header.size();
                    int tokenSize = tokens.size();
                    if (tokenSize == 1 && line.trim().isEmpty()) {
                        continue; // empty line
                    }
                    if (headerSize != tokenSize) {
                        throw new IllegalStateException("Token size " + tokenSize + " is different from header size "
                                + headerSize + " at line " + lineCount + ", line: " + line + ", header: " + header
                                + ", tokens: " + tokens);
                    }
                }
                processLine(lineCount, header, tokens);
            }
        } catch (IOException e) {
            throw new RuntimeException("Cannot parse CSV file (line: " + lineCount + ")", e);
        }
    }

    /**
     * Tokenizes one record, reading additional physical lines for as long as
     * {@link #parseLine(String, List, StringBuffer, Boolean)} reports an open
     * quoted field. If the input ends while a quoted field is still open, the text
     * accumulated so far is kept as the last token instead of being dropped.
     *
     * @param reader    where continuation lines are read from
     * @param firstLine the first physical line of the record
     * @param tokens    the list the tokens are appended to
     * @return the last physical line that was consumed, never null
     */
    private String parseLogicalLine(BufferedReader reader, String firstLine, List<String> tokens)
            throws IOException {
        StringBuffer currStr = new StringBuffer();
        String lastLine = firstLine;
        boolean inQuote = parseLine(lastLine, tokens, currStr, false);
        while (inQuote) {
            String next = reader.readLine();
            if (next == null) {
                // end of input inside a quoted field: flush the partial field
                tokens.add(currStr.toString());
                break;
            }
            lastLine = next;
            inQuote = parseLine(lastLine, tokens, currStr, true);
        }
        return lastLine;
    }

    /**
     * Called with the first physical line of each record before it is tokenized,
     * giving a chance to modify it (typically for cleaning dirty files). It is not
     * called for the header line nor for the continuation lines of a quoted field
     * spanning several lines. To be overridden, returns the line unchanged by
     * default. The line is skipped if 'null' is returned.
     */
    protected String preProcessLine(String line) {
        return line;
    }

    /**
     * Parses a line character by character for performance purpose
     *
     * @param str        the physical line to parse (without line terminator)
     * @param tokens     the list completed tokens are appended to
     * @param currStr    the buffer holding the token being built; it carries the
     *                   beginning of a quoted field over to the next call
     * @param wasInquote whether the previous physical line ended inside a quoted
     *                   field, in which case a {@code '\n'} is first appended to
     *                   the current token
     * @return whether to continue parsing this line, that is whether this physical
     *         line ended inside a quoted field
     */
    protected Boolean parseLine(String str, List<String> tokens, StringBuffer currStr, Boolean wasInquote) {
        if (wasInquote) {
            currStr.append('\n');
        }

        char[] arr = str.toCharArray();
        boolean inQuote = wasInquote;
        for (int i = 0; i < arr.length; i++) {
            char c = arr[i];
            if (c == separator) {
                if (inQuote) {
                    // separators within a quoted field are part of the value
                    currStr.append(c);
                } else {
                    tokens.add(currStr.toString());
                    currStr.setLength(0);
                }
            } else if (c == quote) {
                if (inQuote && (i + 1) < arr.length && arr[i + 1] == quote) {
                    // doubled quote within a quoted field: one literal quote
                    currStr.append(quote);
                    i++;
                } else {
                    // opening or closing quote
                    inQuote = !inQuote;
                }
            } else {
                currStr.append(c);
            }
        }

        if (!inQuote) {
            // the end of the line terminates the last token
            tokens.add(currStr.toString());
        }
        return inQuote;
    }

    /** @return the character separating the fields (default is ',') */
    public char getSeparator() {
        return separator;
    }

    /** @param separator the character separating the fields */
    public synchronized void setSeparator(char separator) {
        this.separator = separator;
    }

    /** @return the character enclosing quoted fields (default is '"') */
    public char getQuote() {
        return quote;
    }

    /** @param quote the character enclosing quoted fields */
    public synchronized void setQuote(char quote) {
        this.quote = quote;
    }

    /** @return whether the first line is treated as data (default is false) */
    public Boolean getNoHeader() {
        return noHeader;
    }

    /**
     * @param noHeader true to treat the first line as data instead of as a header
     */
    public synchronized void setNoHeader(Boolean noHeader) {
        this.noHeader = noHeader;
    }

    /**
     * @return whether each record must have as many tokens as the header (default
     *         is true)
     */
    public Boolean getStrictLineAsLongAsHeader() {
        return strictLineAsLongAsHeader;
    }

    /**
     * @param strictLineAsLongAsHeader true to reject records which do not have as
     *                                 many tokens as the header (ignored when
     *                                 header processing is disabled)
     */
    public synchronized void setStrictLineAsLongAsHeader(Boolean strictLineAsLongAsHeader) {
        this.strictLineAsLongAsHeader = strictLineAsLongAsHeader;
    }

}