]> git.argeo.org Git - lgpl/argeo-commons.git/blobdiff - basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java
Update license headers
[lgpl/argeo-commons.git] / basic / runtime / org.argeo.basic.nodeps / src / main / java / org / argeo / util / CsvParser.java
index 9acbfe246ce07ec67da94721fa25b7c23a5e7a9e..127d0f50928f9b358c5f376a9ba8704f9d9ba697 100644 (file)
@@ -1,3 +1,18 @@
+/*
+ * Copyright (C) 2007-2012 Mathieu Baudier
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.argeo.util;
 
 import java.io.BufferedReader;
@@ -29,22 +44,28 @@ public abstract class CsvParser {
         * 
         * @param lineNumber
         *            the current line number, starts at 1 (the header, if header
-        *            processing is enabled, the first lien otherwise)
+        *            processing is enabled, the first line otherwise)
         * @param header
         *            the read-only header or null if {@link #setNoHeader(Boolean)}
         *            is true (default is false)
         * @param tokens
-        *            the parse tokens
+        *            the parsed tokens
         */
        protected abstract void processLine(Integer lineNumber,
                        List<String> header, List<String> tokens);
 
        public synchronized void parse(InputStream in) {
+               parse(in, null);
+       }
+
+       public synchronized void parse(InputStream in, String encoding) {
                BufferedReader reader = null;
                Integer lineCount = 0;
                try {
-                       reader = new BufferedReader(new InputStreamReader(in));
-
+                       if (encoding == null)
+                               reader = new BufferedReader(new InputStreamReader(in));
+                       else
+                               reader = new BufferedReader(new InputStreamReader(in, encoding));
                        List<String> header = null;
                        if (!noHeader) {
                                String headerStr = reader.readLine();
@@ -55,6 +76,9 @@ public abstract class CsvParser {
                                StringBuffer currStr = new StringBuffer("");
                                Boolean wasInquote = false;
                                while (parseLine(headerStr, header, currStr, wasInquote)) {
+                                       headerStr = reader.readLine();
+                                       if (headerStr == null)
+                                               break;
                                        wasInquote = true;
                                }
                                header = Collections.unmodifiableList(header);
@@ -62,14 +86,19 @@ public abstract class CsvParser {
 
                        String line = null;
                        lines: while ((line = reader.readLine()) != null) {
+                               line = preProcessLine(line);
+                               if (line == null) {
+                                       // skip line
+                                       continue lines;
+                               }
                                lineCount++;
                                List<String> tokens = new ArrayList<String>();
                                StringBuffer currStr = new StringBuffer("");
                                Boolean wasInquote = false;
-                               while (parseLine(line, tokens, currStr, wasInquote)) {
+                               sublines: while (parseLine(line, tokens, currStr, wasInquote)) {
                                        line = reader.readLine();
                                        if (line == null)
-                                               break;
+                                               break sublines;
                                        wasInquote = true;
                                }
                                if (!noHeader && strictLineAsLongAsHeader) {
@@ -102,6 +131,15 @@ public abstract class CsvParser {
                }
        }
 
+       /**
+        * Called before each (logical) line is processed, giving a chance to modify
+        * it (typically for cleaning dirty files). To be overridden; returns the
+        * line unchanged by default. The line is skipped if 'null' is returned.
+        */
+       protected String preProcessLine(String line) {
+               return line;
+       }
+
        /**
         * Parses a line character by character for performance purpose
         * 
@@ -124,7 +162,7 @@ public abstract class CsvParser {
                        if (c == separator) {
                                if (!inQuote) {
                                        tokens.add(currStr.toString());
-                                       //System.out.println("# TOKEN: " + currStr);
+                                       // System.out.println("# TOKEN: " + currStr);
                                        currStr.delete(0, currStr.length());
                                } else {
                                        // we don't remove separators that are in a quoted substring