String line = null;
lines: while ((line = reader.readLine()) != null) {
+ line = preProcessLine(line);
+ if (line == null) {
+ // skip line
+ continue lines;
+ }
lineCount++;
List<String> tokens = new ArrayList<String>();
StringBuffer currStr = new StringBuffer("");
Boolean wasInquote = false;
- while (parseLine(line, tokens, currStr, wasInquote)) {
+ sublines: while (parseLine(line, tokens, currStr, wasInquote)) {
line = reader.readLine();
if (line == null)
- break;
+ break sublines;
wasInquote = true;
}
if (!noHeader && strictLineAsLongAsHeader) {
}
}
+ /**
+ * Called before each (logical) line is processed, giving a chance to modify
+ * it (typically for cleaning dirty files). Meant to be overridden; the
+ * default implementation returns the line unchanged. The line is skipped
+ * (and not counted) if 'null' is returned.
+ * <p>
+ * Only the line read at the start of a record goes through this hook:
+ * additional physical lines read while parseLine reports the record as
+ * incomplete are not pre-processed.
+ *
+ * @param line
+ * the line as read from the input, not null when called by the parser
+ * @return the line to parse, or null to skip it
+ */
+ protected String preProcessLine(String line) {
+ return line;
+ }
+
/**
* Parses a line character by character for performance purpose
*
import java.io.IOException;
import java.io.OutputStream;
-import java.io.PrintWriter;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
import java.util.Iterator;
import java.util.List;
/** Write in CSV format. */
public class CsvWriter {
- private final PrintWriter out;
+ private final Writer out;
private char separator = ',';
private char quote = '\"';
/**
- * Creates a CSV writer. The header will be written immediately to the
- * stream.
+ * Creates a CSV writer. Characters are encoded with the default charset
+ * (as done by the single-argument OutputStreamWriter constructor); use
+ * {@link #CsvWriter(OutputStream, String)} to choose the encoding.
*
* @param out
* the stream to write to. Caller is responsible for closing it.
*/
public CsvWriter(OutputStream out) {
- super();
- this.out = new PrintWriter(out);
+ this.out = new OutputStreamWriter(out);
+ }
+
+ /**
+ * Creates a CSV writer which encodes characters with the given encoding.
+ *
+ * @param out
+ * the stream to write to. Caller is responsible for closing it.
+ * @param encoding
+ * the name of the charset used to encode the written characters
+ * @throws ArgeoException
+ * if the encoding is not supported
+ */
+ public CsvWriter(OutputStream out, String encoding) {
+ try {
+ this.out = new OutputStreamWriter(out, encoding);
+ } catch (UnsupportedEncodingException e) {
+ throw new ArgeoException("Cannot initialize CSV writer", e);
+ }
}
/**
while (it.hasNext()) {
writeToken(it.next().toString());
if (it.hasNext())
- out.print(separator);
+ out.write(separator);
}
- out.print('\n');
+ out.write('\n');
out.flush();
} catch (IOException e) {
throw new ArgeoException("Could not write " + tokens, e);
for (int i = 0; i < tokens.length; i++) {
writeToken(tokens[i].toString());
if (i != (tokens.length - 1))
- out.print(separator);
+ out.write(separator);
}
- out.print('\n');
+ out.write('\n');
out.flush();
} catch (IOException e) {
throw new ArgeoException("Could not write " + tokens, e);
// +2 for possible quotes, another +2 assuming there would be an already
// quoted string where quotes needs to be duplicated
// another +2 for safety
+ // we don't want to increase buffer size while writing
StringBuffer buf = new StringBuffer(token.length() + 6);
char[] arr = token.toCharArray();
boolean shouldQuote = false;
}
if (shouldQuote == true)
- out.print(quote);
- out.print(buf.toString());
+ out.write(quote);
+ out.write(buf.toString());
if (shouldQuote == true)
- out.print(quote);
+ out.write(quote);
}
public void setSeparator(char separator) {