From 366325583b0b101f3e78fcfda65d3f856cef8617 Mon Sep 17 00:00:00 2001 From: Bruno Sinou Date: Tue, 1 Nov 2011 21:36:52 +0000 Subject: [PATCH] Add the ability to force encoding while parsing csv files + corresponding JUnit tests. See https://bugzilla.argeo.org/show_bug.cgi?id=54 git-svn-id: https://svn.argeo.org/commons/trunk@4869 4cfe0d0a-d680-48aa-b62c-e0a02a3f76cc --- .../org.eclipse.core.resources.prefs | 3 ++ .../main/java/org/argeo/util/CsvParser.java | 10 ++++- .../argeo/util/CsvParserEncodingTestCase.java | 38 +++++++++++++++++++ .../org/argeo/util/TestParse-ISO.csv | 8 ++++ .../org/argeo/util/TestParse-UTF-8.csv | 8 ++++ 5 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 basic/runtime/org.argeo.basic.nodeps/.settings/org.eclipse.core.resources.prefs create mode 100644 basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserEncodingTestCase.java create mode 100644 basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-ISO.csv create mode 100644 basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-UTF-8.csv diff --git a/basic/runtime/org.argeo.basic.nodeps/.settings/org.eclipse.core.resources.prefs b/basic/runtime/org.argeo.basic.nodeps/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 000000000..aabe426b9 --- /dev/null +++ b/basic/runtime/org.argeo.basic.nodeps/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,3 @@ +#Tue Nov 01 16:23:51 CET 2011 +eclipse.preferences.version=1 +encoding//src/test/resources/org/argeo/util/TestParse-ISO.csv=ISO-8859-1 diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java index c20f4e07c..3a429f4ad 100644 --- a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java +++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java @@ -40,11 +40,17 @@ public abstract class CsvParser { List header, List tokens); public synchronized void parse(InputStream in) { + parse(in, null); + } + + public synchronized void parse(InputStream in, String encoding) { BufferedReader reader = null; Integer lineCount = 0; try { - reader = new BufferedReader(new InputStreamReader(in)); - + if (encoding == null) + reader = new BufferedReader(new InputStreamReader(in)); + else + reader = new BufferedReader(new InputStreamReader(in, encoding)); List header = null; if (!noHeader) { String headerStr = reader.readLine(); diff --git a/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserEncodingTestCase.java b/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserEncodingTestCase.java new file mode 100644 index 000000000..10929c3b6 --- /dev/null +++ b/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserEncodingTestCase.java @@ -0,0 +1,38 @@ +package org.argeo.util; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.util.List; + +import junit.framework.TestCase; + +public class CsvParserEncodingTestCase extends TestCase { + + private String iso = "ISO-8859-1"; + private String utf8 = "UTF-8"; + + public void testParse() throws Exception { + + String xml = new String("áéíóúñ,éééé"); + byte[] utfBytes = xml.getBytes(utf8); + byte[] isoBytes = xml.getBytes(iso); + + InputStream inUtf = new ByteArrayInputStream(utfBytes); + InputStream inIso = new ByteArrayInputStream(isoBytes); + + CsvParser csvParser = new CsvParser() { + protected void processLine(Integer lineNumber, List header, + List tokens) { + assertEquals(header.size(), tokens.size()); + assertEquals(2, tokens.size()); + assertEquals("áéíóúñ", tokens.get(0)); + assertEquals("éééé", tokens.get(1)); + } + }; + + csvParser.parse(inUtf, utf8); + inUtf.close(); + csvParser.parse(inIso, iso); + inIso.close(); + } +} diff --git a/basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-ISO.csv b/basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-ISO.csv new file mode 100644 index 000000000..0bec61111 --- /dev/null +++ b/basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-ISO.csv @@ -0,0 +1,8 @@ +"Date d'imputation","N° de compte","Code journal","Pièce interne","Pièce externe","Libellé d'écriture","Débit","Crédit","Lettrage","Quantité","Code analytique","Date d'échéance","Date d'imputation origine","Code journal origine","Mode de règlement","Date début de période","Date fin de période" +26.01.2010,"101300","BQ","BQ01.10",,"Depot société en formation",,"3.000,00",,,," ",26.01.2010,"BQ"," "," "," " +26.01.2010,"101300","BQ","BQ01.10",,"Depot société en formation",,"7.000,00",,,," ",26.01.2010,"BQ"," "," "," " +26.01.2010,"411OPEN","BQ","BQ01.10",,"Vir Client ",,"2.508,00","A",,," ",26.01.2010,"BQ"," "," "," " +26.01.2010,"455100","BQ","BQ01.10",,"Bankomat Raiffeise","250,00",,,,," ",26.01.2010,"BQ"," "," "," " +26.01.2010,"512101","BQ","BQ01.10",,"Extrait bancaire 01.10","12.250,55",,,,," ",26.01.2010,"BQ"," "," "," " +26.01.2010,"627800","BQ","BQ01.10",,"Envoi de chequier","2,30",,,,," ",26.01.2010,"BQ"," "," "," " +26.01.2010,"627800","BQ","BQ01.10",,"Frais d'expedition","5,15",,,,," ",26.01.2010,"BQ"," "," "," " diff --git a/basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-UTF-8.csv b/basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-UTF-8.csv new file mode 100644 index 000000000..0bec61111 --- /dev/null +++ b/basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-UTF-8.csv @@ -0,0 +1,8 @@ +"Date d'imputation","N° de compte","Code journal","Pièce interne","Pièce externe","Libellé d'écriture","Débit","Crédit","Lettrage","Quantité","Code analytique","Date d'échéance","Date d'imputation origine","Code journal origine","Mode de règlement","Date début de période","Date fin de période" +26.01.2010,"101300","BQ","BQ01.10",,"Depot société en formation",,"3.000,00",,,," ",26.01.2010,"BQ"," "," "," " +26.01.2010,"101300","BQ","BQ01.10",,"Depot société en formation",,"7.000,00",,,," ",26.01.2010,"BQ"," "," "," " +26.01.2010,"411OPEN","BQ","BQ01.10",,"Vir Client ",,"2.508,00","A",,," ",26.01.2010,"BQ"," "," "," " +26.01.2010,"455100","BQ","BQ01.10",,"Bankomat Raiffeise","250,00",,,,," ",26.01.2010,"BQ"," "," "," " +26.01.2010,"512101","BQ","BQ01.10",,"Extrait bancaire 01.10","12.250,55",,,,," ",26.01.2010,"BQ"," "," "," " +26.01.2010,"627800","BQ","BQ01.10",,"Envoi de chequier","2,30",,,,," ",26.01.2010,"BQ"," "," "," " +26.01.2010,"627800","BQ","BQ01.10",,"Frais d'expedition","5,15",,,,," ",26.01.2010,"BQ"," "," "," " -- 2.39.2