Add the ability to force encoding while parsing csv files + corresponding JUnit tests.
author Bruno Sinou <bsinou@argeo.org>
Tue, 1 Nov 2011 21:36:52 +0000 (21:36 +0000)
committer Bruno Sinou <bsinou@argeo.org>
Tue, 1 Nov 2011 21:36:52 +0000 (21:36 +0000)
See https://bugzilla.argeo.org/show_bug.cgi?id=54

git-svn-id: https://svn.argeo.org/commons/trunk@4869 4cfe0d0a-d680-48aa-b62c-e0a02a3f76cc

basic/runtime/org.argeo.basic.nodeps/.settings/org.eclipse.core.resources.prefs [new file with mode: 0644]
basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/CsvParser.java
basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserEncodingTestCase.java [new file with mode: 0644]
basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-ISO.csv [new file with mode: 0644]
basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-UTF-8.csv [new file with mode: 0644]

diff --git a/basic/runtime/org.argeo.basic.nodeps/.settings/org.eclipse.core.resources.prefs b/basic/runtime/org.argeo.basic.nodeps/.settings/org.eclipse.core.resources.prefs
new file mode 100644 (file)
index 0000000..aabe426
--- /dev/null
@@ -0,0 +1,3 @@
+#Tue Nov 01 16:23:51 CET 2011
+eclipse.preferences.version=1
+encoding//src/test/resources/org/argeo/util/TestParse-ISO.csv=ISO-8859-1
index c20f4e07c684c90902cef9fd473e16410286d64f..3a429f4adc23a634dfcff402203a20e2c83e04d0 100644 (file)
@@ -40,11 +40,17 @@ public abstract class CsvParser {
                        List<String> header, List<String> tokens);
 
        public synchronized void parse(InputStream in) {
+               parse(in, null); // null encoding: the two-argument overload then builds its InputStreamReader without a charset name
+       }
+
+       public synchronized void parse(InputStream in, String encoding) {
                BufferedReader reader = null;
                Integer lineCount = 0;
                try {
-                       reader = new BufferedReader(new InputStreamReader(in));
-
+                       if (encoding == null)
+                               reader = new BufferedReader(new InputStreamReader(in));
+                       else
+                               reader = new BufferedReader(new InputStreamReader(in, encoding));
                        List<String> header = null;
                        if (!noHeader) {
                                String headerStr = reader.readLine();
diff --git a/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserEncodingTestCase.java b/basic/runtime/org.argeo.basic.nodeps/src/test/java/org/argeo/util/CsvParserEncodingTestCase.java
new file mode 100644 (file)
index 0000000..10929c3
--- /dev/null
@@ -0,0 +1,38 @@
+package org.argeo.util;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+public class CsvParserEncodingTestCase extends TestCase {
+       // Exercises CsvParser.parse(InputStream, String) on the same text encoded as UTF-8 and as ISO-8859-1.
+       private String iso = "ISO-8859-1";
+       private String utf8 = "UTF-8";
+       // testParse: encodes one sample line in both charsets and parses each stream with the matching encoding name.
+       public void testParse() throws Exception {
+               // NOTE(review): the non-ASCII literals below rely on the compiler reading this file with the correct source encoding -- confirm the build setting.
+               String xml = new String("áéíóúñ,éééé"); // despite the name, this is one comma-separated line with two fields
+               byte[] utfBytes = xml.getBytes(utf8);
+               byte[] isoBytes = xml.getBytes(iso);
+               // Expose each byte array as an in-memory stream.
+               InputStream inUtf = new ByteArrayInputStream(utfBytes);
+               InputStream inIso = new ByteArrayInputStream(isoBytes);
+               // Anonymous CsvParser whose processLine callback asserts on the tokens it receives.
+               CsvParser csvParser = new CsvParser() {
+                       protected void processLine(Integer lineNumber, List<String> header,
+                                       List<String> tokens) {
+                               assertEquals(header.size(), tokens.size());
+                               assertEquals(2, tokens.size());
+                               assertEquals("áéíóúñ", tokens.get(0));
+                               assertEquals("éééé", tokens.get(1));
+                       }
+               };
+               // NOTE(review): the input is a single line; if CsvParser.parse consumes the first line as a header (its !noHeader branch), processLine may never run and the assertions above would not execute -- confirm.
+               csvParser.parse(inUtf, utf8);
+               inUtf.close();
+               csvParser.parse(inIso, iso);
+               inIso.close();
+       }
+}
diff --git a/basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-ISO.csv b/basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-ISO.csv
new file mode 100644 (file)
index 0000000..0bec611
--- /dev/null
@@ -0,0 +1,8 @@
+"Date d'imputation","N° de compte","Code journal","Pièce interne","Pièce externe","Libellé d'écriture","Débit","Crédit","Lettrage","Quantité","Code analytique","Date d'échéance","Date d'imputation origine","Code journal origine","Mode de règlement","Date début de période","Date fin de période"
+26.01.2010,"101300","BQ","BQ01.10",,"Depot société en formation",,"3.000,00",,,,"          ",26.01.2010,"BQ","    ","          ","          "
+26.01.2010,"101300","BQ","BQ01.10",,"Depot société en formation",,"7.000,00",,,,"          ",26.01.2010,"BQ","    ","          ","          "
+26.01.2010,"411OPEN","BQ","BQ01.10",,"Vir Client ",,"2.508,00","A",,,"          ",26.01.2010,"BQ","    ","          ","          "
+26.01.2010,"455100","BQ","BQ01.10",,"Bankomat Raiffeise","250,00",,,,,"          ",26.01.2010,"BQ","    ","          ","          "
+26.01.2010,"512101","BQ","BQ01.10",,"Extrait bancaire 01.10","12.250,55",,,,,"          ",26.01.2010,"BQ","    ","          ","          "
+26.01.2010,"627800","BQ","BQ01.10",,"Envoi de chequier","2,30",,,,,"          ",26.01.2010,"BQ","    ","          ","          "
+26.01.2010,"627800","BQ","BQ01.10",,"Frais d'expedition","5,15",,,,,"          ",26.01.2010,"BQ","    ","          ","          "
diff --git a/basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-UTF-8.csv b/basic/runtime/org.argeo.basic.nodeps/src/test/resources/org/argeo/util/TestParse-UTF-8.csv
new file mode 100644 (file)
index 0000000..0bec611
--- /dev/null
@@ -0,0 +1,8 @@
+"Date d'imputation","N° de compte","Code journal","Pièce interne","Pièce externe","Libellé d'écriture","Débit","Crédit","Lettrage","Quantité","Code analytique","Date d'échéance","Date d'imputation origine","Code journal origine","Mode de règlement","Date début de période","Date fin de période"
+26.01.2010,"101300","BQ","BQ01.10",,"Depot société en formation",,"3.000,00",,,,"          ",26.01.2010,"BQ","    ","          ","          "
+26.01.2010,"101300","BQ","BQ01.10",,"Depot société en formation",,"7.000,00",,,,"          ",26.01.2010,"BQ","    ","          ","          "
+26.01.2010,"411OPEN","BQ","BQ01.10",,"Vir Client ",,"2.508,00","A",,,"          ",26.01.2010,"BQ","    ","          ","          "
+26.01.2010,"455100","BQ","BQ01.10",,"Bankomat Raiffeise","250,00",,,,,"          ",26.01.2010,"BQ","    ","          ","          "
+26.01.2010,"512101","BQ","BQ01.10",,"Extrait bancaire 01.10","12.250,55",,,,,"          ",26.01.2010,"BQ","    ","          ","          "
+26.01.2010,"627800","BQ","BQ01.10",,"Envoi de chequier","2,30",,,,,"          ",26.01.2010,"BQ","    ","          ","          "
+26.01.2010,"627800","BQ","BQ01.10",,"Frais d'expedition","5,15",,,,,"          ",26.01.2010,"BQ","    ","          ","          "