First working tabular content in JCR
author Mathieu Baudier <mbaudier@argeo.org>
Tue, 20 Sep 2011 19:16:16 +0000 (19:16 +0000)
committer Mathieu Baudier <mbaudier@argeo.org>
Tue, 20 Sep 2011 19:16:16 +0000 (19:16 +0000)
git-svn-id: https://svn.argeo.org/commons/trunk@4744 4cfe0d0a-d680-48aa-b62c-e0a02a3f76cc

13 files changed:
basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/ArrayTabularRow.java [new file with mode: 0644]
basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/CsvTabularWriter.java [new file with mode: 0644]
basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularColumn.java [new file with mode: 0644]
basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularContent.java
basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularRow.java
basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularWriter.java [new file with mode: 0644]
server/runtime/org.argeo.server.jcr/build.properties
server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/ArgeoNames.java
server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/ArgeoTypes.java
server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularRowIterator.java [new file with mode: 0644]
server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularWriter.java [new file with mode: 0644]
server/runtime/org.argeo.server.jcr/src/main/resources/org/argeo/jcr/argeo.cnd
server/runtime/org.argeo.server.jcr/src/test/java/org/argeo/jcr/tabular/JcrTabularTest.java [new file with mode: 0644]

diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/ArrayTabularRow.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/ArrayTabularRow.java
new file mode 100644 (file)
index 0000000..311b0fa
--- /dev/null
@@ -0,0 +1,21 @@
+package org.argeo.util.tabular;
+
+import java.util.List;
+
+/**
+ * Simple {@link TabularRow} implementation backed by an {@link Object} array
+ * filled from the list given at construction.
+ */
+public class ArrayTabularRow implements TabularRow {
+       private final Object[] cells;
+
+       /** Creates a row holding the values of the provided list, in order. */
+       public ArrayTabularRow(List<?> objs) {
+               cells = objs.toArray();
+       }
+
+       /** Returns the value stored at the given column index. */
+       public Object get(Integer col) {
+               final int index = col;
+               return cells[index];
+       }
+
+       /** Returns the number of values held by this row. */
+       public int size() {
+               return cells.length;
+       }
+
+}
diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/CsvTabularWriter.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/CsvTabularWriter.java
new file mode 100644 (file)
index 0000000..20a275a
--- /dev/null
@@ -0,0 +1,23 @@
+package org.argeo.util.tabular;
+
+import java.io.OutputStream;
+import java.util.List;
+
+import org.argeo.util.CsvWriter;
+
+/**
+ * {@link TabularWriter} sending its rows to an output stream through a
+ * {@link CsvWriter}.
+ */
+public class CsvTabularWriter implements TabularWriter {
+       private final CsvWriter delegate;
+
+       /** Creates a writer whose {@link CsvWriter} is built on the given stream. */
+       public CsvTabularWriter(OutputStream out) {
+               delegate = new CsvWriter(out);
+       }
+
+       /** Hands the row over to {@link CsvWriter#writeLine}. */
+       public void appendRow(List<?> row) {
+               delegate.writeLine(row);
+       }
+
+       /** Does nothing: the stream given at construction is left untouched. */
+       public void close() {
+               // nothing to release here
+       }
+
+}
diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularColumn.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularColumn.java
new file mode 100644 (file)
index 0000000..4a7abf7
--- /dev/null
@@ -0,0 +1,35 @@
+package org.argeo.util.tabular;
+
+/** Describes one column of a tabular content: its name and its type. */
+public class TabularColumn {
+       /** Name of the column */
+       private String name;
+       /**
+        * Type of the column, expressed as a JCR type, see
+        * http://www.day.com/maven/javax.jcr/javadocs/jcr-2.0/index.html
+        * ?javax/jcr/Property.html
+        */
+       private Integer type;
+
+       /** Creates a column with the given name and JCR type. */
+       public TabularColumn(String name, Integer type) {
+               this.type = type;
+               this.name = name;
+       }
+
+       /** Returns the name of this column. */
+       public String getName() {
+               return this.name;
+       }
+
+       /** Changes the name of this column. */
+       public void setName(String name) {
+               this.name = name;
+       }
+
+       /** Returns the JCR type of this column. */
+       public Integer getType() {
+               return this.type;
+       }
+
+       /** Changes the JCR type of this column. */
+       public void setType(Integer type) {
+               this.type = type;
+       }
+
+}
index d5d05192c355c69f6482fa022dc56efbb04747e9..eadfda34cbdf9c5fce94019587c0498402a85fb5 100644 (file)
@@ -8,5 +8,7 @@ import java.util.List;
  */
 public interface TabularContent {
        /** The headers of this table or <code>null</code> is none available. */
-       public List<String> getHeaders();
+       public List<TabularColumn> getColumns();
+
+       public TabularRowIterator read();
 }
index cf8c0f1a3c5e8ee14acf617b823898079f65e698..0d8ba4fdeaa3ca7ff2187de0f8a74e20a8e4cec5 100644 (file)
@@ -2,5 +2,7 @@ package org.argeo.util.tabular;
 
 /** A row of tabular data */
 public interface TabularRow {
+       public Object get(Integer col);
 
+       public int size();
 }
diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularWriter.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularWriter.java
new file mode 100644 (file)
index 0000000..ab49359
--- /dev/null
@@ -0,0 +1,12 @@
+package org.argeo.util.tabular;
+
+import java.util.List;
+
+/** Sink to which the rows of a tabular content are written one by one. */
+public interface TabularWriter {
+       /**
+        * Adds a row of data after the rows already written.
+        *
+        * @param row
+        *            the values of the row
+        */
+       void appendRow(List<?> row);
+
+       /** Completes the persistence of the data and frees the resources. */
+       void close();
+}
index b0df025637d2303f5a8a1967c5bb244384403338..0beaef5b1b9c9342f299f83ede11eb2080fc0488 100644 (file)
@@ -17,5 +17,6 @@ additional.bundles = com.springsource.slf4j.api,\
                      com.springsource.org.apache.commons.dbcp,\
                      com.springsource.org.apache.commons.pool,\
                      org.argeo.dep.osgi.jackrabbit,\
-                     com.springsource.org.h2
+                     com.springsource.org.h2,\
+                     org.argeo.dep.osgi.tika
 
index e24ca43865844db66ed2c38dd4b89f90dcfbff0c..86a909483635f9b71516afc7040f7c63ec9072db 100644 (file)
@@ -2,6 +2,7 @@ package org.argeo.jcr;
 
 /** JCR names in the http://www.argeo.org/argeo namespace */
 public interface ArgeoNames {
+       public final static String ARGEO_NAMESPACE = "http://www.argeo.org/ns/argeo";
        public final static String ARGEO_ = "argeo:";
 
        public final static String ARGEO_URI = "argeo:uri";
@@ -15,4 +16,7 @@ public interface ArgeoNames {
        public final static String ARGEO_PRIMARY_EMAIL = "argeo:primaryEmail";
        public final static String ARGEO_PRIMARY_ORGANIZATION = "argeo:primaryOrganization";
 
+       // tabular
+       public final static String ARGEO_IS_KEY = "argeo:isKey";
+
 }
index 6410b79ade5488665e7db626b3b39872d439d6cd..8dfab71daedd91cc79fd487e052327f0c8bc8d66 100644 (file)
@@ -5,4 +5,9 @@ public interface ArgeoTypes {
        public final static String ARGEO_LINK = "argeo:link";
        public final static String ARGEO_USER_HOME = "argeo:userHome";
        public final static String ARGEO_USER_PROFILE = "argeo:userProfile";
+
+       // tabular
+       public final static String ARGEO_TABLE = "argeo:table";
+       public final static String ARGEO_COLUMN = "argeo:column";
+       public final static String ARGEO_CSV = "argeo:csv";
 }
diff --git a/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularRowIterator.java b/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularRowIterator.java
new file mode 100644 (file)
index 0000000..238e7d2
--- /dev/null
@@ -0,0 +1,171 @@
+package org.argeo.jcr.tabular;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import javax.jcr.Binary;
+import javax.jcr.Node;
+import javax.jcr.NodeIterator;
+import javax.jcr.Property;
+import javax.jcr.PropertyType;
+import javax.jcr.RepositoryException;
+
+import org.apache.commons.io.IOUtils;
+import org.argeo.ArgeoException;
+import org.argeo.jcr.ArgeoTypes;
+import org.argeo.jcr.JcrUtils;
+import org.argeo.util.CsvParser;
+import org.argeo.util.tabular.ArrayTabularRow;
+import org.argeo.util.tabular.TabularColumn;
+import org.argeo.util.tabular.TabularRow;
+import org.argeo.util.tabular.TabularRowIterator;
+
+/**
+ * Iterates over the rows of a {@link ArgeoTypes#ARGEO_TABLE} node.
+ * <p>
+ * The CSV content of the table is parsed by a dedicated reader thread which
+ * feeds a bounded queue; {@link #hasNext()} and {@link #next()} consume this
+ * queue.
+ */
+public class JcrTabularRowIterator implements TabularRowIterator {
+       /**
+        * Set to true by the reader thread once a first line has been buffered.
+        * Volatile since it is written from the reader thread.
+        */
+       private volatile Boolean hasNext = null;
+       /**
+        * Set by the reader thread when parsing has ended, successfully or not.
+        * Volatile so that the consuming thread reliably sees the update.
+        */
+       private volatile boolean parsingCompleted = false;
+
+       /** Number of rows returned by {@link #next()} so far */
+       private Long currentRowNumber = 0L;
+
+       private List<TabularColumn> header = new ArrayList<TabularColumn>();
+
+       /** referenced so that we can close it */
+       private Binary binary;
+       private InputStream in;
+
+       private CsvParser csvParser;
+       /** Lines parsed by the reader thread and not yet consumed */
+       private ArrayBlockingQueue<List<String>> textLines;
+
+       /**
+        * Reads the column definitions of the table and starts parsing its CSV
+        * content in a background thread.
+        *
+        * @param tableNode
+        *            the table node, which must have a
+        *            {@link Property#JCR_CONTENT} child node
+        * @throws ArgeoException
+        *             if the repository cannot be read
+        * @throws UnsupportedOperationException
+        *             if the content node is not of type
+        *             {@link ArgeoTypes#ARGEO_CSV}
+        */
+       public JcrTabularRowIterator(Node tableNode) {
+               try {
+                       // fails with a RepositoryException if there is no content node
+                       Node contentNode = tableNode.getNode(Property.JCR_CONTENT);
+                       for (NodeIterator it = tableNode.getNodes(); it.hasNext();) {
+                               Node node = it.nextNode();
+                               if (node.isNodeType(ArgeoTypes.ARGEO_COLUMN)) {
+                                       Integer type = PropertyType.valueFromName(node.getProperty(
+                                                       Property.JCR_REQUIRED_TYPE).getString());
+                                       TabularColumn tc = new TabularColumn(node.getName(), type);
+                                       header.add(tc);
+                               }
+                       }
+
+                       // fail early rather than with a NullPointerException on first use
+                       if (!contentNode.isNodeType(ArgeoTypes.ARGEO_CSV))
+                               throw new UnsupportedOperationException(
+                                               "Only CSV content can be read, not " + contentNode);
+
+                       textLines = new ArrayBlockingQueue<List<String>>(1000);
+                       csvParser = new CsvParser() {
+                               protected void processLine(Integer lineNumber,
+                                               List<String> header, List<String> tokens) {
+                                       try {
+                                               // blocks while the buffer is full
+                                               textLines.put(tokens);
+                                       } catch (InterruptedException e) {
+                                               // the line could not be buffered: restore the
+                                               // interrupted status and stop parsing instead of
+                                               // silently dropping lines
+                                               Thread.currentThread().interrupt();
+                                               throw new ArgeoException(
+                                                               "Interrupted while buffering line "
+                                                                               + lineNumber, e);
+                                       }
+                                       if (hasNext == null) {
+                                               hasNext = true;
+                                               synchronized (JcrTabularRowIterator.this) {
+                                                       JcrTabularRowIterator.this.notifyAll();
+                                               }
+                                       }
+                               }
+                       };
+                       csvParser.setNoHeader(true);
+                       // computed first so that a failure here cannot leak the binary
+                       String threadName = contentNode.getPath() + " reader";
+                       binary = contentNode.getProperty(Property.JCR_DATA).getBinary();
+                       in = binary.getStream();
+                       Thread thread = new Thread(threadName) {
+                               public void run() {
+                                       try {
+                                               csvParser.parse(in);
+                                       } finally {
+                                               parsingCompleted = true;
+                                               IOUtils.closeQuietly(in);
+                                               // the binary is not needed once the stream is read
+                                               JcrUtils.closeQuietly(binary);
+                                       }
+                               }
+                       };
+                       thread.start();
+               } catch (RepositoryException e) {
+                       throw new ArgeoException("Cannot read table " + tableNode, e);
+               }
+       }
+
+       /**
+        * Whether at least one more row is buffered. If the buffer is empty while
+        * parsing is still running, waits until a line is buffered or parsing
+        * ends. If the calling thread is interrupted while waiting, its
+        * interrupted status is kept and the answer reflects the current state of
+        * the buffer.
+        */
+       public synchronized boolean hasNext() {
+               // wait until a line is buffered or the parsing is over
+               while (!parsingCompleted && textLines.isEmpty())
+                       try {
+                               wait(100);
+                       } catch (InterruptedException e) {
+                               // keep the interrupted status for the caller
+                               Thread.currentThread().interrupt();
+                               break;
+                       }
+
+               // parsingCompleted is set after the last line was buffered, so an
+               // empty buffer at this point means that there is nothing left
+               return !textLines.isEmpty();
+       }
+
+       /**
+        * Returns the next row, all values being the raw CSV tokens.
+        *
+        * @throws java.util.NoSuchElementException
+        *             if {@link #hasNext()} is false
+        */
+       public synchronized TabularRow next() {
+               if (!hasNext())
+                       throw new java.util.NoSuchElementException(
+                                       "No more rows after row " + currentRowNumber);
+               // never null: hasNext() has just seen a buffered line and lines are
+               // only removed from within this synchronized method
+               List<String> tokens = textLines.poll();
+               List<Object> objs = new ArrayList<Object>(tokens.size());
+               for (String token : tokens) {
+                       // TODO convert to other formats using header
+                       objs.add(token);
+               }
+               currentRowNumber++;
+               return new ArrayTabularRow(objs);
+       }
+
+       /** Not supported. */
+       public void remove() {
+               throw new UnsupportedOperationException();
+       }
+
+       /** Returns the number of rows returned by {@link #next()} so far. */
+       public Long getCurrentRowNumber() {
+               return currentRowNumber;
+       }
+
+}
diff --git a/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularWriter.java b/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularWriter.java
new file mode 100644 (file)
index 0000000..cd6feb5
--- /dev/null
@@ -0,0 +1,71 @@
+package org.argeo.jcr.tabular;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.util.List;
+
+import javax.jcr.Binary;
+import javax.jcr.Node;
+import javax.jcr.Property;
+import javax.jcr.PropertyType;
+import javax.jcr.RepositoryException;
+
+import org.apache.commons.io.IOUtils;
+import org.argeo.ArgeoException;
+import org.argeo.jcr.ArgeoTypes;
+import org.argeo.jcr.JcrUtils;
+import org.argeo.util.CsvWriter;
+import org.argeo.util.tabular.TabularColumn;
+import org.argeo.util.tabular.TabularWriter;
+
+/**
+ * Write / reference tabular content in a JCR repository.
+ * <p>
+ * Rows are only buffered when the content node is of type
+ * {@link ArgeoTypes#ARGEO_CSV}: they are kept in memory as CSV and stored in
+ * the {@link Property#JCR_DATA} property of the content node when
+ * {@link #close()} is called.
+ */
+public class JcrTabularWriter implements TabularWriter {
+       private Node contentNode;
+       /** In-memory CSV buffer, <code>null</code> if the content is not CSV */
+       private ByteArrayOutputStream out;
+       /** Writes to the buffer, <code>null</code> if the content is not CSV */
+       private CsvWriter csvWriter;
+
+       /**
+        * Creates a table node: adds one {@link ArgeoTypes#ARGEO_COLUMN} child
+        * per column and a {@link Property#JCR_CONTENT} child of the given type.
+        *
+        * @param tableNode
+        *            the node under which columns and content are created
+        * @param columns
+        *            the columns of the table
+        * @param contentNodeType
+        *            the node type of the content node
+        * @throws ArgeoException
+        *             if the nodes cannot be created
+        */
+       public JcrTabularWriter(Node tableNode, List<TabularColumn> columns,
+                       String contentNodeType) {
+               try {
+                       for (TabularColumn column : columns) {
+                               Node columnNode = tableNode.addNode(column.getName(),
+                                               ArgeoTypes.ARGEO_COLUMN);
+                               columnNode.setProperty(Property.JCR_REQUIRED_TYPE,
+                                               PropertyType.nameFromValue(column.getType()));
+                       }
+                       contentNode = tableNode.addNode(Property.JCR_CONTENT,
+                                       contentNodeType);
+                       if (contentNodeType.equals(ArgeoTypes.ARGEO_CSV)) {
+                               contentNode.setProperty(Property.JCR_MIMETYPE, "text/csv");
+                               contentNode.setProperty(Property.JCR_ENCODING, "UTF-8");
+                               out = new ByteArrayOutputStream();
+                               csvWriter = new CsvWriter(out);
+                       }
+               } catch (RepositoryException e) {
+                       throw new ArgeoException("Cannot create table node " + tableNode, e);
+               }
+       }
+
+       /**
+        * Appends the row to the in-memory CSV buffer.
+        *
+        * @throws UnsupportedOperationException
+        *             if the content node is not CSV, since there is then no
+        *             buffer to write to
+        */
+       public void appendRow(List<?> row) {
+               // explicit failure instead of a NullPointerException
+               if (csvWriter == null)
+                       throw new UnsupportedOperationException(
+                                       "Rows can only be appended to CSV content, not to "
+                                                       + contentNode);
+               csvWriter.writeLine(row);
+       }
+
+       /**
+        * Stores the buffered CSV data in the {@link Property#JCR_DATA} property
+        * of the content node. Does nothing if the content is not CSV. The
+        * session is not saved.
+        *
+        * @throws ArgeoException
+        *             if the data cannot be stored
+        */
+       public void close() {
+               // nothing was buffered if the content is not CSV
+               if (out == null)
+                       return;
+
+               Binary binary = null;
+               InputStream in = null;
+               try {
+                       // TODO parallelize with pipes and writing from another thread
+                       in = new ByteArrayInputStream(out.toByteArray());
+                       binary = contentNode.getSession().getValueFactory()
+                                       .createBinary(in);
+                       contentNode.setProperty(Property.JCR_DATA, binary);
+               } catch (RepositoryException e) {
+                       throw new ArgeoException("Cannot store data in " + contentNode, e);
+               } finally {
+                       IOUtils.closeQuietly(in);
+                       JcrUtils.closeQuietly(binary);
+               }
+       }
+}
index 19fba377549ce3490e07ed7e8f7963e2fafb2d62..8fa59aceb452dcfc0f647995bc2d3e2f7039c10b 100644 (file)
@@ -16,3 +16,12 @@ mixin
 [argeo:userProfile] > mix:created, mix:lastModified, mix:title, mix:versionable
 mixin
 - argeo:userID (STRING) m
+
+// TABULAR CONTENT
+[argeo:table] > nt:file
++ * (argeo:column) *
+
+[argeo:column] > mix:title
+- jcr:requiredType (STRING) = 'STRING'
+
+[argeo:csv] > nt:resource
diff --git a/server/runtime/org.argeo.server.jcr/src/test/java/org/argeo/jcr/tabular/JcrTabularTest.java b/server/runtime/org.argeo.server.jcr/src/test/java/org/argeo/jcr/tabular/JcrTabularTest.java
new file mode 100644 (file)
index 0000000..f91917d
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2010 Mathieu Baudier <mbaudier@argeo.org>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.argeo.jcr.tabular;
+
+import java.io.File;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.jcr.Node;
+import javax.jcr.PropertyType;
+import javax.jcr.Repository;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.jackrabbit.commons.cnd.CndImporter;
+import org.apache.jackrabbit.core.TransientRepository;
+import org.argeo.jcr.ArgeoNames;
+import org.argeo.jcr.ArgeoTypes;
+import org.argeo.jcr.unit.AbstractJcrTestCase;
+import org.argeo.util.tabular.TabularColumn;
+import org.argeo.util.tabular.TabularRow;
+import org.argeo.util.tabular.TabularRowIterator;
+import org.argeo.util.tabular.TabularWriter;
+import org.springframework.core.io.ClassPathResource;
+import org.springframework.core.io.Resource;
+
+/**
+ * Writes a CSV table with {@link JcrTabularWriter} and reads it back with
+ * {@link JcrTabularRowIterator}.
+ */
+public class JcrTabularTest extends AbstractJcrTestCase {
+       private final static Log log = LogFactory.getLog(JcrTabularTest.class);
+
+       /**
+        * Registers the Argeo node types, writes a table and checks that the
+        * same number of rows and columns is read back.
+        */
+       public void testWriteReadCsv() throws Exception {
+               session().setNamespacePrefix("argeo", ArgeoNames.ARGEO_NAMESPACE);
+               InputStreamReader reader = new InputStreamReader(getClass()
+                               .getResourceAsStream("/org/argeo/jcr/argeo.cnd"));
+               try {
+                       CndImporter.registerNodeTypes(reader, session());
+               } finally {
+                       // release the resource even if the registration fails
+                       reader.close();
+               }
+
+               // write
+               Integer columnCount = 15;
+               Long rowCount = 10000l;
+               // value containing a separator and a line break
+               String stringValue = "test, \ntest";
+
+               List<TabularColumn> header = new ArrayList<TabularColumn>();
+               for (int i = 0; i < columnCount; i++) {
+                       header.add(new TabularColumn("col" + i, PropertyType.STRING));
+               }
+               Node tableNode = session().getRootNode().addNode("table",
+                               ArgeoTypes.ARGEO_TABLE);
+               TabularWriter writer = new JcrTabularWriter(tableNode, header,
+                               ArgeoTypes.ARGEO_CSV);
+               for (int i = 0; i < rowCount; i++) {
+                       List<Object> objs = new ArrayList<Object>();
+                       for (int j = 0; j < columnCount; j++) {
+                               objs.add(stringValue);
+                       }
+                       writer.appendRow(objs);
+               }
+               writer.close();
+               session().save();
+
+               if (log.isDebugEnabled())
+                       log.debug("Wrote tabular content " + rowCount + " rows, "
+                                       + columnCount + " columns");
+               // read
+               TabularRowIterator rowIt = new JcrTabularRowIterator(tableNode);
+               Long count = 0l;
+               while (rowIt.hasNext()) {
+                       TabularRow tr = rowIt.next();
+                       assertEquals(header.size(), tr.size());
+                       count++;
+               }
+               assertEquals(rowCount, count);
+               // report what was actually read rather than what was expected
+               if (log.isDebugEnabled())
+                       log.debug("Read tabular content " + count + " rows, "
+                                       + columnCount + " columns");
+       }
+
+       /** Returns the repository configuration file, found on the classpath. */
+       protected File getRepositoryFile() throws Exception {
+               Resource res = new ClassPathResource(
+                               "org/argeo/server/jcr/repository-h2.xml");
+               return res.getFile();
+       }
+
+       /**
+        * Creates a {@link TransientRepository} based on the repository
+        * configuration file and the home directory of this test.
+        */
+       protected Repository createRepository() throws Exception {
+               // JackrabbitContainer repo = new JackrabbitContainer();
+               // repo.setHomeDirectory(getHomeDir());
+               // repo.setConfiguration(new FileSystemResource(
+               // getRepositoryFile()));
+               // repo.setInMemory(true);
+               // repo.set
+               Repository repository = new TransientRepository(getRepositoryFile(),
+                               getHomeDir());
+               return repository;
+       }
+
+}