From 5b8a94ea4b5ecba326ebd3f755b3e2c2f1d94310 Mon Sep 17 00:00:00 2001 From: Mathieu Baudier Date: Tue, 20 Sep 2011 19:16:16 +0000 Subject: [PATCH] First working tabular content in JCR git-svn-id: https://svn.argeo.org/commons/trunk@4744 4cfe0d0a-d680-48aa-b62c-e0a02a3f76cc --- .../argeo/util/tabular/ArrayTabularRow.java | 21 +++ .../argeo/util/tabular/CsvTabularWriter.java | 23 +++ .../org/argeo/util/tabular/TabularColumn.java | 35 ++++ .../argeo/util/tabular/TabularContent.java | 4 +- .../org/argeo/util/tabular/TabularRow.java | 2 + .../org/argeo/util/tabular/TabularWriter.java | 12 ++ .../org.argeo.server.jcr/build.properties | 3 +- .../main/java/org/argeo/jcr/ArgeoNames.java | 4 + .../main/java/org/argeo/jcr/ArgeoTypes.java | 5 + .../jcr/tabular/JcrTabularRowIterator.java | 171 ++++++++++++++++++ .../argeo/jcr/tabular/JcrTabularWriter.java | 71 ++++++++ .../main/resources/org/argeo/jcr/argeo.cnd | 9 + .../org/argeo/jcr/tabular/JcrTabularTest.java | 110 +++++++++++ 13 files changed, 468 insertions(+), 2 deletions(-) create mode 100644 basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/ArrayTabularRow.java create mode 100644 basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/CsvTabularWriter.java create mode 100644 basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularColumn.java create mode 100644 basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularWriter.java create mode 100644 server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularRowIterator.java create mode 100644 server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularWriter.java create mode 100644 server/runtime/org.argeo.server.jcr/src/test/java/org/argeo/jcr/tabular/JcrTabularTest.java diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/ArrayTabularRow.java 
b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/ArrayTabularRow.java
new file mode 100644
index 000000000..311b0faea
--- /dev/null
+++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/ArrayTabularRow.java
@@ -0,0 +1,26 @@
+package org.argeo.util.tabular;
+
+import java.util.List;
+
+/** Minimal tabular row keeping its values in an {@link Object} array. */
+public class ArrayTabularRow implements TabularRow {
+    private final Object[] values;
+
+    /** Copies the content of the list into the backing array. */
+    public ArrayTabularRow(List objs) {
+        Object[] snapshot = objs.toArray();
+        this.values = snapshot;
+    }
+
+    /** Value stored at the given index of the backing array. */
+    public Object get(Integer col) {
+        int index = col.intValue();
+        return values[index];
+    }
+
+    /** Number of values in this row. */
+    public int size() {
+        return values.length;
+    }
+
+}
diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/CsvTabularWriter.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/CsvTabularWriter.java
new file mode 100644
index 000000000..20a275a44
--- /dev/null
+++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/CsvTabularWriter.java
@@ -0,0 +1,27 @@
+package org.argeo.util.tabular;
+
+import java.io.OutputStream;
+import java.util.List;
+
+import org.argeo.util.CsvWriter;
+
+/** Tabular writer producing CSV on a stream by delegating to a {@link CsvWriter}. */
+public class CsvTabularWriter implements TabularWriter {
+    private CsvWriter delegate;
+
+    /** Creates a writer whose rows are sent as CSV to the given stream. */
+    public CsvTabularWriter(OutputStream out) {
+        CsvWriter writer = new CsvWriter(out);
+        this.delegate = writer;
+    }
+
+    /** Hands the row over to the underlying {@link CsvWriter}. */
+    public void appendRow(List row) {
+        delegate.writeLine(row);
+    }
+
+    /** Does nothing: the stream given at construction is left untouched. */
+    public void close() {
+    }
+
+}
diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularColumn.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularColumn.java
new file mode 100644
index 000000000..4a7abf729
--- /dev/null
+++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularColumn.java
@@ -0,0 +1,35 @@
+package org.argeo.util.tabular;
+
+/** Describes a column of a tabular content: its name and its type. */
+public class TabularColumn {
+    /**
+     * JCR type of the column, see
+     * http://www.day.com/maven/javax.jcr/javadocs/jcr-2.0/index.html
+     * ?javax/jcr/Property.html
+     */
+    private Integer type;
+    private String name;
+
+    /** Creates a column with the given name and JCR type. */
+    public TabularColumn(String name, Integer type) {
+        this.type = type;
+        this.name = name;
+    }
+
+    public String getName() {
+        return this.name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public Integer getType() {
+        return this.type;
+    }
+
+    public void setType(Integer type) {
+        this.type = type;
+    }
+
+}
diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularContent.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularContent.java index d5d05192c..eadfda34c 100644 --- a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularContent.java +++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularContent.java @@ -8,5 +8,7 @@ import java.util.List; */ public interface TabularContent { /** The headers of this table or null is none available. 
*/ - public List getHeaders(); + public List getColumns(); + + public TabularRowIterator read(); } diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularRow.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularRow.java index cf8c0f1a3..0d8ba4fde 100644 --- a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularRow.java +++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularRow.java @@ -2,5 +2,7 @@ package org.argeo.util.tabular; /** A row of tabular data */ public interface TabularRow { + public Object get(Integer col); + public int size(); } diff --git a/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularWriter.java b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularWriter.java new file mode 100644 index 000000000..ab493592b --- /dev/null +++ b/basic/runtime/org.argeo.basic.nodeps/src/main/java/org/argeo/util/tabular/TabularWriter.java @@ -0,0 +1,12 @@ +package org.argeo.util.tabular; + +import java.util.List; + +/** Write rows to a tabular content; call {@link #close()} when done so that data is persisted and resources are released */ +public interface TabularWriter { + /** Append a new row of data, given as a list (presumably one value per column - to be confirmed) */ + public void appendRow(List row); + + /** Finish persisting data and release resources */ + public void close(); +} diff --git a/server/runtime/org.argeo.server.jcr/build.properties b/server/runtime/org.argeo.server.jcr/build.properties index b0df02563..0beaef5b1 100644 --- a/server/runtime/org.argeo.server.jcr/build.properties +++ b/server/runtime/org.argeo.server.jcr/build.properties @@ -17,5 +17,6 @@ additional.bundles = com.springsource.slf4j.api,\ com.springsource.org.apache.commons.dbcp,\ com.springsource.org.apache.commons.pool,\ org.argeo.dep.osgi.jackrabbit,\ - com.springsource.org.h2 + com.springsource.org.h2,\ + org.argeo.dep.osgi.tika diff --git a/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/ArgeoNames.java 
b/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/ArgeoNames.java index e24ca4386..86a909483 100644 --- a/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/ArgeoNames.java +++ b/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/ArgeoNames.java @@ -2,6 +2,7 @@ package org.argeo.jcr; /** JCR names in the http://www.argeo.org/argeo namespace */ public interface ArgeoNames { + public final static String ARGEO_NAMESPACE = "http://www.argeo.org/ns/argeo"; public final static String ARGEO_ = "argeo:"; public final static String ARGEO_URI = "argeo:uri"; @@ -15,4 +16,7 @@ public interface ArgeoNames { public final static String ARGEO_PRIMARY_EMAIL = "argeo:primaryEmail"; public final static String ARGEO_PRIMARY_ORGANIZATION = "argeo:primaryOrganization"; + // tabular + public final static String ARGEO_IS_KEY = "argeo:isKey"; + } diff --git a/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/ArgeoTypes.java b/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/ArgeoTypes.java index 6410b79ad..8dfab71da 100644 --- a/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/ArgeoTypes.java +++ b/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/ArgeoTypes.java @@ -5,4 +5,9 @@ public interface ArgeoTypes { public final static String ARGEO_LINK = "argeo:link"; public final static String ARGEO_USER_HOME = "argeo:userHome"; public final static String ARGEO_USER_PROFILE = "argeo:userProfile"; + + // tabular + public final static String ARGEO_TABLE = "argeo:table"; + public final static String ARGEO_COLUMN = "argeo:column"; + public final static String ARGEO_CSV = "argeo:csv"; } diff --git a/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularRowIterator.java b/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularRowIterator.java new file mode 100644 index 000000000..238e7d26c --- /dev/null +++ 
b/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularRowIterator.java
@@ -0,0 +1,194 @@
+package org.argeo.jcr.tabular;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.TimeUnit;
+
+import javax.jcr.Binary;
+import javax.jcr.Node;
+import javax.jcr.NodeIterator;
+import javax.jcr.Property;
+import javax.jcr.PropertyType;
+import javax.jcr.RepositoryException;
+
+import org.apache.commons.io.IOUtils;
+import org.argeo.ArgeoException;
+import org.argeo.jcr.ArgeoTypes;
+import org.argeo.jcr.JcrUtils;
+import org.argeo.util.CsvParser;
+import org.argeo.util.tabular.ArrayTabularRow;
+import org.argeo.util.tabular.TabularColumn;
+import org.argeo.util.tabular.TabularRow;
+import org.argeo.util.tabular.TabularRowIterator;
+
+/**
+ * Iterates over the rows of a {@link ArgeoTypes#ARGEO_TABLE} node.
+ * <p>
+ * The CSV content is parsed by a background thread which feeds a bounded
+ * buffer; {@link #hasNext()} and {@link #next()} consume that buffer. Cell
+ * values are returned as the raw CSV tokens.
+ */
+public class JcrTabularRowIterator implements TabularRowIterator {
+    /** Maximum number of parsed lines buffered ahead of the consumer. */
+    private final static int BUFFER_CAPACITY = 1000;
+    /** Milliseconds to wait on an empty buffer before re-checking the parser. */
+    private final static long POLL_DELAY = 100;
+
+    /** Written by the parsing thread and read by the consumer, hence volatile. */
+    private volatile boolean parsingCompleted = false;
+    /** Failure of the parsing thread, reported once the buffer is drained. */
+    private volatile RuntimeException parsingFailure = null;
+
+    private Long currentRowNumber = 0L;
+
+    private final List<TabularColumn> header = new ArrayList<TabularColumn>();
+
+    /** referenced so that we can close it */
+    private Binary binary;
+    private InputStream in;
+
+    private CsvParser csvParser;
+    private ArrayBlockingQueue<List<String>> textLines;
+    /** Line fetched by {@link #hasNext()} and not yet returned by {@link #next()}. */
+    private List<String> pendingTokens = null;
+
+    /**
+     * Reads the column definitions of the table and starts parsing its content
+     * in a background thread.
+     *
+     * @param tableNode
+     *            the table node; its {@link Property#JCR_CONTENT} child must be
+     *            of type {@link ArgeoTypes#ARGEO_CSV}
+     * @throws ArgeoException
+     *             if the repository cannot be read
+     * @throws IllegalArgumentException
+     *             if the content node is not of a supported type
+     */
+    public JcrTabularRowIterator(Node tableNode) {
+        try {
+            Node contentNode = tableNode.getNode(Property.JCR_CONTENT);
+            for (NodeIterator it = tableNode.getNodes(); it.hasNext();) {
+                Node node = it.nextNode();
+                if (node.isNodeType(ArgeoTypes.ARGEO_COLUMN)) {
+                    Integer type = PropertyType.valueFromName(node.getProperty(
+                            Property.JCR_REQUIRED_TYPE).getString());
+                    header.add(new TabularColumn(node.getName(), type));
+                }
+            }
+
+            // fail fast: without a parser hasNext() would end in a
+            // NullPointerException
+            if (!contentNode.isNodeType(ArgeoTypes.ARGEO_CSV))
+                throw new IllegalArgumentException("Content of table "
+                        + tableNode + " is not of type "
+                        + ArgeoTypes.ARGEO_CSV);
+
+            textLines = new ArrayBlockingQueue<List<String>>(BUFFER_CAPACITY);
+            csvParser = new CsvParser() {
+                protected void processLine(Integer lineNumber,
+                        List<String> header, List<String> tokens) {
+                    try {
+                        textLines.put(tokens);
+                    } catch (InterruptedException e) {
+                        // abort parsing rather than silently dropping the line
+                        Thread.currentThread().interrupt();
+                        throw new ArgeoException(
+                                "Interrupted while buffering line "
+                                        + lineNumber, e);
+                    }
+                }
+            };
+            csvParser.setNoHeader(true);
+            binary = contentNode.getProperty(Property.JCR_DATA).getBinary();
+            in = binary.getStream();
+            Thread thread = new Thread(contentNode.getPath() + " reader") {
+                public void run() {
+                    try {
+                        csvParser.parse(in);
+                    } catch (RuntimeException e) {
+                        parsingFailure = e;
+                    } finally {
+                        IOUtils.closeQuietly(in);
+                        JcrUtils.closeQuietly(binary);
+                        // set last: lines queued before this are then
+                        // guaranteed to be visible to the consumer
+                        parsingCompleted = true;
+                    }
+                }
+            };
+            // an abandoned iterator must not keep the JVM alive
+            thread.setDaemon(true);
+            thread.start();
+        } catch (RepositoryException e) {
+            if (binary != null)
+                JcrUtils.closeQuietly(binary);
+            throw new ArgeoException("Cannot read table " + tableNode, e);
+        }
+    }
+
+    /**
+     * Whether another row is available, waiting for the parsing thread if the
+     * buffer is empty and parsing is not completed.
+     *
+     * @throws ArgeoException
+     *             if parsing failed and all buffered rows have been consumed
+     */
+    public synchronized boolean hasNext() {
+        try {
+            while (pendingTokens == null) {
+                if (parsingCompleted) {
+                    // nothing will be added anymore: take what is left, if any
+                    pendingTokens = textLines.poll();
+                    break;
+                }
+                pendingTokens = textLines.poll(POLL_DELAY,
+                        TimeUnit.MILLISECONDS);
+            }
+        } catch (InterruptedException e) {
+            // restore the flag and only report what is immediately available
+            Thread.currentThread().interrupt();
+            pendingTokens = textLines.poll();
+        }
+
+        if (pendingTokens != null)
+            return true;
+        if (parsingFailure != null)
+            throw new ArgeoException("Cannot parse table content",
+                    parsingFailure);
+        return false;
+    }
+
+    /**
+     * Returns the next row, made of the CSV tokens of the next line.
+     *
+     * @throws NoSuchElementException
+     *             if there is no more row
+     */
+    public synchronized TabularRow next() {
+        if (!hasNext())
+            throw new NoSuchElementException("No more rows");
+        List<String> tokens = pendingTokens;
+        pendingTokens = null;
+        List<Object> objs = new ArrayList<Object>(tokens.size());
+        for (String token : tokens) {
+            // TODO convert to other formats using header
+            objs.add(token);
+        }
+        currentRowNumber++;
+        return new ArrayTabularRow(objs);
+    }
+
+    /** Not supported. */
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+
+    /** Number of rows returned so far by {@link #next()}. */
+    public Long getCurrentRowNumber() {
+        return currentRowNumber;
+    }
+
+}
diff --git a/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularWriter.java b/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularWriter.java
new file mode 100644
index 000000000..cd6feb565
--- /dev/null
+++ b/server/runtime/org.argeo.server.jcr/src/main/java/org/argeo/jcr/tabular/JcrTabularWriter.java
@@ -0,0 +1,112 @@
+package org.argeo.jcr.tabular;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.util.List;
+
+import javax.jcr.Binary;
+import javax.jcr.Node;
+import javax.jcr.Property;
+import javax.jcr.PropertyType;
+import javax.jcr.RepositoryException;
+
+import org.apache.commons.io.IOUtils;
+import org.argeo.ArgeoException;
+import org.argeo.jcr.ArgeoTypes;
+import org.argeo.jcr.JcrUtils;
+import org.argeo.util.CsvWriter;
+import org.argeo.util.tabular.TabularColumn;
+import org.argeo.util.tabular.TabularWriter;
+
+/**
+ * Write / reference tabular content in a JCR repository.
+ * <p>
+ * Rows are written as CSV to an in-memory stream which is stored in the
+ * content node when {@link #close()} is called; the session is not saved here.
+ */
+public class JcrTabularWriter implements TabularWriter {
+    private Node contentNode;
+    /** In-memory CSV buffer, null if the content type is not CSV. */
+    private ByteArrayOutputStream out;
+    /** Writes into {@link #out}, null if the content type is not CSV. */
+    private CsvWriter csvWriter;
+
+    /**
+     * Creates the column nodes and the content node under the table node.
+     *
+     * @param tableNode
+     *            the node under which columns and content are added
+     * @param columns
+     *            the columns, each one added as a child node of type
+     *            {@link ArgeoTypes#ARGEO_COLUMN} named after the column
+     * @param contentNodeType
+     *            the node type of the content node; rows can only be appended
+     *            if it is {@link ArgeoTypes#ARGEO_CSV}
+     * @throws ArgeoException
+     *             if the nodes cannot be created
+     */
+    public JcrTabularWriter(Node tableNode, List<TabularColumn> columns,
+            String contentNodeType) {
+        try {
+            for (TabularColumn column : columns) {
+                Node columnNode = tableNode.addNode(column.getName(),
+                        ArgeoTypes.ARGEO_COLUMN);
+                columnNode.setProperty(Property.JCR_REQUIRED_TYPE,
+                        PropertyType.nameFromValue(column.getType()));
+            }
+            contentNode = tableNode.addNode(Property.JCR_CONTENT,
+                    contentNodeType);
+            if (contentNodeType.equals(ArgeoTypes.ARGEO_CSV)) {
+                contentNode.setProperty(Property.JCR_MIMETYPE, "text/csv");
+                contentNode.setProperty(Property.JCR_ENCODING, "UTF-8");
+                out = new ByteArrayOutputStream();
+                csvWriter = new CsvWriter(out);
+            }
+        } catch (RepositoryException e) {
+            throw new ArgeoException("Cannot create table node " + tableNode, e);
+        }
+    }
+
+    /**
+     * Appends the row to the in-memory CSV buffer.
+     *
+     * @throws IllegalStateException
+     *             if the content node type does not support appending rows
+     */
+    public void appendRow(List row) {
+        if (csvWriter == null)
+            throw new IllegalStateException("Cannot append rows to "
+                    + contentNode + ": content type is not "
+                    + ArgeoTypes.ARGEO_CSV);
+        csvWriter.writeLine(row);
+    }
+
+    /**
+     * Stores the buffered CSV as the {@link Property#JCR_DATA} property of the
+     * content node. Does nothing if no content was buffered because the
+     * content type is not CSV.
+     *
+     * @throws ArgeoException
+     *             if the data cannot be stored
+     */
+    public void close() {
+        if (out == null)
+            return;
+
+        Binary binary = null;
+        InputStream in = null;
+        try {
+            // TODO parallelize with pipes and writing from another thread
+            in = new ByteArrayInputStream(out.toByteArray());
+            binary = contentNode.getSession().getValueFactory()
+                    .createBinary(in);
+            contentNode.setProperty(Property.JCR_DATA, binary);
+        } catch (RepositoryException e) {
+            throw new ArgeoException("Cannot store data in " + contentNode, e);
+        } finally {
+            IOUtils.closeQuietly(in);
+            JcrUtils.closeQuietly(binary);
+        }
+    }
+}
diff --git a/server/runtime/org.argeo.server.jcr/src/main/resources/org/argeo/jcr/argeo.cnd b/server/runtime/org.argeo.server.jcr/src/main/resources/org/argeo/jcr/argeo.cnd index 19fba3775..8fa59aceb 100644 --- a/server/runtime/org.argeo.server.jcr/src/main/resources/org/argeo/jcr/argeo.cnd +++ b/server/runtime/org.argeo.server.jcr/src/main/resources/org/argeo/jcr/argeo.cnd @@ -16,3 +16,12 @@ mixin [argeo:userProfile] > mix:created, mix:lastModified, 
mix:title, mix:versionable mixin - argeo:userID (STRING) m + +// TABULAR CONTENT +[argeo:table] > nt:file ++ * (argeo:column) * + +[argeo:column] > mix:title +- jcr:requiredType (STRING) = 'STRING' + +[argeo:csv] > nt:resource
diff --git a/server/runtime/org.argeo.server.jcr/src/test/java/org/argeo/jcr/tabular/JcrTabularTest.java b/server/runtime/org.argeo.server.jcr/src/test/java/org/argeo/jcr/tabular/JcrTabularTest.java
new file mode 100644
index 000000000..f91917d98
--- /dev/null
+++ b/server/runtime/org.argeo.server.jcr/src/test/java/org/argeo/jcr/tabular/JcrTabularTest.java
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2010 Mathieu Baudier
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.argeo.jcr.tabular;
+
+import java.io.File;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.jcr.Node;
+import javax.jcr.PropertyType;
+import javax.jcr.Repository;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.jackrabbit.commons.cnd.CndImporter;
+import org.apache.jackrabbit.core.TransientRepository;
+import org.argeo.jcr.ArgeoNames;
+import org.argeo.jcr.ArgeoTypes;
+import org.argeo.jcr.unit.AbstractJcrTestCase;
+import org.argeo.util.tabular.TabularColumn;
+import org.argeo.util.tabular.TabularRow;
+import org.argeo.util.tabular.TabularRowIterator;
+import org.argeo.util.tabular.TabularWriter;
+import org.springframework.core.io.ClassPathResource;
+import org.springframework.core.io.Resource;
+
+/** Writes tabular content in a JCR repository and reads it back. */
+public class JcrTabularTest extends AbstractJcrTestCase {
+    private final static Log log = LogFactory.getLog(JcrTabularTest.class);
+
+    /** Classpath location of the Argeo node type definitions. */
+    private final static String CND_PATH = "/org/argeo/jcr/argeo.cnd";
+
+    /**
+     * Writes a CSV table whose values contain a separator and a line break,
+     * then checks that as many rows and columns are read back.
+     */
+    public void testWriteReadCsv() throws Exception {
+        session().setNamespacePrefix("argeo", ArgeoNames.ARGEO_NAMESPACE);
+        registerNodeTypes();
+
+        // write
+        Integer columnCount = 15;
+        Long rowCount = 10000L;
+        String stringValue = "test, \ntest";
+
+        List<TabularColumn> header = new ArrayList<TabularColumn>();
+        for (int i = 0; i < columnCount; i++) {
+            header.add(new TabularColumn("col" + i, PropertyType.STRING));
+        }
+        Node tableNode = session().getRootNode().addNode("table",
+                ArgeoTypes.ARGEO_TABLE);
+        TabularWriter writer = new JcrTabularWriter(tableNode, header,
+                ArgeoTypes.ARGEO_CSV);
+        for (int i = 0; i < rowCount; i++) {
+            List<Object> objs = new ArrayList<Object>();
+            for (int j = 0; j < columnCount; j++) {
+                objs.add(stringValue);
+            }
+            writer.appendRow(objs);
+        }
+        writer.close();
+        session().save();
+
+        if (log.isDebugEnabled())
+            log.debug("Wrote tabular content " + rowCount + " rows, "
+                    + columnCount + " columns");
+        // read
+        TabularRowIterator rowIt = new JcrTabularRowIterator(tableNode);
+        Long count = 0L;
+        while (rowIt.hasNext()) {
+            TabularRow tr = rowIt.next();
+            assertEquals(header.size(), tr.size());
+            count++;
+        }
+        assertEquals(rowCount, count);
+        if (log.isDebugEnabled())
+            log.debug("Read tabular content " + rowCount + " rows, "
+                    + columnCount + " columns");
+    }
+
+    /** Registers the Argeo node types, always closing the CND reader. */
+    private void registerNodeTypes() throws Exception {
+        InputStream in = getClass().getResourceAsStream(CND_PATH);
+        assertNotNull("Cannot find " + CND_PATH + " on the classpath", in);
+        // explicit charset: do not depend on the platform default
+        InputStreamReader reader = new InputStreamReader(in, "UTF-8");
+        try {
+            CndImporter.registerNodeTypes(reader, session());
+        } finally {
+            reader.close();
+        }
+    }
+
+    protected File getRepositoryFile() throws Exception {
+        Resource res = new ClassPathResource(
+                "org/argeo/server/jcr/repository-h2.xml");
+        return res.getFile();
+    }
+
+    protected Repository createRepository() throws Exception {
+        // JackrabbitContainer repo = new JackrabbitContainer();
+        // repo.setHomeDirectory(getHomeDir());
+        // repo.setConfiguration(new FileSystemResource(
+        // getRepositoryFile()));
+        // repo.setInMemory(true);
+        // repo.set
+        Repository repository = new TransientRepository(getRepositoryFile(),
+                getHomeDir());
+        return repository;
+    }
+
+}
-- 
2.39.2