--- /dev/null
+package org.argeo.util.tabular;
+
+import java.util.List;
+
+/**
+ * Minimal tabular row wrapping an {@link Object} array. The array is filled
+ * once in the constructor and only read afterwards.
+ */
+public class ArrayTabularRow implements TabularRow {
+ private final Object[] arr; // the cell values of this row
+ /**
+  * Fills the row with the array returned by {@link List#toArray()}.
+  *
+  * @param objs the cell values; a <code>null</code> list fails with a
+  *            {@link NullPointerException}
+  */
+ public ArrayTabularRow(List<?> objs) {
+ this.arr = objs.toArray();
+ }
+ /**
+  * @param col index into the backing array (zero-based); it is unboxed, so
+  *            <code>null</code> fails with a {@link NullPointerException}
+  * @return the value stored at this index
+  * @throws ArrayIndexOutOfBoundsException if the index is outside the array
+  */
+ public Object get(Integer col) {
+ return arr[col];
+ }
+ /** @return the length of the backing array */
+ public int size() {
+ return arr.length;
+ }
+
+}
--- /dev/null
+package org.argeo.util.tabular;
+
+import java.io.OutputStream;
+import java.util.List;
+
+import org.argeo.util.CsvWriter;
+
+/**
+ * Write tabular content in a stream as CSV. Wraps a {@link CsvWriter}.
+ * <p>
+ * NOTE(review): {@link #close()} is empty and the {@link OutputStream} given
+ * to the constructor is not kept by this class, so it is never flushed or
+ * closed from here; presumably the caller owns the stream - confirm.
+ */
+public class CsvTabularWriter implements TabularWriter {
+ private CsvWriter csvWriter; // receives every appended row
+ /**
+  * @param out the stream handed over to a new {@link CsvWriter}
+  */
+ public CsvTabularWriter(OutputStream out) {
+ this.csvWriter = new CsvWriter(out);
+ }
+ /** Passes the row unchanged to the wrapped {@link CsvWriter}. */
+ public void appendRow(List<?> row) {
+ csvWriter.writeLine(row);
+ }
+ /** Does nothing in this implementation. */
+ public void close() {
+ }
+
+}
--- /dev/null
+package org.argeo.util.tabular;
+
+/**
+ * The column in a tabular content: a mutable holder for a column name and a
+ * type code.
+ */
+public class TabularColumn {
+ private String name;
+ /**
+ * JCR types, see
+ * http://www.day.com/maven/javax.jcr/javadocs/jcr-2.0/index.html
+ * ?javax/jcr/Property.html
+ * (the JCR classes of this change set fill it with
+ * <code>javax.jcr.PropertyType</code> constants)
+ */
+ private Integer type;
+ /**
+  * @param name the column name
+  * @param type the type code of the column values
+  */
+ public TabularColumn(String name, Integer type) {
+ super();
+ this.name = name;
+ this.type = type;
+ }
+ /** @return the column name */
+ public String getName() {
+ return name;
+ }
+ /** @param name the new column name */
+ public void setName(String name) {
+ this.name = name;
+ }
+ /** @return the type code of the column values */
+ public Integer getType() {
+ return type;
+ }
+ /** @param type the new type code of the column values */
+ public void setType(Integer type) {
+ this.type = type;
+ }
+
+}
*/
public interface TabularContent {
/** The headers of this table or <code>null</code> is none available. */
- public List<String> getHeaders();
+ public List<TabularColumn> getColumns();
+ /**
+  * Presumably gives access to the rows of this content through a
+  * {@link TabularRowIterator} - TODO confirm whether each call returns a
+  * fresh iterator.
+  */
+ public TabularRowIterator read();
}
/** A row of tabular data */
public interface TabularRow {
+ public Object get(Integer col); // value of the cell at column index col (ArrayTabularRow uses it as a zero-based array index)
+ public int size(); // number of cells in this row
}
--- /dev/null
+package org.argeo.util.tabular;
+
+import java.util.List;
+
+/**
+ * Write to a tabular content. Rows are added one at a time with
+ * {@link #appendRow(List)}, then {@link #close()} ends the writing.
+ */
+public interface TabularWriter {
+ /**
+  * Append a new row of data
+  *
+  * @param row the cell values of the row, presumably in column order
+  */
+ public void appendRow(List<?> row);
+
+ /** Finish persisting data and release resources */
+ public void close();
+}
com.springsource.org.apache.commons.dbcp,\
com.springsource.org.apache.commons.pool,\
org.argeo.dep.osgi.jackrabbit,\
- com.springsource.org.h2
+ com.springsource.org.h2,\
+ org.argeo.dep.osgi.tika
/** JCR names in the http://www.argeo.org/argeo namespace */
public interface ArgeoNames {
+ public final static String ARGEO_NAMESPACE = "http://www.argeo.org/ns/argeo"; // NOTE(review): the interface Javadoc mentions http://www.argeo.org/argeo - confirm which URI is the registered one
public final static String ARGEO_ = "argeo:";
public final static String ARGEO_URI = "argeo:uri";
public final static String ARGEO_PRIMARY_EMAIL = "argeo:primaryEmail";
public final static String ARGEO_PRIMARY_ORGANIZATION = "argeo:primaryOrganization";
+ // tabular (NOTE(review): argeo:isKey is not declared by the node types shown in this change - confirm where it is used)
+ public final static String ARGEO_IS_KEY = "argeo:isKey";
+
}
public final static String ARGEO_LINK = "argeo:link";
public final static String ARGEO_USER_HOME = "argeo:userHome";
public final static String ARGEO_USER_PROFILE = "argeo:userProfile";
+
+ // tabular
+ public final static String ARGEO_TABLE = "argeo:table";
+ public final static String ARGEO_COLUMN = "argeo:column";
+ public final static String ARGEO_CSV = "argeo:csv";
}
--- /dev/null
+package org.argeo.jcr.tabular;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import javax.jcr.Binary;
+import javax.jcr.Node;
+import javax.jcr.NodeIterator;
+import javax.jcr.Property;
+import javax.jcr.PropertyType;
+import javax.jcr.RepositoryException;
+
+import org.apache.commons.io.IOUtils;
+import org.argeo.ArgeoException;
+import org.argeo.jcr.ArgeoTypes;
+import org.argeo.jcr.JcrUtils;
+import org.argeo.util.CsvParser;
+import org.argeo.util.tabular.ArrayTabularRow;
+import org.argeo.util.tabular.TabularColumn;
+import org.argeo.util.tabular.TabularRow;
+import org.argeo.util.tabular.TabularRowIterator;
+
+/**
+ * Iterates over the rows of a {@link ArgeoTypes#ARGEO_TABLE} node.
+ * <p>
+ * When the content node is of type {@link ArgeoTypes#ARGEO_CSV}, the CSV data
+ * is parsed by a dedicated reader thread which pushes each parsed line into a
+ * bounded queue; {@link #hasNext()} and {@link #next()} consume that queue.
+ * For any other content type nothing is started and the iterator is empty.
+ * <p>
+ * NOTE(review): if the consumer stops iterating before the end, the reader
+ * thread stays blocked on the full queue and the stream stays open.
+ */
+public class JcrTabularRowIterator implements TabularRowIterator {
+    /** Maximum number of parsed lines buffered ahead of the consumer. */
+    private final static int BUFFER_SIZE = 1000;
+    /** Delay (in ms) after which {@link #hasNext()} re-checks the buffer. */
+    private final static long POLL_INTERVAL = 100;
+
+    /** Only touched by the reader thread: whether the first line was signalled. */
+    private boolean firstLineNotified = false;
+    /** Written by the reader thread and read by the consumer, hence volatile. */
+    private volatile boolean parsingCompleted = false;
+
+    private Long currentRowNumber = 0l;
+
+    private List<TabularColumn> header = new ArrayList<TabularColumn>();
+
+    /** referenced so that we can close it */
+    private Binary binary;
+    private InputStream in;
+
+    private CsvParser csvParser;
+    /** Lines parsed but not yet consumed, <code>null</code> if nothing is read. */
+    private ArrayBlockingQueue<List<String>> textLines;
+
+    /**
+     * Reads the column definitions of the table node and, for CSV content,
+     * starts the thread parsing the binary data.
+     *
+     * @param tableNode the table node; its children of type
+     *            {@link ArgeoTypes#ARGEO_COLUMN} describe the columns and its
+     *            {@link Property#JCR_CONTENT} child holds the data
+     * @throws ArgeoException if the repository cannot be read
+     */
+    public JcrTabularRowIterator(Node tableNode) {
+        try {
+            // getNode() throws if the content node is missing, it never returns null
+            Node contentNode = tableNode.getNode(Property.JCR_CONTENT);
+            for (NodeIterator it = tableNode.getNodes(); it.hasNext();) {
+                Node node = it.nextNode();
+                if (node.isNodeType(ArgeoTypes.ARGEO_COLUMN)) {
+                    Integer type = PropertyType.valueFromName(node.getProperty(
+                            Property.JCR_REQUIRED_TYPE).getString());
+                    header.add(new TabularColumn(node.getName(), type));
+                }
+            }
+
+            if (contentNode.isNodeType(ArgeoTypes.ARGEO_CSV)) {
+                textLines = new ArrayBlockingQueue<List<String>>(BUFFER_SIZE);
+                csvParser = new CsvParser() {
+                    protected void processLine(Integer lineNumber,
+                            List<String> header, List<String> tokens) {
+                        try {
+                            // blocks while the buffer is full
+                            textLines.put(tokens);
+                        } catch (InterruptedException e) {
+                            // keep the interrupt status and stop parsing rather
+                            // than silently skipping this row
+                            Thread.currentThread().interrupt();
+                            throw new ArgeoException(
+                                    "Interrupted while buffering line "
+                                            + lineNumber, e);
+                        }
+                        if (!firstLineNotified) {
+                            firstLineNotified = true;
+                            // wake up a consumer waiting in hasNext()
+                            synchronized (JcrTabularRowIterator.this) {
+                                JcrTabularRowIterator.this.notifyAll();
+                            }
+                        }
+                    }
+                };
+                csvParser.setNoHeader(true);
+                binary = contentNode.getProperty(Property.JCR_DATA).getBinary();
+                in = binary.getStream();
+                Thread thread = new Thread(contentNode.getPath() + " reader") {
+                    public void run() {
+                        try {
+                            csvParser.parse(in);
+                        } finally {
+                            parsingCompleted = true;
+                            IOUtils.closeQuietly(in);
+                            // the binary was kept only to be released here
+                            JcrUtils.closeQuietly(binary);
+                            // do not let the consumer wait for the next poll
+                            synchronized (JcrTabularRowIterator.this) {
+                                JcrTabularRowIterator.this.notifyAll();
+                            }
+                        }
+                    }
+                };
+                thread.start();
+            }
+        } catch (RepositoryException e) {
+            throw new ArgeoException("Cannot read table " + tableNode, e);
+        }
+    }
+
+    /**
+     * Waits until a row is buffered or the parsing is over.
+     *
+     * @return <code>true</code> if a row can be retrieved with {@link #next()}
+     */
+    public synchronized boolean hasNext() {
+        // content type which is not read by this iterator
+        if (textLines == null)
+            return false;
+
+        // the completion flag is read before the buffer, so that an empty
+        // buffer seen after completion really means that there are no more rows
+        while (!parsingCompleted && textLines.isEmpty())
+            try {
+                wait(POLL_INTERVAL);
+            } catch (InterruptedException e) {
+                // FIXME better deal with interruption
+                Thread.currentThread().interrupt();
+                break;
+            }
+
+        return !textLines.isEmpty();
+    }
+
+    /**
+     * @return the next row, each value being the raw text of the cell
+     * @throws java.util.NoSuchElementException if there are no more rows
+     */
+    public synchronized TabularRow next() {
+        if (!hasNext())
+            throw new java.util.NoSuchElementException("No more rows after row "
+                    + currentRowNumber);
+
+        // cannot be null: this iterator is the only consumer of the buffer
+        List<String> tokens = textLines.poll();
+        // TODO convert to other formats using header
+        List<Object> objs = new ArrayList<Object>(tokens);
+        currentRowNumber++;
+        return new ArrayTabularRow(objs);
+    }
+
+    /** Not supported. */
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+
+    /** @return the number of rows returned by {@link #next()} so far */
+    public synchronized Long getCurrentRowNumber() {
+        return currentRowNumber;
+    }
+
+}
--- /dev/null
+package org.argeo.jcr.tabular;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.util.List;
+
+import javax.jcr.Binary;
+import javax.jcr.Node;
+import javax.jcr.Property;
+import javax.jcr.PropertyType;
+import javax.jcr.RepositoryException;
+
+import org.apache.commons.io.IOUtils;
+import org.argeo.ArgeoException;
+import org.argeo.jcr.ArgeoTypes;
+import org.argeo.jcr.JcrUtils;
+import org.argeo.util.CsvWriter;
+import org.argeo.util.tabular.TabularColumn;
+import org.argeo.util.tabular.TabularWriter;
+
+/**
+ * Write / reference tabular content in a JCR repository.
+ * <p>
+ * Rows can only be appended when the content node is of type
+ * {@link ArgeoTypes#ARGEO_CSV}: they are then buffered in memory as CSV and
+ * stored in the content node by {@link #close()}. For any other content node
+ * type, only the column and content nodes are created.
+ */
+public class JcrTabularWriter implements TabularWriter {
+    private Node contentNode;
+    /** In-memory CSV buffer, <code>null</code> if the content is not CSV. */
+    private ByteArrayOutputStream out;
+    /** Writes to the buffer, <code>null</code> if the content is not CSV. */
+    private CsvWriter csvWriter;
+
+    /**
+     * Creates a table node
+     *
+     * @param tableNode the node under which one node per column and the
+     *            content node are added
+     * @param columns the columns, their type being a
+     *            {@link PropertyType} constant
+     * @param contentNodeType the node type of the content node
+     * @throws ArgeoException if the nodes cannot be created
+     */
+    public JcrTabularWriter(Node tableNode, List<TabularColumn> columns,
+            String contentNodeType) {
+        try {
+            for (TabularColumn column : columns) {
+                Node columnNode = tableNode.addNode(column.getName(),
+                        ArgeoTypes.ARGEO_COLUMN);
+                columnNode.setProperty(Property.JCR_REQUIRED_TYPE,
+                        PropertyType.nameFromValue(column.getType()));
+            }
+            contentNode = tableNode.addNode(Property.JCR_CONTENT,
+                    contentNodeType);
+            // constant first, so that the comparison itself is null-safe
+            if (ArgeoTypes.ARGEO_CSV.equals(contentNodeType)) {
+                contentNode.setProperty(Property.JCR_MIMETYPE, "text/csv");
+                contentNode.setProperty(Property.JCR_ENCODING, "UTF-8");
+                out = new ByteArrayOutputStream();
+                csvWriter = new CsvWriter(out);
+            }
+        } catch (RepositoryException e) {
+            throw new ArgeoException("Cannot create table node " + tableNode, e);
+        }
+    }
+
+    /**
+     * Adds a row to the in-memory CSV buffer.
+     *
+     * @throws IllegalStateException if the content node is not of type
+     *             {@link ArgeoTypes#ARGEO_CSV}
+     */
+    public void appendRow(List<?> row) {
+        if (csvWriter == null)
+            throw new IllegalStateException("Cannot append rows to "
+                    + contentNode + ": only " + ArgeoTypes.ARGEO_CSV
+                    + " content can be written");
+        csvWriter.writeLine(row);
+    }
+
+    /**
+     * Stores the buffered CSV in the {@link Property#JCR_DATA} property of the
+     * content node. The session is not saved. Does nothing if the content is
+     * not CSV, since nothing was buffered.
+     *
+     * @throws ArgeoException if the data cannot be stored
+     */
+    public void close() {
+        if (out == null)
+            return;
+
+        Binary binary = null;
+        InputStream in = null;
+        try {
+            // TODO parallelize with pipes and writing from another thread
+            in = new ByteArrayInputStream(out.toByteArray());
+            binary = contentNode.getSession().getValueFactory()
+                    .createBinary(in);
+            contentNode.setProperty(Property.JCR_DATA, binary);
+        } catch (RepositoryException e) {
+            throw new ArgeoException("Cannot store data in " + contentNode, e);
+        } finally {
+            IOUtils.closeQuietly(in);
+            JcrUtils.closeQuietly(binary);
+        }
+    }
+}
[argeo:userProfile] > mix:created, mix:lastModified, mix:title, mix:versionable
mixin
- argeo:userID (STRING) m
+
+// TABULAR CONTENT: an argeo:table is an nt:file with any number of argeo:column children; argeo:column keeps the name of a JCR property type in jcr:requiredType; argeo:csv is the resource type written by JcrTabularWriter as content of a table
+[argeo:table] > nt:file
++ * (argeo:column) *
+
+[argeo:column] > mix:title
+- jcr:requiredType (STRING) = 'STRING'
+
+[argeo:csv] > nt:resource
--- /dev/null
+/*
+ * Copyright (C) 2010 Mathieu Baudier <mbaudier@argeo.org>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.argeo.jcr.tabular;
+
+import java.io.File;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.jcr.Node;
+import javax.jcr.PropertyType;
+import javax.jcr.Repository;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.jackrabbit.commons.cnd.CndImporter;
+import org.apache.jackrabbit.core.TransientRepository;
+import org.argeo.jcr.ArgeoNames;
+import org.argeo.jcr.ArgeoTypes;
+import org.argeo.jcr.unit.AbstractJcrTestCase;
+import org.argeo.util.tabular.TabularColumn;
+import org.argeo.util.tabular.TabularRow;
+import org.argeo.util.tabular.TabularRowIterator;
+import org.argeo.util.tabular.TabularWriter;
+import org.springframework.core.io.ClassPathResource;
+import org.springframework.core.io.Resource;
+
+public class JcrTabularTest extends AbstractJcrTestCase {
+ private final static Log log = LogFactory.getLog(JcrTabularTest.class);
+
+ public void testWriteReadCsv() throws Exception {
+ session().setNamespacePrefix("argeo", ArgeoNames.ARGEO_NAMESPACE);
+ InputStreamReader reader = new InputStreamReader(getClass()
+ .getResourceAsStream("/org/argeo/jcr/argeo.cnd"));
+ CndImporter.registerNodeTypes(reader, session());
+ reader.close();
+
+ // write
+ Integer columnCount = 15;
+ Long rowCount = 10000l;
+ String stringValue = "test, \ntest";
+
+ List<TabularColumn> header = new ArrayList<TabularColumn>();
+ for (int i = 0; i < columnCount; i++) {
+ header.add(new TabularColumn("col" + i, PropertyType.STRING));
+ }
+ Node tableNode = session().getRootNode().addNode("table",
+ ArgeoTypes.ARGEO_TABLE);
+ TabularWriter writer = new JcrTabularWriter(tableNode, header,
+ ArgeoTypes.ARGEO_CSV);
+ for (int i = 0; i < rowCount; i++) {
+ List<Object> objs = new ArrayList<Object>();
+ for (int j = 0; j < columnCount; j++) {
+ objs.add(stringValue);
+ }
+ writer.appendRow(objs);
+ }
+ writer.close();
+ session().save();
+
+ if (log.isDebugEnabled())
+ log.debug("Wrote tabular content " + rowCount + " rows, "
+ + columnCount + " columns");
+ // read
+ TabularRowIterator rowIt = new JcrTabularRowIterator(tableNode);
+ Long count = 0l;
+ while (rowIt.hasNext()) {
+ TabularRow tr = rowIt.next();
+ assertEquals(header.size(), tr.size());
+ count++;
+ }
+ assertEquals(rowCount, count);
+ if (log.isDebugEnabled())
+ log.debug("Read tabular content " + rowCount + " rows, "
+ + columnCount + " columns");
+ }
+
+ protected File getRepositoryFile() throws Exception {
+ Resource res = new ClassPathResource(
+ "org/argeo/server/jcr/repository-h2.xml");
+ return res.getFile();
+ }
+
+ protected Repository createRepository() throws Exception {
+ // JackrabbitContainer repo = new JackrabbitContainer();
+ // repo.setHomeDirectory(getHomeDir());
+ // repo.setConfiguration(new FileSystemResource(
+ // getRepositoryFile()));
+ // repo.setInMemory(true);
+ // repo.set
+ Repository repository = new TransientRepository(getRepositoryFile(),
+ getHomeDir());
+ return repository;
+ }
+
+}