X-Git-Url: https://git.argeo.org/?a=blobdiff_plain;f=publishing%2Forg.argeo.publishing.ui%2Fsrc%2Forg%2Fargeo%2Fdocbook%2Fui%2FDbkTextInterpreter.java;h=c6fe0d6212c7c8b983e66a1867374be60c05df2c;hb=717a06075d39f660f3c130ac14b0f414a9bc4c69;hp=f5ab9975dcf2b59433f61eed1b6dd9350762756a;hpb=4bd6cf0556f597ee73c8f13df45019ccf9e418a6;p=gpl%2Fargeo-suite.git
diff --git a/publishing/org.argeo.publishing.ui/src/org/argeo/docbook/ui/DbkTextInterpreter.java b/publishing/org.argeo.publishing.ui/src/org/argeo/docbook/ui/DbkTextInterpreter.java
index f5ab997..c6fe0d6 100644
--- a/publishing/org.argeo.publishing.ui/src/org/argeo/docbook/ui/DbkTextInterpreter.java
+++ b/publishing/org.argeo.publishing.ui/src/org/argeo/docbook/ui/DbkTextInterpreter.java
@@ -1,26 +1,34 @@
package org.argeo.docbook.ui;
+import static org.argeo.docbook.DbkType.para;
+import static org.argeo.docbook.DbkType.title;
import static org.argeo.docbook.DbkUtils.isDbk;
-import static org.argeo.docbook.DocBookType.para;
-import static org.argeo.docbook.DocBookType.title;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
import java.util.List;
+import javax.jcr.ImportUUIDBehavior;
import javax.jcr.Item;
import javax.jcr.Node;
+import javax.jcr.NodeIterator;
import javax.jcr.Property;
+import javax.jcr.PropertyIterator;
import javax.jcr.RepositoryException;
+import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.commons.io.IOUtils;
-import org.argeo.cms.text.TextInterpreter;
+import org.argeo.docbook.DbkAttr;
+import org.argeo.docbook.DbkType;
import org.argeo.jcr.Jcr;
import org.argeo.jcr.JcrException;
-import org.argeo.jcr.JcrxType;
/** Based on HTML with a few Wiki-like shortcuts. */
public class DbkTextInterpreter implements TextInterpreter {
+ private DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
@Override
public void write(Item item, String content) {
@@ -30,12 +38,55 @@ public class DbkTextInterpreter implements TextInterpreter {
if (isDbk(node, para) || isDbk(node, title)) {
String raw = convertToStorage(node, content);
validateBeforeStoring(raw);
- Node jcrText;
- if (!node.hasNode(Jcr.JCR_XMLTEXT))
- jcrText = node.addNode(Jcr.JCR_XMLTEXT, JcrxType.JCRX_XMLTEXT);
- else
- jcrText = node.getNode(Jcr.JCR_XMLTEXT);
- jcrText.setProperty(Jcr.JCR_XMLCHARACTERS, raw);
+
+ String jcrUuid = node.getIdentifier();
+// if (node.hasProperty(Property.JCR_UUID))
+// jcrUuid = node.getProperty(Property.JCR_UUID).getString();
+// else {
+// // TODO use time based
+// jcrUuid = UUID.randomUUID().toString();
+// node.setProperty(Property.JCR_UUID, jcrUuid);
+// node.getSession().save();
+// }
+
+ StringBuilder namespaces = new StringBuilder();
+ namespaces.append(" xmlns:dbk=\"http://docbook.org/ns/docbook\"");
+ namespaces.append(" xmlns:jcr=\"http://www.jcp.org/jcr/1.0\"");
+ namespaces.append(" xmlns:xlink=\"http://www.w3.org/1999/xlink\"");
+ raw = "<" + node.getName() + " jcr:uuid=\"" + jcrUuid + "\"" + namespaces + ">" + raw + ""
+ + node.getName() + ">";
+// System.out.println(raw);
+ try (InputStream in = new ByteArrayInputStream(raw.getBytes(StandardCharsets.UTF_8))) {
+ node.getSession().importXML(node.getParent().getPath(), in,
+ ImportUUIDBehavior.IMPORT_UUID_COLLISION_REPLACE_EXISTING);
+ // node.getSession().save();
+ } catch (IOException e) {
+ throw new IllegalArgumentException("Cannot parse raw content of " + node, e);
+ }
+
+// try {
+// DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
+// Document document;
+// try (Reader in = new StringReader(raw)) {
+// document = documentBuilder.parse(new InputSource(in));
+// }
+// NodeList nl = document.getChildNodes();
+// for (int i = 0; i < nl.getLength(); i++) {
+// org.w3c.dom.Node n = nl.item(i);
+// if (node instanceof Text) {
+//
+// }
+// }
+// } catch (ParserConfigurationException | SAXException | IOException e) {
+// throw new IllegalArgumentException("Cannot parse raw content of " + node, e);
+// }
+
+// Node jcrText;
+// if (!node.hasNode(Jcr.JCR_XMLTEXT))
+// jcrText = node.addNode(Jcr.JCR_XMLTEXT, JcrxType.JCRX_XMLTEXT);
+// else
+// jcrText = node.getNode(Jcr.JCR_XMLTEXT);
+// jcrText.setProperty(Jcr.JCR_XMLCHARACTERS, raw);
} else {
throw new IllegalArgumentException("Don't know how to interpret " + node);
}
@@ -66,12 +117,28 @@ public class DbkTextInterpreter implements TextInterpreter {
if (item instanceof Node) {
Node node = (Node) item;
if (isDbk(node, para) || isDbk(node, title)) {
- Node jcrText = node.getNode(Jcr.JCR_XMLTEXT);
- String txt = jcrText.getProperty(Jcr.JCR_XMLCHARACTERS).getString();
- // TODO make it more robust
- // txt = txt.replace("\n", "").replace("\t", "");
- txt = txt.replace("\t", " ");
- return txt;
+ StringBuilder sb = new StringBuilder();
+ readXml(node, sb);
+// NodeIterator nit = node.getNodes();
+// while (nit.hasNext()) {
+// Node child = nit.nextNode();
+// if (child.getName().equals(Jcr.JCR_XMLTEXT)) {
+// Node jcrText = node.getNode(Jcr.JCR_XMLTEXT);
+// String txt = jcrText.getProperty(Jcr.JCR_XMLCHARACTERS).getString();
+// // TODO make it more robust
+// // txt = txt.replace("\n", "").replace("\t", "");
+// txt = txt.replace("\t", " ");
+// sb.append(txt);
+// } else {
+// try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
+// child.getSession().exportDocumentView(child.getPath(), out, true, false);
+// sb.append(new String(out.toByteArray(), StandardCharsets.UTF_8));
+// } catch (IOException e) {
+// throw new IllegalStateException("Cannot export " + child, e);
+// }
+// }
+// }
+ return sb.toString();
} else {
throw new IllegalArgumentException("Don't know how to interpret " + node);
}
@@ -84,10 +151,81 @@ public class DbkTextInterpreter implements TextInterpreter {
}
}
- final static int BR_LENGTH = "
".length();
+ /**
+  * Recursively serialises the children of {@code node} as XML markup appended
+  * to {@code sb}.
+  * <p>
+  * Children named {@link Jcr#JCR_XMLTEXT} contribute the string value of their
+  * {@link Jcr#JCR_XMLCHARACTERS} property (tabs replaced by spaces). Any other
+  * child is written as an element named after the child, with all of its
+  * properties whose name does not start with {@code jcr:} as attributes,
+  * followed by its own children and a matching closing tag.
+  *
+  * @param node the node whose children are serialised
+  * @param sb   the buffer receiving the markup
+  * @throws RepositoryException if the underlying repository access fails
+  */
+ private void readXml(Node node, StringBuilder sb) throws RepositoryException {
+     NodeIterator nit = node.getNodes();
+     while (nit.hasNext()) {
+         Node child = nit.nextNode();
+         String childName = child.getName();
+         if (childName.equals(Jcr.JCR_XMLTEXT)) {
+             String txt = child.getProperty(Jcr.JCR_XMLCHARACTERS).getString();
+             // TODO make it more robust
+             // txt = txt.replace("\n", "").replace("\t", "");
+             txt = txt.replace("\t", " ");
+             sb.append(txt);
+         } else {
+             sb.append('<').append(childName);
+             PropertyIterator pit = child.getProperties();
+             while (pit.hasNext()) {
+                 Property p = pit.nextProperty();
+                 // JCR bookkeeping properties are not part of the markup
+                 if (p.getName().startsWith("jcr:"))
+                     continue;
+                 // NOTE(review): attribute values are written unescaped; a value
+                 // containing '&' or '"' would presumably yield malformed XML -
+                 // confirm whether stored values can contain such characters.
+                 sb.append(' ').append(p.getName()).append("=\"").append(p.getString()).append('\"');
+             }
+             sb.append('>');
+             readXml(child, sb);
+             // the closing tag needs the "</" prefix to balance the opening tag
+             sb.append("</").append(childName).append('>');
+         }
+     }
+ }
- public String readSimpleHtml(Item item) {
- String raw = raw(item);
+ /**
+  * Recursively renders the children of {@code node} as simple HTML appended to
+  * {@code sb}.
+  * <p>
+  * Children named {@link Jcr#JCR_XMLTEXT} are converted with
+  * {@link #textToSimpleHtml(String)} (tabs replaced by spaces first). Children
+  * named after {@link DbkType#link} that carry a {@link DbkAttr#XLINK_HREF}
+  * property become an HTML anchor wrapping their own rendered children; a link
+  * without that property is skipped together with its content. Every other
+  * child is ignored.
+  *
+  * @param node the node whose children are rendered
+  * @param sb   the buffer receiving the HTML
+  * @throws RepositoryException if the underlying repository access fails
+  */
+ private void readAsSimpleHtml(Node node, StringBuilder sb) throws RepositoryException {
+     NodeIterator nit = node.getNodes();
+     while (nit.hasNext()) {
+         Node child = nit.nextNode();
+         String childName = child.getName();
+         if (childName.equals(Jcr.JCR_XMLTEXT)) {
+             String txt = child.getProperty(Jcr.JCR_XMLCHARACTERS).getString();
+             // TODO make it more robust
+             // txt = txt.replace("\n", "").replace("\t", "");
+             txt = txt.replace("\t", " ");
+             sb.append(textToSimpleHtml(txt));
+         } else if (childName.equals(DbkType.link.get())) {
+             if (child.hasProperty(DbkAttr.XLINK_HREF)) {
+                 String href = child.getProperty(DbkAttr.XLINK_HREF).getString();
+                 // Escape the characters that would break out of the quoted
+                 // attribute value; '&' must be replaced first.
+                 // TODO deal with other forbidden XML characters?
+                 href = href.replace("&", "&amp;").replace("\"", "&quot;").replace("<", "&lt;");
+                 sb.append("<a href=\"").append(href).append("\">");
+                 readAsSimpleHtml(child, sb);
+                 sb.append("</a>");
+             }
+         }
+         // any other element is ignored
+     }
+ }
+
+ private String textToSimpleHtml(String raw) {
+ // FIXME the saved data should be corrected instead.
+ if (raw.indexOf('&') >= 0) {
+ raw = raw.replace("&", "&");
+ }
+ if (raw.indexOf('<') >= 0) {
+ raw = raw.replace("<", "<");
+ }
+ if (raw.indexOf('>') >= 0) {
+ raw = raw.replace(">", ">");
+ }
+ if (raw.indexOf('\"') >= 0) {
+ raw = raw.replace("\"", """);
+ }
+ if (raw.indexOf('\'') >= 0) {
+ raw = raw.replace("\'", "'");
+ }
// raw = "" + raw + "";
if (raw.length() == 0)
return raw;
@@ -105,16 +243,18 @@ public class DbkTextInterpreter implements TextInterpreter {
} catch (IOException e) {
throw new RuntimeException(e);
}
-// String[] lines = raw.split("[\r\n]+");
-// if (lines.length == 1)
-// return lines[0];
-// StringBuilder sb = new StringBuilder(raw.length() + lines.length * BR_LENGTH);
-// for (int i = 0; i < lines.length; i++) {
-// if (i != 0)
-// sb.append("
");
-// sb.append(lines[i]);
-// }
-// return sb.toString();
+ }
+
+ /** Number of characters in the HTML line-break tag. */
+ final static int BR_LENGTH = "<br/>".length();
+
+ /**
+  * Renders the content below the given item as simple HTML.
+  *
+  * @param item the item to render; must be a {@link Node}
+  * @return the HTML built from the children of the node, possibly empty
+  * @throws IllegalArgumentException if {@code item} is not a {@link Node}
+  * @throws JcrException             if the underlying repository access fails
+  */
+ public String readSimpleHtml(Item item) {
+     // fail with an explicit message rather than a bare ClassCastException
+     if (!(item instanceof Node))
+         throw new IllegalArgumentException("Don't know how to interpret " + item);
+     try {
+         StringBuilder sb = new StringBuilder();
+         readAsSimpleHtml((Node) item, sb);
+         return sb.toString();
+     } catch (RepositoryException e) {
+         throw new JcrException("Cannot convert " + item + " to simple HTML", e);
+     }
+ }
// EXTENSIBILITY