X-Git-Url: https://git.argeo.org/?p=gpl%2Fargeo-suite.git;a=blobdiff_plain;f=publishing%2Forg.argeo.publishing.ui%2Fsrc%2Forg%2Fargeo%2Fdocbook%2Fui%2FDbkTextInterpreter.java;fp=publishing%2Forg.argeo.publishing.ui%2Fsrc%2Forg%2Fargeo%2Fdocbook%2Fui%2FDbkTextInterpreter.java;h=17810505057b5c4b1a077c86a1bbe1783c57ae28;hp=eafddd9243dddb232afaa4b5b44a0d2f6028358a;hb=2c7baf5cc1437770abc7df32e29e3c9ca29b7132;hpb=309a56223bbb3ef2698cb336a3e1c7c38ec288c3 diff --git a/publishing/org.argeo.publishing.ui/src/org/argeo/docbook/ui/DbkTextInterpreter.java b/publishing/org.argeo.publishing.ui/src/org/argeo/docbook/ui/DbkTextInterpreter.java index eafddd9..1781050 100644 --- a/publishing/org.argeo.publishing.ui/src/org/argeo/docbook/ui/DbkTextInterpreter.java +++ b/publishing/org.argeo.publishing.ui/src/org/argeo/docbook/ui/DbkTextInterpreter.java @@ -1,26 +1,45 @@ package org.argeo.docbook.ui; -import static org.argeo.docbook.DbkUtils.isDbk; import static org.argeo.docbook.DbkType.para; import static org.argeo.docbook.DbkType.title; +import static org.argeo.docbook.DbkUtils.isDbk; +import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; import java.io.StringReader; +import java.nio.charset.StandardCharsets; import java.util.List; +import java.util.UUID; +import javax.jcr.ImportUUIDBehavior; import javax.jcr.Item; import javax.jcr.Node; +import javax.jcr.NodeIterator; import javax.jcr.Property; +import javax.jcr.PropertyIterator; import javax.jcr.RepositoryException; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.io.IOUtils; import org.argeo.cms.text.TextInterpreter; +import org.argeo.docbook.DbkAttr; +import org.argeo.docbook.DbkType; import org.argeo.jcr.Jcr; import org.argeo.jcr.JcrException; import org.argeo.jcr.JcrxType; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.w3c.dom.Text; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; /** Based on HTML with a few Wiki-like shortcuts. */ public class DbkTextInterpreter implements TextInterpreter { + private DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); @Override public void write(Item item, String content) { @@ -30,12 +49,55 @@ public class DbkTextInterpreter implements TextInterpreter { if (isDbk(node, para) || isDbk(node, title)) { String raw = convertToStorage(node, content); validateBeforeStoring(raw); - Node jcrText; - if (!node.hasNode(Jcr.JCR_XMLTEXT)) - jcrText = node.addNode(Jcr.JCR_XMLTEXT, JcrxType.JCRX_XMLTEXT); - else - jcrText = node.getNode(Jcr.JCR_XMLTEXT); - jcrText.setProperty(Jcr.JCR_XMLCHARACTERS, raw); + + String jcrUuid = node.getIdentifier(); +// if (node.hasProperty(Property.JCR_UUID)) +// jcrUuid = node.getProperty(Property.JCR_UUID).getString(); +// else { +// // TODO use time based +// jcrUuid = UUID.randomUUID().toString(); +// node.setProperty(Property.JCR_UUID, jcrUuid); +// node.getSession().save(); +// } + + StringBuilder namespaces = new StringBuilder(); + namespaces.append(" xmlns:dbk=\"http://docbook.org/ns/docbook\""); + namespaces.append(" xmlns:jcr=\"http://www.jcp.org/jcr/1.0\""); + namespaces.append(" xmlns:xlink=\"http://www.w3.org/1999/xlink\""); + raw = "<" + node.getName() + " jcr:uuid=\"" + jcrUuid + "\"" + namespaces + ">" + raw + ""; +// System.out.println(raw); + try (InputStream in = new ByteArrayInputStream(raw.getBytes(StandardCharsets.UTF_8))) { + node.getSession().importXML(node.getParent().getPath(), in, + ImportUUIDBehavior.IMPORT_UUID_COLLISION_REPLACE_EXISTING); + // node.getSession().save(); + } catch (IOException e) { + throw new IllegalArgumentException("Cannot parse raw content of " + node, e); + } + +// try { +// DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder(); +// Document document; +// try (Reader in = new StringReader(raw)) { +// document = documentBuilder.parse(new InputSource(in)); +// } +// NodeList nl = document.getChildNodes(); +// for (int i = 0; i < nl.getLength(); i++) { +// org.w3c.dom.Node n = nl.item(i); +// if (node instanceof Text) { +// +// } +// } +// } catch (ParserConfigurationException | SAXException | IOException e) { +// throw new IllegalArgumentException("Cannot parse raw content of " + node, e); +// } + +// Node jcrText; +// if (!node.hasNode(Jcr.JCR_XMLTEXT)) +// jcrText = node.addNode(Jcr.JCR_XMLTEXT, JcrxType.JCRX_XMLTEXT); +// else +// jcrText = node.getNode(Jcr.JCR_XMLTEXT); +// jcrText.setProperty(Jcr.JCR_XMLCHARACTERS, raw); } else { throw new IllegalArgumentException("Don't know how to interpret " + node); } @@ -66,12 +128,28 @@ public class DbkTextInterpreter implements TextInterpreter { if (item instanceof Node) { Node node = (Node) item; if (isDbk(node, para) || isDbk(node, title)) { - Node jcrText = node.getNode(Jcr.JCR_XMLTEXT); - String txt = jcrText.getProperty(Jcr.JCR_XMLCHARACTERS).getString(); - // TODO make it more robust - // txt = txt.replace("\n", "").replace("\t", ""); - txt = txt.replace("\t", " "); - return txt; + StringBuilder sb = new StringBuilder(); + readXml(node, sb); +// NodeIterator nit = node.getNodes(); +// while (nit.hasNext()) { +// Node child = nit.nextNode(); +// if (child.getName().equals(Jcr.JCR_XMLTEXT)) { +// Node jcrText = node.getNode(Jcr.JCR_XMLTEXT); +// String txt = jcrText.getProperty(Jcr.JCR_XMLCHARACTERS).getString(); +// // TODO make it more robust +// // txt = txt.replace("\n", "").replace("\t", ""); +// txt = txt.replace("\t", " "); +// sb.append(txt); +// } else { +// try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { +// child.getSession().exportDocumentView(child.getPath(), out, true, false); +// sb.append(new String(out.toByteArray(), StandardCharsets.UTF_8)); +// } catch (IOException e) { +// throw new IllegalStateException("Cannot export " + child, e); +// } +// } +// } + return sb.toString(); } else { throw new IllegalArgumentException("Don't know how to interpret " + node); } @@ -84,10 +162,63 @@ public class DbkTextInterpreter implements TextInterpreter { } } - final static int BR_LENGTH = "
".length(); + private void readXml(Node node, StringBuilder sb) throws RepositoryException { + NodeIterator nit = node.getNodes(); + while (nit.hasNext()) { + Node child = nit.nextNode(); + if (child.getName().equals(Jcr.JCR_XMLTEXT)) { + String txt = child.getProperty(Jcr.JCR_XMLCHARACTERS).getString(); + // TODO make it more robust + // txt = txt.replace("\n", "").replace("\t", ""); + txt = txt.replace("\t", " "); + sb.append(txt); + } else { + sb.append('<').append(child.getName()); + PropertyIterator pit = child.getProperties(); + properties: while (pit.hasNext()) { + Property p = pit.nextProperty(); + if (p.getName().startsWith("jcr:")) + continue properties; + sb.append(' ').append(p.getName()).append("=\"").append(p.getString()).append('\"'); + } + sb.append('>'); + readXml(child, sb); +// try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { +// child.getSession().exportDocumentView(child.getPath(), out, true, false); +// sb.append(new String(out.toByteArray(), StandardCharsets.UTF_8)); +// } catch (IOException e) { +// throw new IllegalStateException("Cannot export " + child, e); +// } + sb.append("'); + } + } + } - public String readSimpleHtml(Item item) { - String raw = raw(item); + private void readAsSimpleHtml(Node node, StringBuilder sb) throws RepositoryException { + NodeIterator nit = node.getNodes(); + while (nit.hasNext()) { + Node child = nit.nextNode(); + if (child.getName().equals(Jcr.JCR_XMLTEXT)) { + String txt = child.getProperty(Jcr.JCR_XMLCHARACTERS).getString(); + // TODO make it more robust + // txt = txt.replace("\n", "").replace("\t", ""); + txt = txt.replace("\t", " "); + String html = textToSimpleHtml(txt); + sb.append(html); + } else if (child.getName().equals(DbkType.link.get())) { + if (child.hasProperty(DbkAttr.XLINK_HREF)) { + String href = child.getProperty(DbkAttr.XLINK_HREF).getString(); + sb.append(""); + readAsSimpleHtml(child, sb); + sb.append(""); + } + } else { + // ignore + } + } + } + + private String textToSimpleHtml(String raw) { // FIXME the saved data should be corrected instead. if (raw.indexOf('&') >= 0) { raw = raw.replace("&", "&"); @@ -121,16 +252,18 @@ public class DbkTextInterpreter implements TextInterpreter { } catch (IOException e) { throw new RuntimeException(e); } -// String[] lines = raw.split("[\r\n]+"); -// if (lines.length == 1) -// return lines[0]; -// StringBuilder sb = new StringBuilder(raw.length() + lines.length * BR_LENGTH); -// for (int i = 0; i < lines.length; i++) { -// if (i != 0) -// sb.append("
"); -// sb.append(lines[i]); -// } -// return sb.toString(); + } + + final static int BR_LENGTH = "
".length(); + + public String readSimpleHtml(Item item) { + try { + StringBuilder sb = new StringBuilder(); + readAsSimpleHtml((Node) item, sb); + return sb.toString(); + } catch (RepositoryException e) { + throw new JcrException("Cannot convert " + item + " to simple HTML", e); + } } // EXTENSIBILITY