X-Git-Url: https://git.argeo.org/?p=gpl%2Fargeo-suite.git;a=blobdiff_plain;f=publishing%2Forg.argeo.publishing.ui%2Fsrc%2Forg%2Fargeo%2Fdocbook%2Fui%2FDbkTextInterpreter.java;h=c853535440d03f758cccc12f1c0f73cf96afd032;hp=f13826bd693f1d66aff097f78e7923db1b79aa7c;hb=a23c1999af7205c71e253d4b698a37c267944ea8;hpb=147ada7da5bf6292569f17a53a77fca04c97f707 diff --git a/publishing/org.argeo.publishing.ui/src/org/argeo/docbook/ui/DbkTextInterpreter.java b/publishing/org.argeo.publishing.ui/src/org/argeo/docbook/ui/DbkTextInterpreter.java index f13826b..c853535 100644 --- a/publishing/org.argeo.publishing.ui/src/org/argeo/docbook/ui/DbkTextInterpreter.java +++ b/publishing/org.argeo.publishing.ui/src/org/argeo/docbook/ui/DbkTextInterpreter.java @@ -1,32 +1,96 @@ package org.argeo.docbook.ui; +import static org.argeo.docbook.DbkType.para; +import static org.argeo.docbook.DbkType.title; +import static org.argeo.docbook.DbkUtils.isDbk; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringReader; +import java.nio.charset.StandardCharsets; +import java.util.List; + +import javax.jcr.ImportUUIDBehavior; import javax.jcr.Item; import javax.jcr.Node; +import javax.jcr.NodeIterator; import javax.jcr.Property; +import javax.jcr.PropertyIterator; import javax.jcr.RepositoryException; +import javax.xml.parsers.DocumentBuilderFactory; -import org.argeo.cms.CmsException; -import org.argeo.cms.text.TextInterpreter; +import org.apache.commons.io.IOUtils; +import org.argeo.docbook.DbkAttr; +import org.argeo.docbook.DbkType; +import org.argeo.jcr.Jcr; +import org.argeo.jcr.JcrException; /** Based on HTML with a few Wiki-like shortcuts. */ public class DbkTextInterpreter implements TextInterpreter { + private DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); + + private String linkCssClass = DbkType.link.name(); @Override public void write(Item item, String content) { try { if (item instanceof Node) { Node node = (Node) item; - if (node.isNodeType(DocBookTypes.PARA)) { + if (isDbk(node, para) || isDbk(node, title)) { String raw = convertToStorage(node, content); validateBeforeStoring(raw); - Node jcrText; - if (!node.hasNode(DocBookNames.JCR_XMLTEXT)) - jcrText = node.addNode(DocBookNames.JCR_XMLTEXT, DocBookTypes.XMLTEXT); - else - jcrText = node.getNode(DocBookNames.JCR_XMLTEXT); - jcrText.setProperty(DocBookNames.JCR_XMLCHARACTERS, raw); + + String jcrUuid = node.getIdentifier(); +// if (node.hasProperty(Property.JCR_UUID)) +// jcrUuid = node.getProperty(Property.JCR_UUID).getString(); +// else { +// // TODO use time based +// jcrUuid = UUID.randomUUID().toString(); +// node.setProperty(Property.JCR_UUID, jcrUuid); +// node.getSession().save(); +// } + + StringBuilder namespaces = new StringBuilder(); + namespaces.append(" xmlns:dbk=\"http://docbook.org/ns/docbook\""); + namespaces.append(" xmlns:jcr=\"http://www.jcp.org/jcr/1.0\""); + namespaces.append(" xmlns:xlink=\"http://www.w3.org/1999/xlink\""); + raw = "<" + node.getName() + " jcr:uuid=\"" + jcrUuid + "\"" + namespaces + ">" + raw + ""; +// System.out.println(raw); + try (InputStream in = new ByteArrayInputStream(raw.getBytes(StandardCharsets.UTF_8))) { + node.getSession().importXML(node.getParent().getPath(), in, + ImportUUIDBehavior.IMPORT_UUID_COLLISION_REPLACE_EXISTING); + // node.getSession().save(); + } catch (IOException e) { + throw new IllegalArgumentException("Cannot parse raw content of " + node, e); + } + +// try { +// DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder(); +// Document document; +// try (Reader in = new StringReader(raw)) { +// document = documentBuilder.parse(new InputSource(in)); +// } +// NodeList nl = document.getChildNodes(); +// for (int i = 0; i < nl.getLength(); i++) { +// org.w3c.dom.Node n = nl.item(i); +// if (node instanceof Text) { +// +// } +// } +// } catch (ParserConfigurationException | SAXException | IOException e) { +// throw new IllegalArgumentException("Cannot parse raw content of " + node, e); +// } + +// Node jcrText; +// if (!node.hasNode(Jcr.JCR_XMLTEXT)) +// jcrText = node.addNode(Jcr.JCR_XMLTEXT, JcrxType.JCRX_XMLTEXT); +// else +// jcrText = node.getNode(Jcr.JCR_XMLTEXT); +// jcrText.setProperty(Jcr.JCR_XMLCHARACTERS, raw); } else { - throw new CmsException("Don't know how to interpret " + node); + throw new IllegalArgumentException("Don't know how to interpret " + node); } } else {// property Property property = (Property) item; @@ -34,7 +98,7 @@ public class DbkTextInterpreter implements TextInterpreter { } // item.getSession().save(); } catch (RepositoryException e) { - throw new CmsException("Cannot set content on " + item, e); + throw new JcrException("Cannot set content on " + item, e); } } @@ -44,7 +108,7 @@ public class DbkTextInterpreter implements TextInterpreter { String raw = raw(item); return convertFromStorage(item, raw); } catch (RepositoryException e) { - throw new CmsException("Cannot get " + item + " for edit", e); + throw new JcrException("Cannot get " + item + " for edit", e); } } @@ -54,21 +118,147 @@ public class DbkTextInterpreter implements TextInterpreter { item.getSession().refresh(true); if (item instanceof Node) { Node node = (Node) item; - if (node.isNodeType(DocBookTypes.PARA)) { - Node jcrText = node.getNode(DocBookNames.JCR_XMLTEXT); - String txt = jcrText.getProperty(DocBookNames.JCR_XMLCHARACTERS).getString(); - // TODO make it more robust - txt = txt.replace("\n", "").replace("\t", ""); - return txt; + if (isDbk(node, para) || isDbk(node, title)) { + StringBuilder sb = new StringBuilder(); + readXml(node, sb); +// NodeIterator nit = node.getNodes(); +// while (nit.hasNext()) { +// Node child = nit.nextNode(); +// if (child.getName().equals(Jcr.JCR_XMLTEXT)) { +// Node jcrText = node.getNode(Jcr.JCR_XMLTEXT); +// String txt = jcrText.getProperty(Jcr.JCR_XMLCHARACTERS).getString(); +// // TODO make it more robust +// // txt = txt.replace("\n", "").replace("\t", ""); +// txt = txt.replace("\t", " "); +// sb.append(txt); +// } else { +// try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { +// child.getSession().exportDocumentView(child.getPath(), out, true, false); +// sb.append(new String(out.toByteArray(), StandardCharsets.UTF_8)); +// } catch (IOException e) { +// throw new IllegalStateException("Cannot export " + child, e); +// } +// } +// } + return sb.toString(); } else { - throw new CmsException("Don't know how to interpret " + node); + throw new IllegalArgumentException("Don't know how to interpret " + node); } } else {// property Property property = (Property) item; return property.getString(); } } catch (RepositoryException e) { - throw new CmsException("Cannot get " + item + " content", e); + throw new JcrException("Cannot get " + item + " content", e); + } + } + + private void readXml(Node node, StringBuilder sb) throws RepositoryException { + NodeIterator nit = node.getNodes(); + while (nit.hasNext()) { + Node child = nit.nextNode(); + if (child.getName().equals(Jcr.JCR_XMLTEXT)) { + String txt = child.getProperty(Jcr.JCR_XMLCHARACTERS).getString(); + // TODO make it more robust + // txt = txt.replace("\n", "").replace("\t", ""); + txt = txt.replace("\t", " "); + sb.append(txt); + } else { + sb.append('<').append(child.getName()); + PropertyIterator pit = child.getProperties(); + properties: while (pit.hasNext()) { + Property p = pit.nextProperty(); + if (p.getName().startsWith("jcr:")) + continue properties; + sb.append(' ').append(p.getName()).append("=\"").append(p.getString()).append('\"'); + } + sb.append('>'); + readXml(child, sb); +// try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { +// child.getSession().exportDocumentView(child.getPath(), out, true, false); +// sb.append(new String(out.toByteArray(), StandardCharsets.UTF_8)); +// } catch (IOException e) { +// throw new IllegalStateException("Cannot export " + child, e); +// } + sb.append("'); + } + } + } + + private void readAsSimpleHtml(Node node, StringBuilder sb) throws RepositoryException { + NodeIterator nit = node.getNodes(); + while (nit.hasNext()) { + Node child = nit.nextNode(); + if (child.getName().equals(Jcr.JCR_XMLTEXT)) { + String txt = child.getProperty(Jcr.JCR_XMLCHARACTERS).getString(); + // TODO make it more robust + // txt = txt.replace("\n", "").replace("\t", ""); + txt = txt.replace("\t", " "); + String html = textToSimpleHtml(txt); + sb.append(html); + } else if (child.getName().equals(DbkType.link.get())) { + if (child.hasProperty(DbkAttr.XLINK_HREF)) { + String href = child.getProperty(DbkAttr.XLINK_HREF).getString(); + // TODO deal with other forbidden XML characters? + href = href.replace("&", "&"); + sb.append(""); + readAsSimpleHtml(child, sb); + sb.append(""); + } + } else { + // ignore + } + } + } + + private String textToSimpleHtml(String raw) { + // FIXME the saved data should be corrected instead. + if (raw.indexOf('&') >= 0) { + raw = raw.replace("&", "&"); + } + if (raw.indexOf('<') >= 0) { + raw = raw.replace("<", "<"); + } + if (raw.indexOf('>') >= 0) { + raw = raw.replace(">", ">"); + } + if (raw.indexOf('\"') >= 0) { + raw = raw.replace("\"", """); + } + if (raw.indexOf('\'') >= 0) { + raw = raw.replace("\'", "'"); + } +// raw = "" + raw + ""; + if (raw.length() == 0) + return raw; + try (StringReader reader = new StringReader(raw)) { + List lines = IOUtils.readLines(reader); + if (lines.size() == 1) + return lines.get(0); + StringBuilder sb = new StringBuilder(raw.length() + lines.size() * BR_LENGTH); + for (int i = 0; i < lines.size(); i++) { + if (i != 0) + sb.append("
"); + sb.append(lines.get(i)); + } + return sb.toString(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + final static int BR_LENGTH = "
".length(); + + public String readSimpleHtml(Item item) { + try { + StringBuilder sb = new StringBuilder(); +// sb.append("
"); + readAsSimpleHtml((Node) item, sb); +// sb.append("
"); +// System.out.println(sb); + return sb.toString(); + } catch (RepositoryException e) { + throw new JcrException("Cannot convert " + item + " to simple HTML", e); } }