package org.argeo.cms.acr.xml;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.DirectoryIteratorException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;

/**
 * Consistently normalises an XML document in order to ease diffs (typically in
 * a versioning system).
 * <p>
 * A document is parsed with a namespace-aware {@link DocumentBuilder} and then
 * re-serialised, indented and UTF-8 encoded, through an XSLT
 * {@link Transformer} built from the {@code stripSpaces.xsl} resource located
 * next to this class.
 * <p>
 * An instance keeps a single {@link DocumentBuilder} and a single
 * {@link Transformer}; per the JAXP documentation such objects must not be used
 * by several threads concurrently, so neither should an instance of this
 * class.
 */
public class XmlNormalizer {
    /** Name of the XSLT resource, resolved relative to this class. */
    private static final String STYLESHEET = "stripSpaces.xsl";

    /**
     * Indentation width used when none is given.
     * NOTE(review): assumed to be 2, matching the value passed by
     * {@link #normalizeAndIndent(InputStream, OutputStream)} - confirm.
     */
    private static final int DEFAULT_INDENT = 2;

    private final DocumentBuilder documentBuilder;
    private final Transformer transformer;

    /** Creates a normaliser using the default indentation width. */
    public XmlNormalizer() {
        this(DEFAULT_INDENT);
    }

    /**
     * Creates a normaliser.
     *
     * @param indent indentation width, passed to the {@link TransformerFactory}
     *               as its {@code "indent-number"} attribute
     * @throws IllegalStateException if the stylesheet resource is missing or the
     *                               parser / transformer cannot be created
     */
    public XmlNormalizer(int indent) {
        try {
            documentBuilder = DocumentBuilderFactory.newNSInstance().newDocumentBuilder();
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            transformerFactory.setAttribute("indent-number", indent);
            try (InputStream in = XmlNormalizer.class.getResourceAsStream(STYLESHEET)) {
                if (in == null) {
                    // getResourceAsStream() signals a missing resource with null; fail
                    // here with a clear message instead of deep inside the XSLT compiler.
                    throw new IllegalStateException("Cannot find stylesheet resource " + STYLESHEET);
                }
                transformer = transformerFactory.newTransformer(new StreamSource(in));
            }
        } catch (TransformerConfigurationException | ParserConfigurationException
                | TransformerFactoryConfigurationError | IOException e) {
            throw new IllegalStateException("Cannot initialise document builder and transformer", e);
        }
    }

    /**
     * Normalises, in place, every file provided by a directory stream.
     * <p>
     * The stream is not closed by this method; closing it remains the caller's
     * responsibility.
     *
     * @param ds the files to normalise
     * @throws IOException if a file cannot be read or written, or if iterating
     *                     over the directory fails
     */
    public void normalizeXmlFiles(DirectoryStream<Path> ds) throws IOException {
        try {
            for (Path path : ds) {
                normalizeXmlFile(path);
            }
        } catch (DirectoryIteratorException e) {
            // Directory iteration reports I/O failures wrapped in this unchecked
            // exception; surface the underlying IOException as declared.
            throw e.getCause();
        }
    }

    /**
     * Normalises a single XML file in place.
     * <p>
     * The normalised content is fully produced in memory before the file is
     * overwritten, so that a file which fails to parse is left untouched rather
     * than truncated.
     *
     * @param path the file to normalise
     * @throws IOException      if the file cannot be read or written
     * @throws RuntimeException if the content cannot be parsed or transformed
     */
    public void normalizeXmlFile(Path path) throws IOException {
        byte[] original = Files.readAllBytes(path);
        ByteArrayOutputStream normalized = new ByteArrayOutputStream(original.length);
        try (ByteArrayInputStream in = new ByteArrayInputStream(original)) {
            normalizeAndIndent(in, normalized);
        }
        Files.write(path, normalized.toByteArray());
    }

    /**
     * Normalises XML read from {@code in} and writes the result to {@code out}.
     * Neither stream is closed by this method.
     *
     * @param in  the XML to normalise
     * @param out where the normalised XML is written
     * @throws IOException      if reading the input fails
     * @throws RuntimeException if the content cannot be parsed or transformed
     */
    public void normalizeAndIndent(InputStream in, OutputStream out) throws IOException {
        normalizeAndIndent(in, out, 2);
    }

    /**
     * Normalises XML read from {@code in} and writes the result, indented and
     * encoded as UTF-8, to {@code out}. Neither stream is closed by this method.
     *
     * @param in     the XML to normalise
     * @param out    where the normalised XML is written
     * @param indent NOTE(review): not used by this implementation; the effective
     *               indentation width is the one given to the constructor -
     *               confirm whether it should be honoured here
     * @throws IOException      if reading the input fails
     * @throws RuntimeException if the content cannot be parsed or transformed
     */
    public void normalizeAndIndent(InputStream in, OutputStream out, int indent) throws IOException {
        try {
            Document document = documentBuilder.parse(in);

            // Merge adjacent text nodes and drop empty ones. Whitespace-only text
            // between tags is presumably removed by the stripSpaces.xsl stylesheet
            // - TODO confirm against the stylesheet.
            document.normalize();

            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");

            transformer.transform(new DOMSource(document), new StreamResult(out));
        } catch (DOMException | IllegalArgumentException | SAXException | TransformerFactoryConfigurationError
                | TransformerException e) {
            throw new RuntimeException("Cannot normalise and indent XML", e);
        }
    }

    /**
     * Normalises, in place, all {@code *.svg} files directly contained in the
     * directory given as first argument.
     *
     * @param args {@code args[0]} is the directory to process
     * @throws IOException if the directory or one of its files cannot be
     *                     processed
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            System.err.println("Usage: XmlNormalizer <directory>");
            System.exit(1);
        }
        Path dir = Paths.get(args[0]);
        XmlNormalizer xmlNormalizer = new XmlNormalizer();
        // The directory stream holds an open directory handle: always close it.
        try (DirectoryStream<Path> ds = Files.newDirectoryStream(dir, "*.svg")) {
            xmlNormalizer.normalizeXmlFiles(ds);
        }
    }
}