1 package org
.argeo
.cms
.acr
.xml
;
3 import java
.io
.ByteArrayInputStream
;
4 import java
.io
.IOException
;
5 import java
.io
.InputStream
;
6 import java
.io
.OutputStream
;
7 import java
.lang
.System
.Logger
;
8 import java
.lang
.System
.Logger
.Level
;
9 import java
.nio
.file
.DirectoryStream
;
10 import java
.nio
.file
.Files
;
11 import java
.nio
.file
.Path
;
12 import java
.nio
.file
.Paths
;
14 import javax
.xml
.parsers
.DocumentBuilder
;
15 import javax
.xml
.parsers
.DocumentBuilderFactory
;
16 import javax
.xml
.parsers
.ParserConfigurationException
;
17 import javax
.xml
.transform
.OutputKeys
;
18 import javax
.xml
.transform
.Transformer
;
19 import javax
.xml
.transform
.TransformerConfigurationException
;
20 import javax
.xml
.transform
.TransformerException
;
21 import javax
.xml
.transform
.TransformerFactory
;
22 import javax
.xml
.transform
.TransformerFactoryConfigurationError
;
23 import javax
.xml
.transform
.dom
.DOMSource
;
24 import javax
.xml
.transform
.stream
.StreamResult
;
25 import javax
.xml
.transform
.stream
.StreamSource
;
27 import org
.w3c
.dom
.DOMException
;
28 import org
.w3c
.dom
.Document
;
29 import org
.xml
.sax
.SAXException
;
/**
 * Consistently normalises an XML document in order to ease diffs (typically in a
 * versioning system).
 *
 * <p>
 * Documents are parsed with a {@link DocumentBuilder} and re-serialised through
 * a {@link Transformer} compiled from the {@code stripSpaces.xsl} resource
 * located next to this class, with UTF-8 encoding and indentation enabled.
 *
 * <p>
 * An instance holds a single {@link DocumentBuilder} and a single
 * {@link Transformer}; JAXP does not guarantee that either may be used from
 * several threads at once, so do not share an instance between threads without
 * external synchronisation.
 */
public class XmlNormalizer {
    private static final Logger logger = System.getLogger(XmlNormalizer.class.getName());

    /** Parser used to load the documents to normalise. */
    private final DocumentBuilder documentBuilder;
    /** Transformer compiled from {@code stripSpaces.xsl}, used to serialise the documents. */
    private final Transformer transformer;

    /** Creates a normaliser with the default indentation. */
    public XmlNormalizer() {
        // NOTE(review): the body of this constructor was not visible in the reviewed
        // source; 2 mirrors the default used by normalizeAndIndent(in, out) - confirm.
        this(2);
    }

    /**
     * Creates a normaliser.
     *
     * @param indent value passed to the transformer factory as its
     *               {@code "indent-number"} attribute
     * @throws IllegalStateException if the document builder or the transformer
     *                               cannot be created
     */
    public XmlNormalizer(int indent) {
        try {
            documentBuilder = DocumentBuilderFactory.newNSInstance().newDocumentBuilder();
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            transformerFactory.setAttribute("indent-number", indent);
            try (InputStream in = XmlNormalizer.class.getResourceAsStream("stripSpaces.xsl")) {
                transformer = transformerFactory.newTransformer(new StreamSource(in));
            }
        } catch (TransformerConfigurationException | ParserConfigurationException
                | TransformerFactoryConfigurationError | IOException e) {
            throw new IllegalStateException("Cannot initialise document builder and transformer", e);
        }
    }

    /**
     * Normalises, in place, every file returned by the directory stream.
     *
     * <p>
     * The stream is only iterated, not closed: closing it remains the caller's
     * responsibility.
     *
     * @param ds the files to normalise
     * @throws IOException if a file cannot be read or written
     */
    public void normalizeXmlFiles(DirectoryStream<Path> ds) throws IOException {
        for (Path path : ds) {
            normalizeXmlFile(path);
        }
    }

    /**
     * Normalises a single XML file in place.
     *
     * <p>
     * The normalised content is first produced in memory and only written back
     * once normalisation has succeeded, so that a file which fails to parse is
     * left untouched instead of being truncated.
     *
     * @param path the file to normalise
     * @throws IOException      if the file cannot be read or written
     * @throws RuntimeException if the content cannot be parsed or transformed
     */
    public void normalizeXmlFile(Path path) throws IOException {
        byte[] original = Files.readAllBytes(path);
        // Fully-qualified on purpose: keeps this class independent of the import block.
        java.io.ByteArrayOutputStream normalized = new java.io.ByteArrayOutputStream(original.length);
        try (ByteArrayInputStream in = new ByteArrayInputStream(original)) {
            normalizeAndIndent(in, normalized);
        }
        // Same default open options as Files.newOutputStream(path): create, truncate, write.
        Files.write(path, normalized.toByteArray());
        logger.log(Level.DEBUG, () -> "Normalized XML " + path);
    }

    /**
     * Normalises the XML read from {@code in} and writes the result to
     * {@code out}; equivalent to {@code normalizeAndIndent(in, out, 2)}.
     *
     * @param in  the XML to normalise
     * @param out where the normalised XML is written
     * @throws IOException if the input cannot be read
     */
    public void normalizeAndIndent(InputStream in, OutputStream out) throws IOException {
        normalizeAndIndent(in, out, 2);
    }

    /**
     * Parses the XML read from {@code in} and serialises it to {@code out} as
     * UTF-8 with indentation enabled. Neither stream is closed by this method.
     *
     * @param in     the XML to normalise
     * @param out    where the normalised XML is written
     * @param indent NOTE(review): not used by the code visible here; the
     *               indentation width appears to be the one configured through
     *               the constructor - confirm before relying on this argument
     * @throws IOException      if the input cannot be read
     * @throws RuntimeException if the XML cannot be parsed or transformed
     */
    public void normalizeAndIndent(InputStream in, OutputStream out, int indent) throws IOException {
        try {
            Document document = documentBuilder.parse(in);

            // clear whitespaces outside tags
            // (previous DOM-based approach, kept for reference)
            // XPath xPath = XPathFactory.newInstance().newXPath();
            // NodeList nodeList = (NodeList) xPath.evaluate("//text()[normalize-space()='']", document,
            //         XPathConstants.NODESET);
            //
            // for (int i = 0; i < nodeList.getLength(); ++i) {
            //     Node node = nodeList.item(i);
            //     node.getParentNode().removeChild(node);
            // }

            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            // transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");

            transformer.transform(new DOMSource(document), new StreamResult(out));
        } catch (DOMException | IllegalArgumentException | SAXException | TransformerFactoryConfigurationError
                | TransformerException e) {
            throw new RuntimeException("Cannot normalise and indent XML", e);
        }
    }

    /**
     * Normalises in place all {@code *.svg} files found directly in a directory.
     *
     * @param args a single argument: the directory to process
     * @throws IOException              if the directory or one of its files
     *                                  cannot be read or written
     * @throws IllegalArgumentException if no directory is provided
     */
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            throw new IllegalArgumentException("Usage: XmlNormalizer <directory>");
        }
        Path dir = Paths.get(args[0]);
        XmlNormalizer xmlNormalizer = new XmlNormalizer();
        // A DirectoryStream holds an open directory handle and must be closed.
        try (DirectoryStream<Path> ds = Files.newDirectoryStream(dir, "*.svg")) {
            xmlNormalizer.normalizeXmlFiles(ds);
        }
    }
}