2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
17 package org
.argeo
.jcr
.fs
;
19 import java
.io
.ByteArrayOutputStream
;
20 import java
.io
.UnsupportedEncodingException
;
21 import java
.security
.MessageDigest
;
22 import java
.security
.NoSuchAlgorithmException
;
23 import java
.util
.ArrayList
;
24 import java
.util
.BitSet
;
25 import java
.util
.Properties
;
28 * <b>Hacked from org.apache.jackrabbit.util.Text in Jackrabbit JCR Commons</b>
29 * This Class provides some text related utilities
42 public static final char[] hexTable
= "0123456789abcdef".toCharArray();
45 * Calculate an MD5 hash of the string given.
50 * the character encoding to use
51 * @return a hex encoded string of the md5 digested input
53 public static String
md5(String data
, String enc
) throws UnsupportedEncodingException
{
55 return digest("MD5", data
.getBytes(enc
));
56 } catch (NoSuchAlgorithmException e
) {
57 throw new InternalError("MD5 digest not available???");
62 * Calculate an MD5 hash of the string given using 'utf-8' encoding.
66 * @return a hex encoded string of the md5 digested input
68 public static String
md5(String data
) {
70 return md5(data
, "utf-8");
71 } catch (UnsupportedEncodingException e
) {
72 throw new InternalError("UTF8 digest not available???");
77 * Digest the plain string using the given algorithm.
80 * The alogrithm for the digest. This algorithm must be supported
81 * by the MessageDigest class.
83 * The plain text String to be digested.
85 * The character encoding to use
86 * @return The digested plain text String represented as Hex digits.
87 * @throws java.security.NoSuchAlgorithmException
88 * if the desired algorithm is not supported by the
89 * MessageDigest class.
90 * @throws java.io.UnsupportedEncodingException
91 * if the encoding is not supported
93 public static String
digest(String algorithm
, String data
, String enc
)
94 throws NoSuchAlgorithmException
, UnsupportedEncodingException
{
96 return digest(algorithm
, data
.getBytes(enc
));
100 * Digest the plain string using the given algorithm.
103 * The algorithm for the digest. This algorithm must be supported
104 * by the MessageDigest class.
106 * the data to digest with the given algorithm
107 * @return The digested plain text String represented as Hex digits.
108 * @throws java.security.NoSuchAlgorithmException
109 * if the desired algorithm is not supported by the
110 * MessageDigest class.
112 public static String
digest(String algorithm
, byte[] data
) throws NoSuchAlgorithmException
{
114 MessageDigest md
= MessageDigest
.getInstance(algorithm
);
115 byte[] digest
= md
.digest(data
);
116 StringBuilder res
= new StringBuilder(digest
.length
* 2);
117 for (byte b
: digest
) {
118 res
.append(hexTable
[(b
>> 4) & 15]);
119 res
.append(hexTable
[b
& 15]);
121 return res
.toString();
125 * returns an array of strings decomposed of the original string, split at
126 * every occurrence of 'ch'. if 2 'ch' follow each other with no
127 * intermediate characters, empty "" entries are avoided.
130 * the string to decompose
132 * the character to use a split pattern
133 * @return an array of strings
135 public static String
[] explode(String str
, int ch
) {
136 return explode(str
, ch
, false);
140 * returns an array of strings decomposed of the original string, split at
141 * every occurrence of 'ch'.
144 * the string to decompose
146 * the character to use as a split pattern
147 * @param respectEmpty
148 * if <code>true</code>, empty elements are generated
149 * @return an array of strings
151 public static String
[] explode(String str
, int ch
, boolean respectEmpty
) {
152 if (str
== null || str
.length() == 0) {
153 return new String
[0];
156 ArrayList
<String
> strings
= new ArrayList
<String
>();
161 while ((pos
= str
.indexOf(ch
, lastpos
)) >= 0) {
162 if (pos
- lastpos
> 0 || respectEmpty
) {
163 strings
.add(str
.substring(lastpos
, pos
));
168 if (lastpos
< str
.length()) {
169 strings
.add(str
.substring(lastpos
));
170 } else if (respectEmpty
&& lastpos
== str
.length()) {
174 // return string array
175 return strings
.toArray(new String
[strings
.size()]);
179 * Concatenates all strings in the string array using the specified
184 * @return the concatenated string
186 public static String
implode(String
[] arr
, String delim
) {
187 StringBuilder buf
= new StringBuilder();
188 for (int i
= 0; i
< arr
.length
; i
++) {
194 return buf
.toString();
198 * Replaces all occurrences of <code>oldString</code> in <code>text</code>
199 * with <code>newString</code>.
203 * old substring to be replaced with <code>newString</code>
205 * new substring to replace occurrences of <code>oldString</code>
208 public static String
replace(String text
, String oldString
, String newString
) {
209 if (text
== null || oldString
== null || newString
== null) {
210 throw new IllegalArgumentException("null argument");
212 int pos
= text
.indexOf(oldString
);
217 StringBuilder sb
= new StringBuilder(text
.length());
219 sb
.append(text
.substring(lastPos
, pos
));
220 sb
.append(newString
);
221 lastPos
= pos
+ oldString
.length();
222 pos
= text
.indexOf(oldString
, lastPos
);
224 if (lastPos
< text
.length()) {
225 sb
.append(text
.substring(lastPos
));
227 return sb
.toString();
231 * Replaces XML characters in the given string that might need escaping as
232 * XML text or attribute
238 public static String
encodeIllegalXMLCharacters(String text
) {
239 return encodeMarkupCharacters(text
, false);
243 * Replaces HTML characters in the given string that might need escaping as
244 * HTML text or attribute
250 public static String
encodeIllegalHTMLCharacters(String text
) {
251 return encodeMarkupCharacters(text
, true);
254 private static String
encodeMarkupCharacters(String text
, boolean isHtml
) {
256 throw new IllegalArgumentException("null argument");
258 StringBuilder buf
= null;
259 int length
= text
.length();
261 for (int i
= 0; i
< length
; i
++) {
262 int ch
= text
.charAt(i
);
270 buf
= new StringBuilder();
273 buf
.append(text
.substring(pos
, i
));
282 } else if (ch
== '>') {
284 } else if (ch
== '&') {
286 } else if (ch
== '"') {
287 buf
.append(""");
288 } else if (ch
== '\'') {
289 buf
.append(isHtml ?
"'" : "'");
296 buf
.append(text
.substring(pos
));
298 return buf
.toString();
303 * The list of characters that are not encoded by the <code>escape()</code>
304 * and <code>unescape()</code> methods. They contain the characters
305 * defined as 'unreserved' in section 2.3 of the RFC 2396 'URI generic syntax':
309 * unreserved = alphanum | mark
310 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
313 public static BitSet URISave
;
316 * Same as {@link #URISave} but also contains the '/'
318 public static BitSet URISaveEx
;
321 URISave
= new BitSet(256);
323 for (i
= 'a'; i
<= 'z'; i
++) {
326 for (i
= 'A'; i
<= 'Z'; i
++) {
329 for (i
= '0'; i
<= '9'; i
++) {
342 URISaveEx
= (BitSet
) URISave
.clone();
347 * Does an URL encoding of the <code>string</code> using the
348 * <code>escape</code> character. The characters that don't need encoding
349 * are those defined 'unreserved' in section 2.3 of the 'URI generic syntax'
350 * RFC 2396, but without the escape character.
353 * the string to encode.
355 * the escape character.
356 * @return the escaped string
357 * @throws NullPointerException
358 * if <code>string</code> is <code>null</code>.
360 public static String
escape(String string
, char escape
) {
361 return escape(string
, escape
, false);
365 * Does an URL encoding of the <code>string</code> using the
366 * <code>escape</code> character. The characters that don't need encoding
367 * are those defined 'unreserved' in section 2.3 of the 'URI generic syntax'
368 * RFC 2396, but without the escape character. If <code>isPath</code> is
369 * <code>true</code>, additionally the slash '/' is ignored, too.
372 * the string to encode.
374 * the escape character.
376 * if <code>true</code>, the string is treated as path
377 * @return the escaped string
378 * @throws NullPointerException
379 * if <code>string</code> is <code>null</code>.
381 public static String
escape(String string
, char escape
, boolean isPath
) {
383 BitSet validChars
= isPath ? URISaveEx
: URISave
;
384 byte[] bytes
= string
.getBytes("utf-8");
385 StringBuilder out
= new StringBuilder(bytes
.length
);
386 for (byte aByte
: bytes
) {
387 int c
= aByte
& 0xff;
388 if (validChars
.get(c
) && c
!= escape
) {
389 out
.append((char) c
);
392 out
.append(hexTable
[(c
>> 4) & 0x0f]);
393 out
.append(hexTable
[(c
) & 0x0f]);
396 return out
.toString();
397 } catch (UnsupportedEncodingException e
) {
398 throw new InternalError(e
.toString());
403 * Does a URL encoding of the <code>string</code>. The characters that don't
404 * need encoding are those defined 'unreserved' in section 2.3 of the 'URI
405 * generic syntax' RFC 2396.
408 * the string to encode
409 * @return the escaped string
410 * @throws NullPointerException
411 * if <code>string</code> is <code>null</code>.
413 public static String
escape(String string
) {
414 return escape(string
, '%');
418 * Does a URL encoding of the <code>path</code>. The characters that don't
419 * need encoding are those defined 'unreserved' in section 2.3 of the 'URI
420 * generic syntax' RFC 2396. In contrast to the {@link #escape(String)}
421 * method, not the entire path string is escaped, but every individual part
422 * (i.e. the slashes are not escaped).
426 * @return the escaped path
427 * @throws NullPointerException
428 * if <code>path</code> is <code>null</code>.
430 public static String
escapePath(String path
) {
431 return escape(path
, '%', true);
435 * Does a URL decoding of the <code>string</code> using the
436 * <code>escape</code> character. Please note that in opposite to the
437 * {@link java.net.URLDecoder} it does not transform the + into spaces.
440 * the string to decode
442 * the escape character
443 * @return the decoded string
444 * @throws NullPointerException
445 * if <code>string</code> is <code>null</code>.
446 * @throws IllegalArgumentException
447 * if the 2 characters following the escape character do not
448 * represent a hex-number or if not enough characters follow an
451 public static String
unescape(String string
, char escape
) {
453 byte[] utf8
= string
.getBytes("utf-8");
455 // Check whether escape occurs at invalid position
456 if ((utf8
.length
>= 1 && utf8
[utf8
.length
- 1] == escape
)
457 || (utf8
.length
>= 2 && utf8
[utf8
.length
- 2] == escape
)) {
458 throw new IllegalArgumentException("Premature end of escape sequence at end of input");
461 ByteArrayOutputStream out
= new ByteArrayOutputStream(utf8
.length
);
462 for (int k
= 0; k
< utf8
.length
; k
++) {
465 out
.write((decodeDigit(utf8
[++k
]) << 4) + decodeDigit(utf8
[++k
]));
471 return new String(out
.toByteArray(), "utf-8");
472 } catch (UnsupportedEncodingException e
) {
473 throw new InternalError(e
.toString());
478 * Does a URL decoding of the <code>string</code>. Please note that in
479 * opposite to the {@link java.net.URLDecoder} it does not transform the +
483 * the string to decode
484 * @return the decoded string
485 * @throws NullPointerException
486 * if <code>string</code> is <code>null</code>.
487 * @throws ArrayIndexOutOfBoundsException
488 * if not enough character follow an escape character
489 * @throws IllegalArgumentException
490 * if the 2 characters following the escape character do not
491 * represent a hex-number.
493 public static String
unescape(String string
) {
494 return unescape(string
, '%');
498 * Escapes all illegal JCR name characters of a string. The encoding is
499 * loosely modeled after URI encoding, but only encodes the characters it
500 * absolutely needs to in order to make the resulting string a valid JCR
501 * name. Use {@link #unescapeIllegalJcrChars(String)} for decoding.
504 * <xmp> simplename ::= onecharsimplename | twocharsimplename |
505 * threeormorecharname onecharsimplename ::= (* Any Unicode character
506 * except: '.', '/', ':', '[', ']', '*', '|' or any whitespace character *)
507 * twocharsimplename ::= '.' onecharsimplename | onecharsimplename '.' |
508 * onecharsimplename onecharsimplename threeormorecharname ::= nonspace
509 * string nonspace string ::= char | string char char ::= nonspace | ' '
510 * nonspace ::= (* Any Unicode character except: '/', ':', '[', ']', '*',
511 * '|' or any whitespace character *) </xmp>
515 * @return the escaped name
517 public static String
escapeIllegalJcrChars(String name
) {
518 return escapeIllegalChars(name
, "%/:[]*|\t\r\n");
522 * Escapes all illegal JCR 1.0 name characters of a string. Use
523 * {@link #unescapeIllegalJcrChars(String)} for decoding.
526 * <xmp> simplename ::= onecharsimplename | twocharsimplename |
527 * threeormorecharname onecharsimplename ::= (* Any Unicode character
528 * except: '.', '/', ':', '[', ']', '*', ''', '"', '|' or any whitespace
529 * character *) twocharsimplename ::= '.' onecharsimplename |
530 * onecharsimplename '.' | onecharsimplename onecharsimplename
531 * threeormorecharname ::= nonspace string nonspace string ::= char | string
532 * char char ::= nonspace | ' ' nonspace ::= (* Any Unicode character
533 * except: '/', ':', '[', ']', '*', ''', '"', '|' or any whitespace
534 * character *) </xmp>
536 * @since Apache Jackrabbit 2.3.2 and 2.2.10
538 * "https://issues.apache.org/jira/browse/JCR-3128">JCR-3128</a>
541 * @return the escaped name
543 public static String
escapeIllegalJcr10Chars(String name
) {
544 return escapeIllegalChars(name
, "%/:[]*'\"|\t\r\n");
547 private static String
escapeIllegalChars(String name
, String illegal
) {
548 StringBuilder buffer
= new StringBuilder(name
.length() * 2);
549 for (int i
= 0; i
< name
.length(); i
++) {
550 char ch
= name
.charAt(i
);
551 if (illegal
.indexOf(ch
) != -1 || (ch
== '.' && name
.length() < 3)
552 || (ch
== ' ' && (i
== 0 || i
== name
.length() - 1))) {
554 buffer
.append(Character
.toUpperCase(Character
.forDigit(ch
/ 16, 16)));
555 buffer
.append(Character
.toUpperCase(Character
.forDigit(ch
% 16, 16)));
560 return buffer
.toString();
564 * Escapes illegal XPath search characters at the end of a string.
567 * A search string like 'test?' will run into a ParseException documented in
568 * http://issues.apache.org/jira/browse/JCR-1248
571 * the string to encode
572 * @return the escaped string
574 public static String
escapeIllegalXpathSearchChars(String s
) {
575 StringBuilder sb
= new StringBuilder();
576 sb
.append(s
.substring(0, (s
.length() - 1)));
577 char c
= s
.charAt(s
.length() - 1);
578 // NOTE: keep this in sync with _ESCAPED_CHAR below!
579 if (c
== '!' || c
== '(' || c
== ':' || c
== '^' || c
== '[' || c
== ']' || c
== '{' || c
== '}' || c
== '?') {
583 return sb
.toString();
587 * Unescapes previously escaped jcr chars.
589 * Please note that this is not exactly the same as the URL-related
590 * {@link #unescape(String)}, since it handles the byte-encoding
594 * the name to unescape
595 * @return the unescaped name
597 public static String
unescapeIllegalJcrChars(String name
) {
598 StringBuilder buffer
= new StringBuilder(name
.length());
599 int i
= name
.indexOf('%');
600 while (i
> -1 && i
+ 2 < name
.length()) {
601 buffer
.append(name
.toCharArray(), 0, i
);
602 int a
= Character
.digit(name
.charAt(i
+ 1), 16);
603 int b
= Character
.digit(name
.charAt(i
+ 2), 16);
604 if (a
> -1 && b
> -1) {
605 buffer
.append((char) (a
* 16 + b
));
606 name
= name
.substring(i
+ 3);
609 name
= name
.substring(i
+ 1);
611 i
= name
.indexOf('%');
614 return buffer
.toString();
618 * Returns the name part of the path. If the given path is already a name
619 * (i.e. contains no slashes) it is returned.
623 * @return the name part or <code>null</code> if <code>path</code> is
626 public static String
getName(String path
) {
627 return getName(path
, '/');
631 * Returns the name part of the path, delimited by the given
632 * <code>delim</code>. If the given path is already a name (i.e. contains no
633 * <code>delim</code> characters) it is returned.
639 * @return the name part or <code>null</code> if <code>path</code> is
642 public static String
getName(String path
, char delim
) {
643 return path
== null ?
null : path
.substring(path
.lastIndexOf(delim
) + 1);
647 * Same as {@link #getName(String)} but adding the possibility to pass paths
648 * that end with a trailing '/'
650 * @see #getName(String)
652 public static String
getName(String path
, boolean ignoreTrailingSlash
) {
653 if (ignoreTrailingSlash
&& path
!= null && path
.endsWith("/") && path
.length() > 1) {
654 path
= path
.substring(0, path
.length() - 1);
656 return getName(path
);
660 * Returns the namespace prefix of the given <code>qname</code>. If the
661 * prefix is missing, an empty string is returned. Please note, that this
662 * method does not validate the name or prefix.
664 * the qname has the format: qname := [prefix ':'] local;
668 * @return the prefix of the name or "".
670 * @see #getLocalName(String)
672 * @throws NullPointerException
673 * if <code>qname</code> is <code>null</code>
675 public static String
getNamespacePrefix(String qname
) {
676 int pos
= qname
.indexOf(':');
677 return pos
>= 0 ? qname
.substring(0, pos
) : "";
681 * Returns the local name of the given <code>qname</code>. Please note, that
682 * this method does not validate the name.
684 * the qname has the format: qname := [prefix ':'] local;
688 * @return the localname
690 * @see #getNamespacePrefix(String)
692 * @throws NullPointerException
693 * if <code>qname</code> is <code>null</code>
695 public static String
getLocalName(String qname
) {
696 int pos
= qname
.indexOf(':');
697 return pos
>= 0 ? qname
.substring(pos
+ 1) : qname
;
701 * Determines, if two paths denote hierarchical siblins.
707 * @return true if on same level, false otherwise
709 public static boolean isSibling(String p1
, String p2
) {
710 int pos1
= p1
.lastIndexOf('/');
711 int pos2
= p2
.lastIndexOf('/');
712 return (pos1
== pos2
&& pos1
>= 0 && p1
.regionMatches(0, p2
, 0, pos1
));
716 * Determines if the <code>descendant</code> path is hierarchical a
717 * descendant of <code>path</code>.
722 * the potential descendant
723 * @return <code>true</code> if the <code>descendant</code> is a descendant;
724 * <code>false</code> otherwise.
726 public static boolean isDescendant(String path
, String descendant
) {
727 String pattern
= path
.endsWith("/") ? path
: path
+ "/";
728 return !pattern
.equals(descendant
) && descendant
.startsWith(pattern
);
732 * Determines if the <code>descendant</code> path is hierarchical a
733 * descendant of <code>path</code> or equal to it.
738 * the potential descendant
739 * @return <code>true</code> if the <code>descendant</code> is a descendant
740 * or equal; <code>false</code> otherwise.
742 public static boolean isDescendantOrEqual(String path
, String descendant
) {
743 if (path
.equals(descendant
)) {
746 String pattern
= path
.endsWith("/") ? path
: path
+ "/";
747 return descendant
.startsWith(pattern
);
752 * Returns the n<sup>th</sup> relative parent of the path, where n=level.
756 * Text.getRelativeParent("/foo/bar/test", 1) == "/foo/bar"
760 * the path of the page
762 * the level of the parent
764 public static String
getRelativeParent(String path
, int level
) {
765 int idx
= path
.length();
767 idx
= path
.lastIndexOf('/', idx
- 1);
773 return (idx
== 0) ?
"/" : path
.substring(0, idx
);
777 * Same as {@link #getRelativeParent(String, int)} but adding the
778 * possibility to pass paths that end with a trailing '/'
780 * @see #getRelativeParent(String, int)
782 public static String
getRelativeParent(String path
, int level
, boolean ignoreTrailingSlash
) {
783 if (ignoreTrailingSlash
&& path
.endsWith("/") && path
.length() > 1) {
784 path
= path
.substring(0, path
.length() - 1);
786 return getRelativeParent(path
, level
);
790 * Returns the n<sup>th</sup> absolute parent of the path, where n=level.
794 * Text.getAbsoluteParent("/foo/bar/test", 1) == "/foo/bar"
798 * the path of the page
800 * the level of the parent
802 public static String
getAbsoluteParent(String path
, int level
) {
804 int len
= path
.length();
805 while (level
>= 0 && idx
< len
) {
806 idx
= path
.indexOf('/', idx
+ 1);
812 return level
>= 0 ?
"" : path
.substring(0, idx
);
816 * Performs variable replacement on the given string value. Each
817 * <code>${...}</code> sequence within the given value is replaced with the
818 * value of the named parser variable. If a variable is not found in the
819 * properties an IllegalArgumentException is thrown unless
820 * <code>ignoreMissing</code> is <code>true</code>. In the latter case, the
821 * missing variable is replaced by the empty string.
825 * @param ignoreMissing
826 * if <code>true</code>, missing variables are replaced by the
828 * @return value after variable replacements
829 * @throws IllegalArgumentException
830 * if the replacement of a referenced variable is not found
832 public static String
replaceVariables(Properties variables
, String value
, boolean ignoreMissing
)
833 throws IllegalArgumentException
{
834 StringBuilder result
= new StringBuilder();
837 // +--+-+--------+-+-----------------+
839 // +--+-+--------+-+-----------------+
840 int p
= 0, q
= value
.indexOf("${"); // Find first ${
842 result
.append(value
.substring(p
, q
)); // Text before ${
844 q
= value
.indexOf("}", q
+ 2); // Find }
846 String variable
= value
.substring(p
+ 2, q
);
847 String replacement
= variables
.getProperty(variable
);
848 if (replacement
== null) {
852 throw new IllegalArgumentException("Replacement not found for ${" + variable
+ "}.");
855 result
.append(replacement
);
857 q
= value
.indexOf("${", p
); // Find next ${
860 result
.append(value
.substring(p
, value
.length())); // Trailing text
862 return result
.toString();
865 private static byte decodeDigit(byte b
) {
866 if (b
>= 0x30 && b
<= 0x39) {
867 return (byte) (b
- 0x30);
868 } else if (b
>= 0x41 && b
<= 0x46) {
869 return (byte) (b
- 0x37);
870 } else if (b
>= 0x61 && b
<= 0x66) {
871 return (byte) (b
- 0x57);
873 throw new IllegalArgumentException("Escape sequence is not hexadecimal: " + (char) b
);