1 /**
2 * Copyright 2010 The Apache Software Foundation
3 *
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20 package org.apache.hadoop.hbase.util;
21
22 import java.util.StringTokenizer;
23 import java.util.regex.Matcher;
24 import java.util.regex.Pattern;
25
26 /**
27 * Utility creating hbase friendly keys.
 * Use when fabricating row names or column qualifiers.
29 * <p>TODO: Add createSchemeless key, a key that doesn't care if scheme is
30 * http or https.
31 * @see Bytes#split(byte[], byte[], int)
32 */
public class Keying {
  /** Artificial scheme prepended to a key whose host portion has been reversed. */
  private static final String SCHEME = "r:";

  /**
   * Splits a hierarchical URI into three capture groups:
   * (1) the scheme, <code>://</code> and optional <code>userinfo@</code>,
   * (2) the host, and (3) everything after the host (port, path, query,
   * fragment).
   */
  private static final Pattern URI_RE_PARSER =
    Pattern.compile("^([^:/?#]+://(?:[^/?#@]+@)?)([^:/?#]+)(.*)$");

  /**
   * Makes a key out of passed URI for use as row name or column qualifier.
   *
   * This method runs transforms on the passed URI so it sits better
   * as a key (or portion-of-a-key) in hbase. The <code>host</code> portion of
   * the URI authority is reversed so subdomains sort under their parent
   * domain. The returned String is an opaque URI of an artificial
   * <code>r:</code> scheme to prevent the result being considered an URI of
   * the original scheme. Here is an example of the transform: The url
   * <code>http://lucene.apache.org/index.html?query=something#middle</code> is
   * returned as
   * <code>r:http://org.apache.lucene/index.html?query=something#middle</code>
   * The transforms are reversible: passing the result to
   * {@link #keyToUri(String)} yields the original URI, including hosts that
   * carry a trailing dot or other empty labels. No transform is done if
   * passed URI is not hierarchical (or is empty); it is returned unchanged.
   *
   * <p>If authority <code>userinfo</code> is present, will mess up the sort
   * (until we do more work).</p>
   *
   * @param u URL to transform. Must not be null.
   * @return An opaque URI of artificial 'r' scheme with host portion of URI
   * authority reversed (if present).
   * @throws IllegalArgumentException if <code>u</code> already starts with
   * the <code>r:</code> scheme.
   * @see #keyToUri(String)
   * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC2396</a>
   */
  public static String createKey(final String u) {
    if (u.startsWith(SCHEME)) {
      throw new IllegalArgumentException("Starts with " + SCHEME);
    }
    final Matcher m = getMatcher(u);
    if (m == null || !m.matches()) {
      // Empty or not a hierarchical URI: hand back the input untouched.
      return u;
    }
    return SCHEME + m.group(1) + reverseHostname(m.group(2)) + m.group(3);
  }

  /**
   * Reverse the {@link #createKey(String)} transform.
   *
   * @param s <code>URI</code> made by {@link #createKey(String)}. Must not be
   * null.
   * @return 'Restored' URI made by reversing the {@link #createKey(String)}
   * transform. If <code>s</code> does not start with the <code>r:</code>
   * scheme, or what follows the scheme is empty or not a hierarchical URI,
   * <code>s</code> is returned unchanged.
   */
  public static String keyToUri(final String s) {
    if (!s.startsWith(SCHEME)) {
      return s;
    }
    final Matcher m = getMatcher(s.substring(SCHEME.length()));
    if (m == null || !m.matches()) {
      // Nothing parseable after the scheme: hand back the input untouched.
      return s;
    }
    return m.group(1) + reverseHostname(m.group(2)) + m.group(3);
  }

  /**
   * @param u String to parse; may be null or empty.
   * @return A matcher of {@link #URI_RE_PARSER} over <code>u</code>, or null
   * if <code>u</code> is null or empty. The caller must still invoke
   * {@link Matcher#matches()}.
   */
  private static Matcher getMatcher(final String u) {
    if (u == null || u.length() <= 0) {
      return null;
    }
    return URI_RE_PARSER.matcher(u);
  }

  /**
   * Reverses the order of the dot-separated labels of a hostname, e.g.
   * <code>lucene.apache.org</code> becomes <code>org.apache.lucene</code>.
   *
   * <p>The dots are kept as tokens and reversed together with the labels, so
   * empty labels survive: <code>example.com.</code> becomes
   * <code>.com.example</code>. Dropping them (as a delimiter-discarding
   * tokenizer does) would map <code>example.com.</code> and
   * <code>example.com</code> to the same key and make the transform
   * irreversible. Applying this method twice returns the original string.
   *
   * @param hostname Host to reverse; may be null.
   * @return The hostname with label order reversed, or the empty string if
   * <code>hostname</code> is null.
   */
  private static String reverseHostname(final String hostname) {
    if (hostname == null) {
      return "";
    }
    final StringBuilder sb = new StringBuilder(hostname.length());
    // returnDelims=true: each "." comes back as its own token, so prepending
    // every token in turn reverses labels and separators alike.
    final StringTokenizer tokens = new StringTokenizer(hostname, ".", true);
    while (tokens.hasMoreTokens()) {
      sb.insert(0, tokens.nextToken());
    }
    return sb.toString();
  }
}