/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility creating hbase friendly keys.
 * Use when fabricating row names or column qualifiers.
 * <p>TODO: Add createSchemeless key, a key that doesn't care if scheme is
 * http or https.
 * @see Bytes#split(byte[], byte[], int)
 */
public class Keying {
  /** Artificial scheme prefixed to every transformed key. */
  private static final String SCHEME = "r:";

  /**
   * Splits a hierarchical URI into three groups:
   * (1) scheme, <code>://</code> and optional <code>userinfo@</code>,
   * (2) host, (3) everything after the host (port, path, query, fragment).
   */
  private static final Pattern URI_RE_PARSER =
    Pattern.compile("^([^:/?#]+://(?:[^/?#@]+@)?)([^:/?#]+)(.*)$");

  /**
   * Makes a key out of passed URI for use as row name or column qualifier.
   *
   * This method runs transforms on the passed URI so it sits better
   * as a key (or portion-of-a-key) in hbase.  The <code>host</code> portion of
   * the URI authority is reversed so subdomains sort under their parent
   * domain.  The returned String is an opaque URI of an artificial
   * <code>r:</code> scheme to prevent the result being considered a URI of
   * the original scheme.  Here is an example of the transform: The url
   * <code>http://lucene.apache.org/index.html?query=something#middle</code> is
   * returned as
   * <code>r:http://org.apache.lucene/index.html?query=something#middle</code>
   * The transforms are reversible.  No transform is done if passed URI is
   * not hierarchical; a <code>null</code> or empty argument is likewise
   * returned as is.
   *
   * <p>If authority <code>userinfo</code> is present, will mess up the sort
   * (until we do more work).</p>
   *
   * @param u URL to transform.
   * @return An opaque URI of artificial 'r' scheme with host portion of URI
   * authority reversed (if present).
   * @throws IllegalArgumentException if <code>u</code> already starts with
   * the artificial <code>r:</code> scheme.
   * @see #keyToUri(String)
   * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC2396</a>
   */
  public static String createKey(final String u) {
    if (u == null) {
      // Nothing to transform; mirror the "return the input" no-match path
      // instead of failing with a NullPointerException.
      return null;
    }
    if (u.startsWith(SCHEME)) {
      throw new IllegalArgumentException("Starts with " + SCHEME);
    }
    Matcher m = getMatcher(u);
    if (m == null || !m.matches()) {
      // If no match, return original String.
      return u;
    }
    return SCHEME + m.group(1) + reverseHostname(m.group(2)) + m.group(3);
  }

  /**
   * Reverse the {@link #createKey(String)} transform.
   *
   * @param s <code>URI</code> made by {@link #createKey(String)}.
   * @return 'Restored' URI made by reversing the {@link #createKey(String)}
   * transform.  A <code>null</code> argument, a String that does not start
   * with the <code>r:</code> scheme, or one whose remainder is not a
   * hierarchical URI is returned unchanged.
   */
  public static String keyToUri(final String s) {
    if (s == null || !s.startsWith(SCHEME)) {
      return s;
    }
    Matcher m = getMatcher(s.substring(SCHEME.length()));
    if (m == null || !m.matches()) {
      // If no match, return original String.
      return s;
    }
    return m.group(1) + reverseHostname(m.group(2)) + m.group(3);
  }

  /**
   * @param u String to parse; may be null or empty.
   * @return A matcher of {@link #URI_RE_PARSER} over <code>u</code>, or null
   * if <code>u</code> is null or empty.
   */
  private static Matcher getMatcher(final String u) {
    if (u == null || u.length() <= 0) {
      return null;
    }
    return URI_RE_PARSER.matcher(u);
  }

  /**
   * Reverses the order of the dot-separated labels of a hostname, e.g.
   * <code>lucene.apache.org</code> becomes <code>org.apache.lucene</code>.
   *
   * <p>Dots are kept as tokens and the whole token sequence is reversed, so
   * empty labels survive: <code>example.com.</code> becomes
   * <code>.com.example</code>.  Applying this method twice therefore always
   * yields the original hostname, which is what makes
   * {@link #createKey(String)} reversible.</p>
   *
   * @param hostname Hostname to reverse; may be null.
   * @return Hostname with its labels in reverse order, or the empty String if
   * <code>hostname</code> is null.
   */
  private static String reverseHostname(final String hostname) {
    if (hostname == null) {
      return "";
    }
    StringBuilder sb = new StringBuilder(hostname.length());
    // returnDelims=true: each "." comes back as its own token, so leading,
    // trailing and doubled dots are not silently dropped.
    StringTokenizer st = new StringTokenizer(hostname, ".", true);
    while (st.hasMoreTokens()) {
      // Prepending each token reverses the sequence.
      sb.insert(0, st.nextToken());
    }
    return sb.toString();
  }
}