org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize
Class Tokenizer

java.lang.Object
  extended by org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer

@InterfaceAudience.Private
public class Tokenizer
extends Object

Data structure used in the first stage of PrefixTree encoding:

  • accepts a sorted stream of ByteRanges
  • splits them into a set of tokens, each held by a TokenizerNode
  • connects the TokenizerNodes via standard Java references
  • keeps a pool of TokenizerNodes and a reusable byte[] for holding all token content


    Mainly used for turning Cell rowKeys into a trie, but also used for family and qualifier encoding.


    Field Summary
    protected static Boolean INCLUDE_FULL_TREE_IN_TO_STRING
              print/debug
    protected  long lastNodeId
               
    protected  int maxElementLength
               
    protected  ArrayList<TokenizerNode> nodes
               
    protected  int numArraysAdded
              fields
    protected  int numNodes
               
    protected  TokenizerNode root
               
    protected  byte[] tokens
               
    protected  int tokensLength
               
    protected  int treeDepth
               
     
    Constructor Summary
    Tokenizer()
              construct
     
    Method Summary
     void addAll(ArrayList<ByteRange> sortedByteRanges)
              building
    protected  TokenizerNode addNode(TokenizerNode parent, int nodeDepth, int tokenStartOffset, ByteRange token, int inputTokenOffset)
               
     void addSorted(ByteRange bytes)
               
     void appendNodes(List<TokenizerNode> appendTo, boolean includeNonLeaves, boolean includeLeaves)
               
     Tokenizer appendOutputArrayOffsets(List<Integer> offsets)
               
    protected  int appendTokenAndRepointByteRange(ByteRange token, int inputTokenOffset)
               
     List<byte[]> getArrays()
               
     int getMaxElementLength()
               
     void getNode(TokenizerRowSearchResult resultHolder, byte[] key, int keyOffset, int keyLength)
               
     ArrayList<TokenizerNode> getNodes(boolean includeNonLeaves, boolean includeLeaves)
               
     int getNumAdded()
              read
     TokenizerNode getRoot()
              get/set
     String getStructuralString()
               
     int getTreeDepth()
               
     void incrementNumOccurrencesOfLatestValue()
               
    protected  long nextNodeId()
               
     void reset()
               
     Tokenizer setNodeFirstInsertionIndexes()
              write
    protected  void submitMaxNodeDepthCandidate(int nodeDepth)
               
     String toString()
               
     
    Methods inherited from class java.lang.Object
    clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
     

    Field Detail

    numArraysAdded

    protected int numArraysAdded
    fields


    lastNodeId

    protected long lastNodeId

    nodes

    protected ArrayList<TokenizerNode> nodes

    numNodes

    protected int numNodes

    root

    protected TokenizerNode root

    tokens

    protected byte[] tokens

    tokensLength

    protected int tokensLength

    maxElementLength

    protected int maxElementLength

    treeDepth

    protected int treeDepth

    INCLUDE_FULL_TREE_IN_TO_STRING

    protected static final Boolean INCLUDE_FULL_TREE_IN_TO_STRING
    print/debug

    Constructor Detail

    Tokenizer

    public Tokenizer()
    construct

    Method Detail

    reset

    public void reset()

    addAll

    public void addAll(ArrayList<ByteRange> sortedByteRanges)
    building


    addSorted

    public void addSorted(ByteRange bytes)

    incrementNumOccurrencesOfLatestValue

    public void incrementNumOccurrencesOfLatestValue()

    nextNodeId

    protected long nextNodeId()

    addNode

    protected TokenizerNode addNode(TokenizerNode parent,
                                    int nodeDepth,
                                    int tokenStartOffset,
                                    ByteRange token,
                                    int inputTokenOffset)

    appendTokenAndRepointByteRange

    protected int appendTokenAndRepointByteRange(ByteRange token,
                                                 int inputTokenOffset)

    submitMaxNodeDepthCandidate

    protected void submitMaxNodeDepthCandidate(int nodeDepth)

    getNumAdded

    public int getNumAdded()
    read


    getNodes

    public ArrayList<TokenizerNode> getNodes(boolean includeNonLeaves,
                                             boolean includeLeaves)

    appendNodes

    public void appendNodes(List<TokenizerNode> appendTo,
                            boolean includeNonLeaves,
                            boolean includeLeaves)

    getArrays

    public List<byte[]> getArrays()

    getNode

    public void getNode(TokenizerRowSearchResult resultHolder,
                        byte[] key,
                        int keyOffset,
                        int keyLength)

    setNodeFirstInsertionIndexes

    public Tokenizer setNodeFirstInsertionIndexes()
    write


    appendOutputArrayOffsets

    public Tokenizer appendOutputArrayOffsets(List<Integer> offsets)

    toString

    public String toString()
    Overrides:
    toString in class Object

    getStructuralString

    public String getStructuralString()

    getRoot

    public TokenizerNode getRoot()
    get/set


    getMaxElementLength

    public int getMaxElementLength()

    getTreeDepth

    public int getTreeDepth()


    Copyright © 2013 The Apache Software Foundation. All Rights Reserved.