View Javadoc
1   /*
2    *   Licensed to the Apache Software Foundation (ASF) under one
3    *   or more contributor license agreements.  See the NOTICE file
4    *   distributed with this work for additional information
5    *   regarding copyright ownership.  The ASF licenses this file
6    *   to you under the Apache License, Version 2.0 (the
7    *   "License"); you may not use this file except in compliance
8    *   with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   *   Unless required by applicable law or agreed to in writing,
13   *   software distributed under the License is distributed on an
14   *   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   *   KIND, either express or implied.  See the License for the
16   *   specific language governing permissions and limitations
17   *   under the License.
18   *
19   */
20  package org.apache.directory.mavibot.btree.persisted;
21  
22  
23  import java.io.DataInputStream;
24  import java.io.DataOutputStream;
25  import java.io.File;
26  import java.io.FileInputStream;
27  import java.io.FileOutputStream;
28  import java.io.IOException;
29  import java.lang.reflect.Array;
30  import java.util.Arrays;
31  import java.util.Collections;
32  import java.util.Comparator;
33  import java.util.Iterator;
34  import java.util.NoSuchElementException;
35  import java.util.UUID;
36  
37  import org.apache.directory.mavibot.btree.PersistedBTreeBuilder;
38  import org.apache.directory.mavibot.btree.Tuple;
39  import org.apache.directory.mavibot.btree.util.TupleReaderWriter;
40  
41  
42  /**
43   * A utility class for sorting a large number of keys before building a BTree using {@link PersistedBTreeBuilder}.
44   *
45   * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
46   */
47  public class BulkDataSorter<K, V>
48  {
49      private File workDir;
50  
51      private int splitAfter = 1000;
52  
53      private Comparator<Tuple<K, V>> tupleComparator;
54  
55      private TupleReaderWriter<K, V> readerWriter;
56  
57      private boolean sorted;
58  
59  
60      public BulkDataSorter( TupleReaderWriter<K, V> readerWriter, Comparator<Tuple<K, V>> tupleComparator,
61          int splitAfter )
62      {
63          if ( splitAfter <= 0 )
64          {
65              throw new IllegalArgumentException( "Value of splitAfter parameter cannot be null" );
66          }
67  
68          this.splitAfter = splitAfter;
69  
70          this.workDir = new File( System.getProperty( "java.io.tmpdir" ), System.currentTimeMillis() + "-sort" );
71          workDir.mkdir();
72  
73          this.readerWriter = readerWriter;
74          this.tupleComparator = tupleComparator;
75      }
76  
77  
78      public void sort( File dataFile ) throws IOException
79      {
80          int i = 0;
81  
82          Tuple<K, V>[] arr = ( Tuple<K, V>[] ) Array.newInstance( Tuple.class, splitAfter );
83  
84          Tuple<K, V> t = null;
85  
86          DataInputStream in = new DataInputStream( new FileInputStream( dataFile ) );
87  
88          while ( ( t = readerWriter.readUnsortedTuple( in ) ) != null )
89          {
90              arr[i++] = t;
91  
92              if ( ( i % splitAfter ) == 0 )
93              {
94                  i = 0;
95                  Arrays.sort( arr, tupleComparator );
96  
97                  storeSortedData( arr );
98              }
99          }
100 
101         if ( i != 0 )
102         {
103             Tuple<K, V>[] tmp = ( Tuple<K, V>[] ) Array.newInstance( Tuple.class, i );
104             System.arraycopy( arr, 0, tmp, 0, i );
105             Arrays.sort( tmp, tupleComparator );
106 
107             storeSortedData( tmp );
108         }
109 
110         sorted = true;
111     }
112 
113 
114     private void storeSortedData( Tuple<K, V>[] arr ) throws IOException
115     {
116         File tempFile = File.createTempFile( UUID.randomUUID().toString(), ".batch", workDir );
117         DataOutputStream out = new DataOutputStream( new FileOutputStream( tempFile ) );
118 
119         for ( Tuple<K, V> t : arr )
120         {
121             readerWriter.storeSortedTuple( t, out );
122         }
123 
124         out.flush();
125         out.close();
126     }
127 
128 
129     public File getWorkDir()
130     {
131         return workDir;
132     }
133 
134 
135     public Iterator<Tuple<K, V>> getMergeSortedTuples() throws IOException
136     {
137         if ( !sorted )
138         {
139             throw new IllegalStateException( "Data is not sorted" );
140         }
141 
142         File[] batches = workDir.listFiles();
143 
144         if ( batches.length == 0 )
145         {
146             return Collections.EMPTY_LIST.iterator();
147         }
148 
149         final DataInputStream[] streams = new DataInputStream[batches.length];
150 
151         for ( int i = 0; i < batches.length; i++ )
152         {
153             streams[i] = new DataInputStream( new FileInputStream( batches[i] ) );
154         }
155 
156         Iterator<Tuple<K, V>> itr = new Iterator<Tuple<K, V>>()
157         {
158             private Tuple<K, V>[] heads = ( Tuple<K, V>[] ) Array.newInstance( Tuple.class, streams.length );
159 
160             private Tuple<K, V> candidate = null;
161 
162             private boolean closed;
163 
164             private int candidatePos = -1;
165 
166 
167             @Override
168             public boolean hasNext()
169             {
170 
171                 if ( closed )
172                 {
173                     throw new IllegalStateException( "No elements to read" );
174                 }
175 
176                 Tuple<K, V> available = null;
177 
178                 for ( int i = 0; i < streams.length; i++ )
179                 {
180                     if ( heads[i] == null )
181                     {
182                         heads[i] = readerWriter.readSortedTuple( streams[i] );
183                     }
184 
185                     if ( available == null )
186                     {
187                         available = heads[i];
188                         candidatePos = i;
189                     }
190                     else
191                     {
192                         if ( ( available != null ) && ( heads[i] != null ) )
193                         {
194                             int comp = tupleComparator.compare( heads[i], available );
195                             if ( comp <= 0 )
196                             {
197                                 available = heads[i];
198                                 candidatePos = i;
199                             }
200                         }
201                     }
202                 }
203 
204                 heads[candidatePos] = null;
205 
206                 if ( available == null )
207                 {
208                     for ( int i = 0; i < streams.length; i++ )
209                     {
210                         if ( heads[i] != null )
211                         {
212                             available = heads[i];
213                             heads[i] = readerWriter.readUnsortedTuple( streams[i] );
214                             break;
215                         }
216                     }
217                 }
218 
219                 if ( available != null )
220                 {
221                     candidate = available;
222                     return true;
223                 }
224 
225                 // finally close the streams
226                 for ( DataInputStream in : streams )
227                 {
228                     try
229                     {
230                         in.close();
231                     }
232                     catch ( Exception e )
233                     {
234                         e.printStackTrace();
235                     }
236                 }
237 
238                 closed = true;
239 
240                 return false;
241             }
242 
243 
244             @Override
245             public Tuple<K, V> next()
246             {
247                 if ( candidate == null )
248                 {
249                     if ( !closed )
250                     {
251                         hasNext();
252                     }
253                 }
254 
255                 if ( candidate == null )
256                 {
257                     throw new NoSuchElementException( "No tuples found" );
258                 }
259 
260                 return candidate;
261             }
262 
263 
264             @Override
265             public void remove()
266             {
267                 throw new UnsupportedOperationException( "Not supported" );
268             }
269 
270         };
271 
272         return itr;
273     }
274 }