1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.directory.mavibot.btree.memory;
21
22
23 import java.io.DataInputStream;
24 import java.io.DataOutputStream;
25 import java.io.File;
26 import java.io.FileInputStream;
27 import java.io.FileOutputStream;
28 import java.io.IOException;
29 import java.lang.reflect.Array;
30 import java.util.Arrays;
31 import java.util.Collections;
32 import java.util.Comparator;
33 import java.util.Iterator;
34 import java.util.NoSuchElementException;
35 import java.util.UUID;
36
37 import org.apache.directory.mavibot.btree.InMemoryBTreeBuilder;
38 import org.apache.directory.mavibot.btree.Tuple;
39 import org.apache.directory.mavibot.btree.util.TupleReaderWriter;
40
41
42
43
44
45
46
47 public class BulkDataSorter<K, V>
48 {
49 private File workDir;
50
51 private int splitAfter = 1000;
52
53 private Comparator<Tuple<K, V>> tupleComparator;
54
55 private TupleReaderWriter<K, V> readerWriter;
56
57 private boolean sorted;
58
59
60 public BulkDataSorter( TupleReaderWriter<K, V> readerWriter, Comparator<Tuple<K, V>> tupleComparator,
61 int splitAfter )
62 {
63 if ( splitAfter <= 0 )
64 {
65 throw new IllegalArgumentException( "Value of splitAfter parameter cannot be null" );
66 }
67
68 this.splitAfter = splitAfter;
69
70 this.workDir = new File( System.getProperty( "java.io.tmpdir" ), System.currentTimeMillis() + "-sort" );
71 workDir.mkdir();
72
73 this.readerWriter = readerWriter;
74 this.tupleComparator = tupleComparator;
75 }
76
77
78 public void sort( File dataFile ) throws IOException
79 {
80 int i = 0;
81
82 Tuple<K, V>[] arr = ( Tuple<K, V>[] ) Array.newInstance( Tuple.class, splitAfter );
83
84 Tuple<K, V> t = null;
85
86 DataInputStream in = new DataInputStream( new FileInputStream( dataFile ) );
87
88 while ( ( t = readerWriter.readUnsortedTuple( in ) ) != null )
89 {
90 arr[i++] = t;
91
92 if ( ( i % splitAfter ) == 0 )
93 {
94 i = 0;
95 Arrays.sort( arr, tupleComparator );
96
97 storeSortedData( arr );
98 }
99 }
100
101 if ( i != 0 )
102 {
103 Tuple<K, V>[] tmp = ( Tuple<K, V>[] ) Array.newInstance( Tuple.class, i );
104 System.arraycopy( arr, 0, tmp, 0, i );
105 Arrays.sort( tmp, tupleComparator );
106
107 storeSortedData( tmp );
108 }
109
110 sorted = true;
111 }
112
113
114 private void storeSortedData( Tuple<K, V>[] arr ) throws IOException
115 {
116 File tempFile = File.createTempFile( UUID.randomUUID().toString(), ".batch", workDir );
117 DataOutputStream out = new DataOutputStream( new FileOutputStream( tempFile ) );
118
119 for ( Tuple<K, V> t : arr )
120 {
121 readerWriter.storeSortedTuple( t, out );
122 }
123
124 out.flush();
125 out.close();
126 }
127
128
129 public File getWorkDir()
130 {
131 return workDir;
132 }
133
134
135 public Iterator<Tuple<K, V>> getMergeSortedTuples() throws IOException
136 {
137 if ( !sorted )
138 {
139 throw new IllegalStateException( "Data is not sorted" );
140 }
141
142 File[] batches = workDir.listFiles();
143
144 if ( batches.length == 0 )
145 {
146 return Collections.EMPTY_LIST.iterator();
147 }
148
149 final DataInputStream[] streams = new DataInputStream[batches.length];
150
151 for ( int i = 0; i < batches.length; i++ )
152 {
153 streams[i] = new DataInputStream( new FileInputStream( batches[i] ) );
154 }
155
156 Iterator<Tuple<K, V>> itr = new Iterator<Tuple<K, V>>()
157 {
158 private Tuple<K, V>[] heads = ( Tuple<K, V>[] ) Array.newInstance( Tuple.class, streams.length );
159
160 private Tuple<K, V> candidate = null;
161
162 private boolean closed;
163
164 private int candidatePos = -1;
165
166
167 @Override
168 public boolean hasNext()
169 {
170
171 if ( closed )
172 {
173 throw new IllegalStateException( "No elements to read" );
174 }
175
176 Tuple<K, V> available = null;
177
178 for ( int i = 0; i < streams.length; i++ )
179 {
180 if ( heads[i] == null )
181 {
182 heads[i] = readerWriter.readUnsortedTuple( streams[i] );
183 }
184
185 if ( available == null )
186 {
187 available = heads[i];
188 candidatePos = i;
189 }
190 else
191 {
192 if ( ( available != null ) && ( heads[i] != null ) )
193 {
194 int comp = tupleComparator.compare( heads[i], available );
195 if ( comp <= 0 )
196 {
197 available = heads[i];
198 candidatePos = i;
199 }
200 }
201 }
202 }
203
204 heads[candidatePos] = null;
205
206 if ( available == null )
207 {
208 for ( int i = 0; i < streams.length; i++ )
209 {
210 if ( heads[i] != null )
211 {
212 available = heads[i];
213 heads[i] = readerWriter.readUnsortedTuple( streams[i] );
214 break;
215 }
216 }
217 }
218
219 if ( available != null )
220 {
221 candidate = available;
222 return true;
223 }
224
225
226 for ( DataInputStream in : streams )
227 {
228 try
229 {
230 in.close();
231 }
232 catch ( Exception e )
233 {
234 e.printStackTrace();
235 }
236 }
237
238 closed = true;
239
240 return false;
241 }
242
243
244 @Override
245 public Tuple<K, V> next()
246 {
247 if ( candidate == null )
248 {
249 if ( !closed )
250 {
251 hasNext();
252 }
253 }
254
255 if ( candidate == null )
256 {
257 throw new NoSuchElementException( "No tuples found" );
258 }
259
260 return candidate;
261 }
262
263
264 @Override
265 public void remove()
266 {
267 throw new UnsupportedOperationException( "Not supported" );
268 }
269
270 };
271
272 return itr;
273 }
274 }