View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one
3    *  or more contributor license agreements.  See the NOTICE file
4    *  distributed with this work for additional information
5    *  regarding copyright ownership.  The ASF licenses this file
6    *  to you under the Apache License, Version 2.0 (the
7    *  "License"); you may not use this file except in compliance
8    *  with the License.  You may obtain a copy of the License at
9    *
10   *    http://www.apache.org/licenses/LICENSE-2.0
11   *
12   *  Unless required by applicable law or agreed to in writing,
13   *  software distributed under the License is distributed on an
14   *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   *  KIND, either express or implied.  See the License for the
16   *  specific language governing permissions and limitations
17   *  under the License.
18   *
19   */
20  package org.apache.directory.mavibot.btree;
21  
22  
23  import java.io.File;
24  import java.io.IOException;
25  import java.io.RandomAccessFile;
26  import java.nio.ByteBuffer;
27  import java.nio.channels.FileChannel;
28  import java.util.ArrayList;
29  import java.util.HashMap;
30  import java.util.HashSet;
31  import java.util.LinkedHashMap;
32  import java.util.List;
33  import java.util.Map;
34  import java.util.Queue;
35  import java.util.Set;
36  import java.util.concurrent.ConcurrentHashMap;
37  import java.util.concurrent.LinkedBlockingQueue;
38  import java.util.concurrent.atomic.AtomicLong;
39  import java.util.concurrent.locks.Lock;
40  import java.util.concurrent.locks.ReadWriteLock;
41  import java.util.concurrent.locks.ReentrantLock;
42  import java.util.concurrent.locks.ReentrantReadWriteLock;
43  
44  import org.apache.directory.mavibot.btree.exception.BTreeAlreadyManagedException;
45  import org.apache.directory.mavibot.btree.exception.BTreeCreationException;
46  import org.apache.directory.mavibot.btree.exception.EndOfFileExceededException;
47  import org.apache.directory.mavibot.btree.exception.FileException;
48  import org.apache.directory.mavibot.btree.exception.InvalidOffsetException;
49  import org.apache.directory.mavibot.btree.exception.KeyNotFoundException;
50  import org.apache.directory.mavibot.btree.exception.RecordManagerException;
51  import org.apache.directory.mavibot.btree.serializer.ElementSerializer;
52  import org.apache.directory.mavibot.btree.serializer.IntSerializer;
53  import org.apache.directory.mavibot.btree.serializer.LongArraySerializer;
54  import org.apache.directory.mavibot.btree.serializer.LongSerializer;
55  import org.apache.directory.mavibot.btree.util.Strings;
56  import org.slf4j.Logger;
57  import org.slf4j.LoggerFactory;
58  
59  
60  /**
61   * The RecordManager is used to manage the file in which we will store the B-trees.
62   * A RecordManager will manage more than one B-tree.<br/>
63   *
64   * It stores data in fixed size pages (default size is 512 bytes), which may be linked one to
65   * the other if the data we want to store is too big for a page.
66   *
67   * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
68   */
69  public class RecordManager extends AbstractTransactionManager
70  {
71      /** The Logger used by this class */
72      protected static final Logger LOG = LoggerFactory.getLogger( RecordManager.class );
73  
74      /** A dedicated logger, named org.apache.directory.mavibot.LOG_PAGES */
75      protected static final Logger LOG_PAGES = LoggerFactory.getLogger( "org.apache.directory.mavibot.LOG_PAGES" );
76  
77      /** A dedicated logger for the check */
78      protected static final Logger LOG_CHECK = LoggerFactory.getLogger( "org.apache.directory.mavibot.LOG_CHECK" );
79  
80      /** The associated file */
81      private File file;
82  
83      /** The channel used to read and write data */
84      /* no qualifier */ FileChannel fileChannel;
85  
86      /** The number of managed B-trees */
87      /* no qualifier */ int nbBtree;
88  
89      /** The offset of the first free page (NO_PAGE when there is none) */
90      /* no qualifier */ long firstFreePage;
91  
92      /** The list of available free pages */
93      List<PageIO> freePages = new ArrayList<PageIO>();
94  
95      /** Some counters (presumably statistics on pages and headers operations, they are public and not read in this part of the class) */
96      public AtomicLong nbFreedPages = new AtomicLong( 0 );
97      public AtomicLong nbCreatedPages = new AtomicLong( 0 );
98      public AtomicLong nbReusedPages = new AtomicLong( 0 );
99      public AtomicLong nbUpdateRMHeader = new AtomicLong( 0 );
100     public AtomicLong nbUpdateBtreeHeader = new AtomicLong( 0 );
101     public AtomicLong nbUpdatePageIOs = new AtomicLong( 0 );
102 
103     /** The offset of the end of the file */
104     private long endOfFileOffset;
105 
106     /**
107      * A Map used to hold the pages that were copied in a new version.
108      * Those pages can be reclaimed when the associated version is dead.
109      * 
110      * Note: the offsets are those of AbstractPages; when freeing, the associated
111      *       PageIOs will be fetched and freed.
112      **/
113     /* no qualifier */ Map<RevisionName, long[]> copiedPageMap = null;
114 
115     /** A constant for an offset on a non existing page */
116     public static final long NO_PAGE = -1L;
117 
118     /** The number of elements we can store in a page */
119     private static final int PAGE_SIZE = 4;
120 
121     /** The size of the link to next page */
122     private static final int LINK_SIZE = 8;
123 
124     /** The sizes, in bytes, of a byte, an int and a long */
125     private static final int BYTE_SIZE = 1;
126     /* no qualifier */ static final int INT_SIZE = 4;
127     /* no qualifier */ static final int LONG_SIZE = 8;
128 
129     /** The default page size */
130     public static final int DEFAULT_PAGE_SIZE = 512;
131 
132     /** The minimal page size. Can't be below 64, as we have to store many things in the RMHeader */
133     private static final int MIN_PAGE_SIZE = 64;
134 
135     /** The RecordManager header size. Not final : it is overwritten with the page size by the constructor */
136     /* no qualifier */ static int RECORD_MANAGER_HEADER_SIZE = DEFAULT_PAGE_SIZE;
137 
138     /** A buffer used to store the RecordManager header (allocated per instance by the constructor) */
139     private ByteBuffer RECORD_MANAGER_HEADER_BUFFER;
140 
141     /** A byte array used to store the RecordManager header (allocated per instance by the constructor) */
142     private byte[] RECORD_MANAGER_HEADER_BYTES;
143 
144     /** The length of an Offset, as a negative value (the 4 bytes encode -8) */
145     private  byte[] LONG_LENGTH = new byte[]
146         { ( byte ) 0xFF, ( byte ) 0xFF, ( byte ) 0xFF, ( byte ) 0xF8 };
147 
148     /** The RecordManager underlying page size. */
149     /* no qualifier */ int pageSize = DEFAULT_PAGE_SIZE;
150 
151     /** The set of managed B-trees */
152     private Map<String, BTree<Object, Object>> managedBtrees;
153     
154     /** The queue of recently closed transactions */
155     private Queue<RevisionName> closedTransactionsQueue = new LinkedBlockingQueue<RevisionName>();
156 
157     /** The default file name */
158     private static final String DEFAULT_FILE_NAME = "mavibot.db";
159 
160     /** A flag set to true if we want to keep old revisions */
161     private boolean keepRevisions;
162 
163     /** A flag used by internal btrees */
164     public static final boolean INTERNAL_BTREE = true;
165 
166     /** A flag used by normal (non internal) btrees */
167     public static final boolean NORMAL_BTREE = false;
168 
169     /** The B-tree of B-trees */
170     private BTree<NameRevision, Long> btreeOfBtrees;
171 
172     /** The B-tree of B-trees management btree name */
173     /* no qualifier */ static final String BTREE_OF_BTREES_NAME = "_btree_of_btrees_";
174 
175     /** The CopiedPages management btree name */
176     /* no qualifier */ static final String COPIED_PAGE_BTREE_NAME = "_copiedPageBtree_";
177 
178     /** The current B-tree of B-trees header offset */
179     /* no qualifier */ long currentBtreeOfBtreesOffset;
180 
181     /** The previous B-tree of B-trees header offset */
182     private long previousBtreeOfBtreesOffset = NO_PAGE;
183 
184     /** A lock to protect the transaction handling */
185     private Lock transactionLock = new ReentrantLock();
186     
187     /** A ThreadLocal used to store the transaction level (number of started transactions) of the current thread */
188     private static final ThreadLocal<Integer> context = new ThreadLocal<Integer>();
189 
190     /** The list of PageIO that can be freed after a commit */
191     List<PageIO> freedPages = new ArrayList<PageIO>();
192 
193     /** The list of PageIO that can be freed after a rollback */
194     private List<PageIO> allocatedPages = new ArrayList<PageIO>();
195     
196     /** A Map keeping the latest revisions for each managed BTree */
197     private Map<String, BTreeHeader<?, ?>> currentBTreeHeaders = new HashMap<String, BTreeHeader<?, ?>>();
198 
199     /** A Map storing the new revisions when some changes have been made in some BTrees */
200     private Map<String, BTreeHeader<?, ?>> newBTreeHeaders = new HashMap<String, BTreeHeader<?, ?>>();
201     
202     /** A lock to protect the BtreeHeader maps */
203     private ReadWriteLock btreeHeadersLock = new ReentrantReadWriteLock();
204     
205     /** A value stored into the transaction context for rollbacked transactions */
206     private static final int ROLLBACKED_TXN = 0;
207 
208     /** A lock to protect the freepage pointers */
209     private ReentrantLock freePageLock = new ReentrantLock();
210 
211     /** The space reclaimer */
212     private SpaceReclaimer reclaimer;
213     
214     /** A variable keeping track of the write commit count (reset each time the reclaimer is run) */
215     private int commitCount = 0;
216     
217     /** The threshold at which the SpaceReclaimer will be run to free the copied pages */
218     private int spaceReclaimerThreshold = 200;
219     
/**
 * Creates a RecordManager using the default page size
 * ({@link #DEFAULT_PAGE_SIZE}). The underlying file is either created
 * or loaded if it already exists. If a folder is provided, a file with
 * the default name (mavibot.db) is used in this folder.
 *
 * @param fileName The file name, or a folder name
 */
public RecordManager( String fileName )
{
    this( fileName, DEFAULT_PAGE_SIZE );
}
231 
232 
233     /**
234      * Create a Record manager which will either create the underlying file
235      * or load an existing one. If a folder is provider, then we will create
236      * a file with a default name : mavibot.db
237      *
238      * @param name The file name, or a folder name
239      * @param pageSize the size of a page on disk, in bytes
240      */
241     public RecordManager( String fileName, int pageSize )
242     {
243         managedBtrees = new LinkedHashMap<String, BTree<Object, Object>>();
244 
245         if ( pageSize < MIN_PAGE_SIZE )
246         {
247             this.pageSize = MIN_PAGE_SIZE;
248         }
249         else
250         {
251             this.pageSize = pageSize;
252         }
253 
254         RECORD_MANAGER_HEADER_BUFFER = ByteBuffer.allocate( this.pageSize );
255         RECORD_MANAGER_HEADER_BYTES = new byte[this.pageSize];
256         RECORD_MANAGER_HEADER_SIZE = this.pageSize;
257 
258         // Open the file or create it
259         File tmpFile = new File( fileName );
260 
261         if ( tmpFile.isDirectory() )
262         {
263             // It's a directory. Check that we don't have an existing mavibot file
264             tmpFile = new File( tmpFile, DEFAULT_FILE_NAME );
265         }
266 
267         // We have to create a new file, if it does not already exist
268         boolean isNewFile = createFile( tmpFile );
269 
270         try
271         {
272             RandomAccessFile randomFile = new RandomAccessFile( file, "rw" );
273             fileChannel = randomFile.getChannel();
274 
275             // get the current end of file offset
276             endOfFileOffset = fileChannel.size();
277 
278             if ( isNewFile )
279             {
280                 initRecordManager();
281             }
282             else
283             {
284                 loadRecordManager();
285             }
286             
287             reclaimer = new SpaceReclaimer( this );
288             
289             copiedPageMap = reclaimer.readCopiedPageMap( file.getParentFile() );
290             runReclaimer();
291         }
292         catch ( Exception e )
293         {
294             LOG.error( "Error while initializing the RecordManager : {}", e.getMessage() );
295             LOG.error( "", e );
296             throw new RecordManagerException( e );
297         }
298     }
299 
300     
301     /**
302      * runs the SpaceReclaimer to free the copied pages
303      */
304     private void runReclaimer()
305     {
306         try
307         {
308             commitCount = 0;
309             reclaimer.reclaim();
310         }
311         catch( Exception e )
312         {
313             LOG.warn( "SpaceReclaimer failed to free the pages", e );
314         }
315     }
316 
317     
318     /**
319      * Create the mavibot file if it does not exist
320      */
321     private boolean createFile( File mavibotFile )
322     {
323         try
324         {
325             boolean creation = mavibotFile.createNewFile();
326 
327             file = mavibotFile;
328 
329             if ( mavibotFile.length() == 0 )
330             {
331                 return true;
332             }
333             else
334             {
335                 return creation;
336             }
337         }
338         catch ( IOException ioe )
339         {
340             LOG.error( "Cannot create the file {}", mavibotFile.getName() );
341             return false;
342         }
343     }
344 
345 
346     /**
347      * We will create a brand new RecordManager file, containing nothing, but the RecordManager header,
348      * a B-tree to manage the old revisions we want to keep and
349      * a B-tree used to manage pages associated with old versions.
350      * <br/>
351      * The RecordManager header contains the following details :
352      * <pre>
353      * +--------------------------+
354      * | PageSize                 | 4 bytes : The size of a physical page (default to 4096)
355      * +--------------------------+
356      * |  NbTree                  | 4 bytes : The number of managed B-trees (at least 1)
357      * +--------------------------+
358      * | FirstFree                | 8 bytes : The offset of the first free page
359      * +--------------------------+
360      * | current BoB offset       | 8 bytes : The offset of the current BoB
361      * +--------------------------+
362      * | previous BoB offset      | 8 bytes : The offset of the previous BoB
363      * +--------------------------+
364      * | current CP btree offset  | 8 bytes : The offset of the current BoB
365      * +--------------------------+
366      * | previous CP btree offset | 8 bytes : The offset of the previous BoB
367      * +--------------------------+
368      * </pre>
369      *
370      * We then store the B-tree managing the pages that have been copied when we have added
371      * or deleted an element in the B-tree. They are associated with a version.
372      *
373      * Last, we add the bTree that keep a track on each revision we can have access to.
374      */
375     private void initRecordManager() throws IOException
376     {
377         // Create a new Header
378         nbBtree = 0;
379         firstFreePage = NO_PAGE;
380         currentBtreeOfBtreesOffset = 0L;
381 
382         updateRecordManagerHeader();
383 
384         // Set the offset of the end of the file
385         endOfFileOffset = fileChannel.size();
386 
387         // First, create the btree of btrees <NameRevision, Long>
388         createBtreeOfBtrees();
389 
390         // Inject these B-trees into the RecordManager. They are internal B-trees.
391         try
392         {
393             manage( btreeOfBtrees, INTERNAL_BTREE );
394 
395             currentBtreeOfBtreesOffset = ((PersistedBTree<NameRevision, Long>)btreeOfBtrees).getBtreeHeader().getBTreeHeaderOffset();
396             updateRecordManagerHeader();
397             
398             // Inject the BtreeOfBtrees into the currentBtreeHeaders map
399             currentBTreeHeaders.put( BTREE_OF_BTREES_NAME,  ((PersistedBTree<NameRevision, Long>)btreeOfBtrees).getBtreeHeader() );
400             newBTreeHeaders.put( BTREE_OF_BTREES_NAME,  ((PersistedBTree<NameRevision, Long>)btreeOfBtrees).getBtreeHeader() );
401         }
402         catch ( BTreeAlreadyManagedException btame )
403         {
404             // Can't happen here.
405         }
406 
407         // We are all set ! Verify the file
408         if ( LOG_CHECK.isDebugEnabled() )
409         {
410             MavibotInspector.check( this );
411         }
412 
413     }
414 
415 
416     /**
417      * Create the B-treeOfBtrees
418      */
419     private void createBtreeOfBtrees()
420     {
421         PersistedBTreeConfiguration<NameRevision, Long> configuration = new PersistedBTreeConfiguration<NameRevision, Long>();
422         configuration.setKeySerializer( NameRevisionSerializer.INSTANCE );
423         configuration.setName( BTREE_OF_BTREES_NAME );
424         configuration.setValueSerializer( LongSerializer.INSTANCE );
425         configuration.setBtreeType( BTreeTypeEnum.BTREE_OF_BTREES );
426         configuration.setCacheSize( PersistedBTree.DEFAULT_CACHE_SIZE );
427 
428         btreeOfBtrees = BTreeFactory.createPersistedBTree( configuration );
429     }
430 
431 
432     /**
433      * Load the BTrees from the disk.
434      *
435      * @throws InstantiationException
436      * @throws IllegalAccessException
437      * @throws ClassNotFoundException
438      * @throws NoSuchFieldException
439      * @throws SecurityException
440      * @throws IllegalArgumentException
441      */
442     private void loadRecordManager() throws IOException, ClassNotFoundException, IllegalAccessException,
443         InstantiationException, IllegalArgumentException, SecurityException, NoSuchFieldException, KeyNotFoundException
444     {
445         if ( fileChannel.size() != 0 )
446         {
447             ByteBuffer recordManagerHeader = ByteBuffer.allocate( RECORD_MANAGER_HEADER_SIZE );
448 
449             // The file exists, we have to load the data now
450             fileChannel.read( recordManagerHeader );
451 
452             recordManagerHeader.rewind();
453 
454             // read the RecordManager Header :
455             // +---------------------+
456             // | PageSize            | 4 bytes : The size of a physical page (default to 4096)
457             // +---------------------+
458             // | NbTree              | 4 bytes : The number of managed B-trees (at least 1)
459             // +---------------------+
460             // | FirstFree           | 8 bytes : The offset of the first free page
461             // +---------------------+
462             // | current BoB offset  | 8 bytes : The offset of the current B-tree of B-trees
463             // +---------------------+
464             // | previous BoB offset | 8 bytes : The offset of the previous B-tree of B-trees
465             // +---------------------+
466             // | current CP offset   | 8 bytes : The offset of the current Copied Pages B-tree
467             // +---------------------+
468             // | previous CP offset  | 8 bytes : The offset of the previous Copied Pages B-tree
469             // +---------------------+
470 
471             // The page size
472             pageSize = recordManagerHeader.getInt();
473 
474             // The number of managed B-trees
475             nbBtree = recordManagerHeader.getInt();
476 
477             // The first and last free page
478             firstFreePage = recordManagerHeader.getLong();
479 
480             // The current BOB offset
481             currentBtreeOfBtreesOffset = recordManagerHeader.getLong();
482 
483             // The previous BOB offset
484             previousBtreeOfBtreesOffset = recordManagerHeader.getLong();
485 
486             // read the B-tree of B-trees
487             PageIO[] bobHeaderPageIos = readPageIOs( currentBtreeOfBtreesOffset, Long.MAX_VALUE );
488 
489             btreeOfBtrees = BTreeFactory.<NameRevision, Long> createPersistedBTree( BTreeTypeEnum.BTREE_OF_BTREES );
490             //BTreeFactory.<NameRevision, Long> setBtreeHeaderOffset( ( PersistedBTree<NameRevision, Long> )btreeOfBtrees, currentBtreeOfBtreesOffset );
491 
492             loadBtree( bobHeaderPageIos, btreeOfBtrees );
493 
494             // Now, read all the B-trees from the btree of btrees
495             TupleCursor<NameRevision, Long> btreeCursor = btreeOfBtrees.browse();
496             Map<String, Long> loadedBtrees = new HashMap<String, Long>();
497 
498             // loop on all the btrees we have, and keep only the latest revision
499             long currentRevision = -1L;
500 
501             while ( btreeCursor.hasNext() )
502             {
503                 Tuple<NameRevision, Long> btreeTuple = btreeCursor.next();
504                 NameRevision nameRevision = btreeTuple.getKey();
505                 long btreeOffset = btreeTuple.getValue();
506                 long revision = nameRevision.getValue();
507 
508                 // Check if we already have processed this B-tree
509                 Long loadedBtreeRevision = loadedBtrees.get( nameRevision.getName() );
510 
511                 if ( loadedBtreeRevision != null )
512                 {
513                     // The btree has already been loaded. The revision is necessarily higher
514                     if ( revision > currentRevision )
515                     {
516                         // We have a newer revision : switch to the new revision (we keep the offset atm)
517                         loadedBtrees.put( nameRevision.getName(), btreeOffset );
518                         currentRevision = revision;
519                     }
520                 }
521                 else
522                 {
523                     // This is a new B-tree
524                     loadedBtrees.put( nameRevision.getName(), btreeOffset );
525                     currentRevision = nameRevision.getRevision();
526                 }
527             }
528 
529             // TODO : clean up the old revisions...
530 
531 
532             // Now, we can load the real btrees using the offsets
533             for ( String btreeName : loadedBtrees.keySet() )
534             {
535                 long btreeOffset = loadedBtrees.get( btreeName );
536 
537                 PageIO[] btreePageIos = readPageIOs( btreeOffset, Long.MAX_VALUE );
538 
539                 BTree<?, ?> btree = BTreeFactory.<NameRevision, Long> createPersistedBTree();
540                 //( ( PersistedBTree<NameRevision, Long> ) btree ).setBtreeHeaderOffset( btreeOffset );
541                 loadBtree( btreePageIos, btree );
542 
543                 // Add the btree into the map of managed B-trees
544                 managedBtrees.put( btreeName, ( BTree<Object, Object> ) btree );
545             }
546 
547             // We are done ! Let's finish with the last initialization parts
548             endOfFileOffset = fileChannel.size();
549         }
550     }
551 
552 
553     /**
554      * Starts a transaction
555      */
556     public void beginTransaction()
557     {
558         // First, take the lock
559         transactionLock.lock();
560         
561         // Now, check the TLS state
562         incrementTxnLevel();
563     }
564 
565 
566     /**
567      * Commits a transaction
568      */
569     public void commit()
570     {
571         if ( !fileChannel.isOpen() )
572         {
573             // The file has been closed, nothing remains to commit, let's get out
574             transactionLock.unlock();
575             
576             // Still we have to decrement the TransactionLevel
577             decrementTxnLevel();
578             
579             return;
580         }
581 
582         int nbTxnStarted = context.get();
583         
584         switch ( nbTxnStarted )
585         {
586             case ROLLBACKED_TXN :
587                 // The transaction was rollbacked, quit immediatelly
588                 transactionLock.unlock();
589                 
590                 return;
591             
592             case 1 :
593                 // We are done with the transaction, we can update the RMHeader and swap the BTreeHeaders
594                 // First update the RMHeader to be sure that we have a way to restore from a crash
595                 updateRecordManagerHeader();
596                 
597                 // Swap the BtreeHeaders maps
598                 swapCurrentBtreeHeaders();
599         
600                 // We can now free pages
601                 for ( PageIO pageIo : freedPages )
602                 {
603                     try
604                     {
605                         free( pageIo );
606                     }
607                     catch ( IOException ioe )
608                     {
609                         throw new RecordManagerException( ioe.getMessage() );
610                     }
611                 }
612         
613                 // Release the allocated and freed pages list
614                 freedPages.clear();
615                 allocatedPages.clear();
616         
617                 // And update the RMHeader again, removing the old references to BOB and CPB b-tree headers
618                 // here, we have to erase the old references to keep only the new ones.
619                 updateRecordManagerHeader();
620                 
621                 // And decrement the number of started transactions
622                 decrementTxnLevel();
623 
624                 commitCount++;
625                 
626                 if( commitCount >= spaceReclaimerThreshold )
627                 {
628                     runReclaimer();
629                 }
630                 
631                 // Finally, release the global lock
632                 transactionLock.unlock();
633                 
634                 return;
635                 
636             default :
637                 // We are inner an existing transaction. Just update the necessary elements
638                 // Update the RMHeader to be sure that we have a way to restore from a crash
639                 updateRecordManagerHeader();
640                 
641                 // Swap the BtreeHeaders maps
642                 //swapCurrentBtreeHeaders();
643         
644                 // We can now free pages
645                 for ( PageIO pageIo : freedPages )
646                 {
647                     try
648                     {
649                         free( pageIo );
650                     }
651                     catch ( IOException ioe )
652                     {
653                         throw new RecordManagerException( ioe.getMessage() );
654                     }
655                 }
656         
657                 // Release the allocated and freed pages list
658                 freedPages.clear();
659                 allocatedPages.clear();
660         
661                 // And update the RMHeader again, removing the old references to BOB and CPB b-tree headers
662                 // here, we have to erase the old references to keep only the new ones.
663                 updateRecordManagerHeader();
664                 
665                 // And decrement the number of started transactions
666                 decrementTxnLevel();
667 
668                 commitCount++;
669                 
670                 if( commitCount >= spaceReclaimerThreshold )
671                 {
672                     runReclaimer();
673                 }
674 
675                 // Finally, release the global lock
676                 transactionLock.unlock();
677                 
678                 return;
679         }
680     }
681     
682     
683     public boolean isContextOk()
684     {
685         return ( context == null ? true : ( context.get() == 0 ) );
686     }
687     
688     /**
689      * Increment the transactionLevel
690      */
691     private void incrementTxnLevel()
692     {
693         Integer nbTxnLevel = context.get();
694         
695         if ( nbTxnLevel == null )
696         {
697             context.set( 1 );
698         }
699         else
700         {
701             // And increment the counter of inner txn.
702             context.set( nbTxnLevel + 1 );
703         }
704         
705         /*
706         System.out.println( "Incrementing : " + context.get() );
707         
708         if ( context.get() == 0 )
709         {
710             System.out.println( "-------------" );
711         }
712         */
713     }
714     
715     
716     /**
717      * Decrement the transactionLevel
718      */
719     private void decrementTxnLevel()
720     {
721         int nbTxnStarted = context.get();
722 
723         context.set(  nbTxnStarted - 1 );
724         
725         //System.out.println( "Incrementing : " + context.get() );
726     }
727 
728 
729     /**
730      * Rollback a transaction
731      */
732     public void rollback()
733     {
734         // Reset the counter
735         context.set( ROLLBACKED_TXN );
736 
737         // We can now free allocated pages, this is the end of the transaction
738         for ( PageIO pageIo : allocatedPages )
739         {
740             try
741             {
742                 free( pageIo );
743             }
744             catch ( IOException ioe )
745             {
746                 throw new RecordManagerException( ioe.getMessage() );
747             }
748         }
749 
750         // Release the allocated and freed pages list
751         freedPages.clear();
752         allocatedPages.clear();
753 
754         // And update the RMHeader
755         updateRecordManagerHeader();
756         
757         // And restore the BTreeHeaders new Map to the current state
758         revertBtreeHeaders();
759 
760         transactionLock.unlock();
761     }
762 
763 
764     /**
765      * Reads all the PageIOs that are linked to the page at the given position, including
766      * the first page.
767      *
768      * @param position The position of the first page
769      * @param limit The maximum bytes to read. Set this value to -1 when the size is unknown.
770      * @return An array of pages
771      */
772     /*no qualifier*/ PageIO[] readPageIOs( long position, long limit ) throws IOException, EndOfFileExceededException
773     {
774         LOG.debug( "Read PageIOs at position {}", position );
775 
776         if ( limit <= 0 )
777         {
778             limit = Long.MAX_VALUE;
779         }
780 
781         PageIO firstPage = fetchPage( position );
782         firstPage.setSize();
783         List<PageIO> listPages = new ArrayList<PageIO>();
784         listPages.add( firstPage );
785         long dataRead = pageSize - LONG_SIZE - INT_SIZE;
786 
787         // Iterate on the pages, if needed
788         long nextPage = firstPage.getNextPage();
789 
790         if ( ( dataRead < limit ) && ( nextPage != NO_PAGE ) )
791         {
792             while ( dataRead < limit )
793             {
794                 PageIO page = fetchPage( nextPage );
795                 listPages.add( page );
796                 nextPage = page.getNextPage();
797                 dataRead += pageSize - LONG_SIZE;
798 
799                 if ( nextPage == NO_PAGE )
800                 {
801                     page.setNextPage( NO_PAGE );
802                     break;
803                 }
804             }
805         }
806 
807         LOG.debug( "Nb of PageIOs read : {}", listPages.size() );
808 
809         // Return
810         return listPages.toArray( new PageIO[]
811             {} );
812     }
813 
814 
815     /**
816      * Check the offset to be sure it's a valid one :
817      * <ul>
818      * <li>It's >= 0</li>
819      * <li>It's below the end of the file</li>
820      * <li>It's a multipl of the pageSize
821      * </ul>
822      * @param offset The offset to check
823      * @throws InvalidOffsetException If the offset is not valid
824      */
825     /* no qualifier */ void checkOffset( long offset )
826     {
827         if ( ( offset < 0 ) || ( offset > endOfFileOffset ) || ( ( offset % pageSize ) != 0 ) )
828         {
829             throw new InvalidOffsetException( "Bad Offset : " + offset );
830         }
831     }
832 
833 
834     /**
835      * Read a B-tree from the disk. The meta-data are at the given position in the list of pages.
836      * We load a B-tree in two steps : first, we load the B-tree header, then the common informations
837      *
838      * @param pageIos The list of pages containing the meta-data
839      * @param btree The B-tree we have to initialize
840      * @throws InstantiationException
841      * @throws IllegalAccessException
842      * @throws ClassNotFoundException
843      * @throws NoSuchFieldException
844      * @throws SecurityException
845      * @throws IllegalArgumentException
846      */
847     private <K, V> void loadBtree( PageIO[] pageIos, BTree<K, V> btree ) throws EndOfFileExceededException,
848         IOException, ClassNotFoundException, IllegalAccessException, InstantiationException, IllegalArgumentException, SecurityException, NoSuchFieldException
849     {
850         loadBtree( pageIos, btree, null );
851     }
852 
853 
854     /**
855      * Read a B-tree from the disk. The meta-data are at the given position in the list of pages.
856      * We load a B-tree in two steps : first, we load the B-tree header, then the common informations
857      *
858      * @param pageIos The list of pages containing the meta-data
859      * @param btree The B-tree we have to initialize
860      * @throws InstantiationException
861      * @throws IllegalAccessException
862      * @throws ClassNotFoundException
863      * @throws NoSuchFieldException
864      * @throws SecurityException
865      * @throws IllegalArgumentException
866      */
867     /* no qualifier */ <K, V> void loadBtree( PageIO[] pageIos, BTree btree, BTree<K, V> parentBTree ) throws EndOfFileExceededException,
868         IOException, ClassNotFoundException, IllegalAccessException, InstantiationException, IllegalArgumentException, SecurityException, NoSuchFieldException
869     {
870         long dataPos = 0L;
871 
872         // Process the B-tree header
873         BTreeHeader<K, V> btreeHeader = new BTreeHeader<K, V>();
874         btreeHeader.setBtree( btree );
875 
876         // The BtreeHeader offset
877         btreeHeader.setBTreeHeaderOffset( pageIos[0].getOffset() );
878 
879         // The B-tree current revision
880         long revision = readLong( pageIos, dataPos );
881         btreeHeader.setRevision( revision );
882         dataPos += LONG_SIZE;
883 
884         // The nb elems in the tree
885         long nbElems = readLong( pageIos, dataPos );
886         btreeHeader.setNbElems( nbElems );
887         dataPos += LONG_SIZE;
888 
889         // The B-tree rootPage offset
890         long rootPageOffset = readLong( pageIos, dataPos );
891         btreeHeader.setRootPageOffset( rootPageOffset );
892         dataPos += LONG_SIZE;
893 
894         // The B-tree information offset
895         long btreeInfoOffset = readLong( pageIos, dataPos );
896 
897         // Now, process the common informations
898         PageIO[] infoPageIos = readPageIOs( btreeInfoOffset, Long.MAX_VALUE );
899         ((PersistedBTree<K, V>)btree).setBtreeInfoOffset( infoPageIos[0].getOffset() );
900         dataPos = 0L;
901 
902         // The B-tree page size
903         int btreePageSize = readInt( infoPageIos, dataPos );
904         BTreeFactory.setPageSize( btree, btreePageSize );
905         dataPos += INT_SIZE;
906 
907         // The tree name
908         ByteBuffer btreeNameBytes = readBytes( infoPageIos, dataPos );
909         dataPos += INT_SIZE + btreeNameBytes.limit();
910         String btreeName = Strings.utf8ToString( btreeNameBytes );
911         BTreeFactory.setName( btree, btreeName );
912 
913         // The keySerializer FQCN
914         ByteBuffer keySerializerBytes = readBytes( infoPageIos, dataPos );
915         dataPos += INT_SIZE + keySerializerBytes.limit();
916 
917         String keySerializerFqcn = "";
918 
919         if ( keySerializerBytes != null )
920         {
921             keySerializerFqcn = Strings.utf8ToString( keySerializerBytes );
922         }
923 
924         BTreeFactory.setKeySerializer( btree, keySerializerFqcn );
925 
926         // The valueSerialier FQCN
927         ByteBuffer valueSerializerBytes = readBytes( infoPageIos, dataPos );
928 
929         String valueSerializerFqcn = "";
930         dataPos += INT_SIZE + valueSerializerBytes.limit();
931 
932         if ( valueSerializerBytes != null )
933         {
934             valueSerializerFqcn = Strings.utf8ToString( valueSerializerBytes );
935         }
936 
937         BTreeFactory.setValueSerializer( btree, valueSerializerFqcn );
938 
939         // The B-tree allowDuplicates flag
940         int allowDuplicates = readInt( infoPageIos, dataPos );
941         ( ( PersistedBTree<K, V> ) btree ).setAllowDuplicates( allowDuplicates != 0 );
942         dataPos += INT_SIZE;
943 
944         // Set the recordManager in the btree
945         ( ( PersistedBTree<K, V> ) btree ).setRecordManager( this );
946 
947         // Set the current revision to the one stored in the B-tree header
948         // Here, we have to tell the BTree to keep this revision in the
949         // btreeRevisions Map, thus the 'true' parameter at the end.
950         ((PersistedBTree<K, V>)btree).storeRevision( btreeHeader, true );
951 
952         // Now, init the B-tree
953         ( ( PersistedBTree<K, V> ) btree ).init( parentBTree );
954         
955         // Update the BtreeHeaders Maps
956         currentBTreeHeaders.put( btree.getName(), ( ( PersistedBTree<K, V> ) btree ).getBtreeHeader() );
957         newBTreeHeaders.put( btree.getName(), ( ( PersistedBTree<K, V> ) btree ).getBtreeHeader() );
958 
959         // Read the rootPage pages on disk
960         PageIO[] rootPageIos = readPageIOs( rootPageOffset, Long.MAX_VALUE );
961 
962         Page<K, V> btreeRoot = readPage( btree, rootPageIos );
963         BTreeFactory.setRecordManager( btree, this );
964 
965         BTreeFactory.setRootPage( btree, btreeRoot );
966     }
967 
968 
969     /**
970      * Deserialize a Page from a B-tree at a give position
971      *
972      * @param btree The B-tree we want to read a Page from
973      * @param offset The position in the file for this page
974      * @return The read page
975      * @throws EndOfFileExceededException If we have reached the end of the file while reading the page
976      */
977     public <K, V> Page<K, V> deserialize( BTree<K, V> btree, long offset ) throws EndOfFileExceededException,
978         IOException
979     {
980         checkOffset( offset );
981         PageIO[] rootPageIos = readPageIOs( offset, Long.MAX_VALUE );
982 
983         Page<K, V> page = readPage( btree, rootPageIos );
984 
985         return page;
986     }
987 
988 
989     /**
990      * Read a page from some PageIO for a given B-tree
991      * @param btree The B-tree we want to read a page for
992      * @param pageIos The PageIO containing the raw data
993      * @return The read Page if successful
994      * @throws IOException If the deserialization failed
995      */
996     private <K, V> Page<K, V> readPage( BTree<K, V> btree, PageIO[] pageIos ) throws IOException
997     {
998         // Deserialize the rootPage now
999         long position = 0L;
1000 
1001         // The revision
1002         long revision = readLong( pageIos, position );
1003         position += LONG_SIZE;
1004 
1005         // The number of elements in the page
1006         int nbElems = readInt( pageIos, position );
1007         position += INT_SIZE;
1008 
1009         // The size of the data containing the keys and values
1010         Page<K, V> page = null;
1011 
1012         // Reads the bytes containing all the keys and values, if we have some
1013         // We read  big blog of data into  ByteBuffer, then we will process
1014         // this ByteBuffer
1015         ByteBuffer byteBuffer = readBytes( pageIos, position );
1016 
1017         // Now, deserialize the data block. If the number of elements
1018         // is positive, it's a Leaf, otherwise it's a Node
1019         // Note that only a leaf can have 0 elements, and it's the root page then.
1020         if ( nbElems >= 0 )
1021         {
1022             // It's a leaf
1023             page = readLeafKeysAndValues( btree, nbElems, revision, byteBuffer, pageIos );
1024         }
1025         else
1026         {
1027             // It's a node
1028             page = readNodeKeysAndValues( btree, -nbElems, revision, byteBuffer, pageIos );
1029         }
1030 
1031         ( ( AbstractPage<K, V> ) page ).setOffset( pageIos[0].getOffset() );
1032         if ( pageIos.length > 1 )
1033         {
1034             ( ( AbstractPage<K, V> ) page ).setLastOffset( pageIos[pageIos.length - 1].getOffset() );
1035         }
1036 
1037         return page;
1038     }
1039 
1040 
1041     /**
1042      * Deserialize a Leaf from some PageIOs
1043      */
1044     private <K, V> PersistedLeaf<K, V> readLeafKeysAndValues( BTree<K, V> btree, int nbElems, long revision,
1045         ByteBuffer byteBuffer, PageIO[] pageIos )
1046     {
1047         // Its a leaf, create it
1048         PersistedLeaf<K, V> leaf = ( PersistedLeaf<K, V> ) BTreeFactory.createLeaf( btree, revision, nbElems );
1049 
1050         // Store the page offset on disk
1051         leaf.setOffset( pageIos[0].getOffset() );
1052         leaf.setLastOffset( pageIos[pageIos.length - 1].getOffset() );
1053 
1054         int[] keyLengths = new int[nbElems];
1055         int[] valueLengths = new int[nbElems];
1056 
1057         boolean isNotSubTree = ( btree.getType() != BTreeTypeEnum.PERSISTED_SUB );
1058 
1059         // Read each key and value
1060         for ( int i = 0; i < nbElems; i++ )
1061         {
1062             if ( isNotSubTree )
1063             {
1064                 // Read the number of values
1065                 int nbValues = byteBuffer.getInt();
1066                 PersistedValueHolder<V> valueHolder = null;
1067                 
1068                 if ( nbValues < 0 )
1069                 {
1070                     // This is a sub-btree
1071                     byte[] btreeOffsetBytes = new byte[LONG_SIZE];
1072                     byteBuffer.get( btreeOffsetBytes );
1073                     
1074                     // Create the valueHolder. As the number of values is negative, we have to switch
1075                     // to a positive value but as we start at -1 for 0 value, add 1.
1076                     valueHolder = new PersistedValueHolder<V>( btree, 1 - nbValues, btreeOffsetBytes );
1077                 }
1078                 else
1079                 {
1080                     // This is an array
1081                     // Read the value's array length
1082                     valueLengths[i] = byteBuffer.getInt();
1083                     
1084                     // This is an Array of values, read the byte[] associated with it
1085                     byte[] arrayBytes = new byte[valueLengths[i]];
1086                     byteBuffer.get( arrayBytes );
1087                     valueHolder = new PersistedValueHolder<V>( btree, nbValues, arrayBytes );
1088                 }
1089                 
1090                 BTreeFactory.setValue( btree, leaf, i, valueHolder );
1091             }
1092 
1093             keyLengths[i] = byteBuffer.getInt();
1094             byte[] data = new byte[keyLengths[i]];
1095             byteBuffer.get( data );
1096             BTreeFactory.setKey( btree, leaf, i, data );
1097         }
1098 
1099         return leaf;
1100     }
1101 
1102 
1103     /**
1104      * Deserialize a Node from some PageIos
1105      */
1106     private <K, V> PersistedNode<K, V> readNodeKeysAndValues( BTree<K, V> btree, int nbElems, long revision,
1107         ByteBuffer byteBuffer, PageIO[] pageIos ) throws IOException
1108     {
1109         PersistedNode<K, V> node = ( PersistedNode<K, V> ) BTreeFactory.createNode( btree, revision, nbElems );
1110 
1111         // Read each value and key
1112         for ( int i = 0; i < nbElems; i++ )
1113         {
1114             // This is an Offset
1115             long offset = LongSerializer.INSTANCE.deserialize( byteBuffer );
1116             long lastOffset = LongSerializer.INSTANCE.deserialize( byteBuffer );
1117 
1118             PersistedPageHolder<K, V> valueHolder = new PersistedPageHolder<K, V>( btree, null, offset, lastOffset );
1119             node.setValue( i, valueHolder );
1120 
1121             // Read the key length
1122             int keyLength = byteBuffer.getInt();
1123 
1124             int currentPosition = byteBuffer.position();
1125 
1126             // and the key value
1127             K key = btree.getKeySerializer().deserialize( byteBuffer );
1128 
1129             // Set the new position now
1130             byteBuffer.position( currentPosition + keyLength );
1131 
1132             BTreeFactory.setKey( btree, node, i, key );
1133         }
1134 
1135         // and read the last value, as it's a node
1136         long offset = LongSerializer.INSTANCE.deserialize( byteBuffer );
1137         long lastOffset = LongSerializer.INSTANCE.deserialize( byteBuffer );
1138 
1139         PersistedPageHolder<K, V> valueHolder = new PersistedPageHolder<K, V>( btree, null, offset, lastOffset );
1140         node.setValue( nbElems, valueHolder );
1141 
1142         return node;
1143     }
1144 
1145 
1146     /**
1147      * Read a byte[] from pages.
1148      *
1149      * @param pageIos The pages we want to read the byte[] from
1150      * @param position The position in the data stored in those pages
1151      * @return The byte[] we have read
1152      */
1153     /* no qualifier */ ByteBuffer readBytes( PageIO[] pageIos, long position )
1154     {
1155         // Read the byte[] length first
1156         int length = readInt( pageIos, position );
1157         position += INT_SIZE;
1158 
1159         // Compute the page in which we will store the data given the
1160         // current position
1161         int pageNb = computePageNb( position );
1162 
1163         // Compute the position in the current page
1164         int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
1165 
1166         ByteBuffer pageData = pageIos[pageNb].getData();
1167         int remaining = pageData.capacity() - pagePos;
1168 
1169         if ( length == 0 )
1170         {
1171             // No bytes to read : return null;
1172             return null;
1173         }
1174         else
1175         {
1176             ByteBuffer bytes = ByteBuffer.allocate( length );
1177 
1178             while ( length > 0 )
1179             {
1180                 if ( length <= remaining )
1181                 {
1182                     pageData.mark();
1183                     pageData.position( pagePos );
1184                     int oldLimit = pageData.limit();
1185                     pageData.limit( pagePos + length );
1186                     bytes.put( pageData );
1187                     pageData.limit( oldLimit );
1188                     pageData.reset();
1189                     bytes.rewind();
1190 
1191                     return bytes;
1192                 }
1193 
1194                 pageData.mark();
1195                 pageData.position( pagePos );
1196                 int oldLimit = pageData.limit();
1197                 pageData.limit( pagePos + remaining );
1198                 bytes.put( pageData );
1199                 pageData.limit( oldLimit );
1200                 pageData.reset();
1201                 pageNb++;
1202                 pagePos = LINK_SIZE;
1203                 pageData = pageIos[pageNb].getData();
1204                 length -= remaining;
1205                 remaining = pageData.capacity() - pagePos;
1206             }
1207 
1208             bytes.rewind();
1209 
1210             return bytes;
1211         }
1212     }
1213 
1214 
1215     /**
1216      * Read an int from pages
1217      * @param pageIos The pages we want to read the int from
1218      * @param position The position in the data stored in those pages
1219      * @return The int we have read
1220      */
1221     /* no qualifier */ int readInt( PageIO[] pageIos, long position )
1222     {
1223         // Compute the page in which we will store the data given the
1224         // current position
1225         int pageNb = computePageNb( position );
1226 
1227         // Compute the position in the current page
1228         int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
1229 
1230         ByteBuffer pageData = pageIos[pageNb].getData();
1231         int remaining = pageData.capacity() - pagePos;
1232         int value = 0;
1233 
1234         if ( remaining >= INT_SIZE )
1235         {
1236             value = pageData.getInt( pagePos );
1237         }
1238         else
1239         {
1240             value = 0;
1241 
1242             switch ( remaining )
1243             {
1244                 case 3:
1245                     value += ( ( pageData.get( pagePos + 2 ) & 0x00FF ) << 8 );
1246                     // Fallthrough !!!
1247 
1248                 case 2:
1249                     value += ( ( pageData.get( pagePos + 1 ) & 0x00FF ) << 16 );
1250                     // Fallthrough !!!
1251 
1252                 case 1:
1253                     value += ( pageData.get( pagePos ) << 24 );
1254                     break;
1255             }
1256 
1257             // Now deal with the next page
1258             pageData = pageIos[pageNb + 1].getData();
1259             pagePos = LINK_SIZE;
1260 
1261             switch ( remaining )
1262             {
1263                 case 1:
1264                     value += ( pageData.get( pagePos ) & 0x00FF ) << 16;
1265                     // fallthrough !!!
1266 
1267                 case 2:
1268                     value += ( pageData.get( pagePos + 2 - remaining ) & 0x00FF ) << 8;
1269                     // fallthrough !!!
1270 
1271                 case 3:
1272                     value += ( pageData.get( pagePos + 3 - remaining ) & 0x00FF );
1273                     break;
1274             }
1275         }
1276 
1277         return value;
1278     }
1279 
1280 
1281     /**
1282      * Read a byte from pages
1283      * @param pageIos The pages we want to read the byte from
1284      * @param position The position in the data stored in those pages
1285      * @return The byte we have read
1286      */
1287     private byte readByte( PageIO[] pageIos, long position )
1288     {
1289         // Compute the page in which we will store the data given the
1290         // current position
1291         int pageNb = computePageNb( position );
1292 
1293         // Compute the position in the current page
1294         int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
1295 
1296         ByteBuffer pageData = pageIos[pageNb].getData();
1297         byte value = 0;
1298 
1299         value = pageData.get( pagePos );
1300 
1301         return value;
1302     }
1303 
1304 
1305     /**
1306      * Read a long from pages
1307      * @param pageIos The pages we want to read the long from
1308      * @param position The position in the data stored in those pages
1309      * @return The long we have read
1310      */
1311     /* no qualifier */ long readLong( PageIO[] pageIos, long position )
1312     {
1313         // Compute the page in which we will store the data given the
1314         // current position
1315         int pageNb = computePageNb( position );
1316 
1317         // Compute the position in the current page
1318         int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
1319 
1320         ByteBuffer pageData = pageIos[pageNb].getData();
1321         int remaining = pageData.capacity() - pagePos;
1322         long value = 0L;
1323 
1324         if ( remaining >= LONG_SIZE )
1325         {
1326             value = pageData.getLong( pagePos );
1327         }
1328         else
1329         {
1330             switch ( remaining )
1331             {
1332                 case 7:
1333                     value += ( ( ( long ) pageData.get( pagePos + 6 ) & 0x00FF ) << 8 );
1334                     // Fallthrough !!!
1335 
1336                 case 6:
1337                     value += ( ( ( long ) pageData.get( pagePos + 5 ) & 0x00FF ) << 16 );
1338                     // Fallthrough !!!
1339 
1340                 case 5:
1341                     value += ( ( ( long ) pageData.get( pagePos + 4 ) & 0x00FF ) << 24 );
1342                     // Fallthrough !!!
1343 
1344                 case 4:
1345                     value += ( ( ( long ) pageData.get( pagePos + 3 ) & 0x00FF ) << 32 );
1346                     // Fallthrough !!!
1347 
1348                 case 3:
1349                     value += ( ( ( long ) pageData.get( pagePos + 2 ) & 0x00FF ) << 40 );
1350                     // Fallthrough !!!
1351 
1352                 case 2:
1353                     value += ( ( ( long ) pageData.get( pagePos + 1 ) & 0x00FF ) << 48 );
1354                     // Fallthrough !!!
1355 
1356                 case 1:
1357                     value += ( ( long ) pageData.get( pagePos ) << 56 );
1358                     break;
1359             }
1360 
1361             // Now deal with the next page
1362             pageData = pageIos[pageNb + 1].getData();
1363             pagePos = LINK_SIZE;
1364 
1365             switch ( remaining )
1366             {
1367                 case 1:
1368                     value += ( ( long ) pageData.get( pagePos ) & 0x00FF ) << 48;
1369                     // fallthrough !!!
1370 
1371                 case 2:
1372                     value += ( ( long ) pageData.get( pagePos + 2 - remaining ) & 0x00FF ) << 40;
1373                     // fallthrough !!!
1374 
1375                 case 3:
1376                     value += ( ( long ) pageData.get( pagePos + 3 - remaining ) & 0x00FF ) << 32;
1377                     // fallthrough !!!
1378 
1379                 case 4:
1380                     value += ( ( long ) pageData.get( pagePos + 4 - remaining ) & 0x00FF ) << 24;
1381                     // fallthrough !!!
1382 
1383                 case 5:
1384                     value += ( ( long ) pageData.get( pagePos + 5 - remaining ) & 0x00FF ) << 16;
1385                     // fallthrough !!!
1386 
1387                 case 6:
1388                     value += ( ( long ) pageData.get( pagePos + 6 - remaining ) & 0x00FF ) << 8;
1389                     // fallthrough !!!
1390 
1391                 case 7:
1392                     value += ( ( long ) pageData.get( pagePos + 7 - remaining ) & 0x00FF );
1393                     break;
1394             }
1395         }
1396 
1397         return value;
1398     }
1399 
1400 
1401     /**
1402      * Manage a B-tree. The btree will be added and managed by this RecordManager. We will create a
1403      * new RootPage for this added B-tree, which will contain no data.<br/>
1404      * This method is threadsafe.
1405      *
1406      * @param btree The new B-tree to manage.
1407      * @throws BTreeAlreadyManagedException if the B-tree is already managed
1408      * @throws IOException if there was a problem while accessing the file
1409      */
1410     public synchronized <K, V> void manage( BTree<K, V> btree ) throws BTreeAlreadyManagedException, IOException
1411     {
1412         beginTransaction();
1413 
1414         manage( ( BTree<Object, Object> ) btree, NORMAL_BTREE );
1415 
1416         commit();
1417     }
1418 
1419 
1420     /**
1421      * Managing a btree is a matter of storing an reference to the managed B-tree in the B-tree Of B-trees.
1422      * We store a tuple of NameRevision (where revision is 0L) and a offset to the B-tree header.
1423      * At the same time, we keep a track of the managed B-trees in a Map.
1424      *
1425      * @param btree The new B-tree to manage.
1426      * @param treeType flag indicating if this is an internal tree
1427      *
1428      * @throws BTreeAlreadyManagedException If the B-tree is already managed
1429      * @throws IOException
1430      */
1431     public synchronized <K, V> void manage( BTree<K, V> btree, boolean treeType )
1432         throws BTreeAlreadyManagedException, IOException
1433     {
1434         LOG.debug( "Managing the btree {} which is an internam tree : {}", btree.getName(), treeType );
1435         BTreeFactory.setRecordManager( btree, this );
1436 
1437         String name = btree.getName();
1438 
1439         if ( managedBtrees.containsKey( name ) )
1440         {
1441             // There is already a B-tree with this name in the recordManager...
1442             LOG.error( "There is already a B-tree named '{}' managed by this recordManager", name );
1443             throw new BTreeAlreadyManagedException( name );
1444         }
1445 
1446         // Now, write the B-tree informations
1447         long btreeInfoOffset = writeBtreeInfo( btree );
1448         BTreeHeader<K, V> btreeHeader = ((AbstractBTree<K,V>)btree).getBtreeHeader();
1449         ((PersistedBTree<K, V>)btree).setBtreeInfoOffset( btreeInfoOffset );
1450 
1451         // Serialize the B-tree root page
1452         Page<K, V> rootPage = btreeHeader.getRootPage();
1453 
1454         PageIO[] rootPageIos = serializePage( btree, btreeHeader.getRevision(), rootPage );
1455 
1456         // Get the reference on the first page
1457         long rootPageOffset =  rootPageIos[0].getOffset();
1458 
1459         // Store the rootPageOffset into the Btree header and into the rootPage
1460         btreeHeader.setRootPageOffset( rootPageOffset );
1461         ( ( PersistedLeaf<K, V> ) rootPage ).setOffset( rootPageOffset );
1462 
1463         LOG.debug( "Flushing the newly managed '{}' btree rootpage", btree.getName() );
1464         flushPages( rootPageIos );
1465 
1466         // And the B-tree header
1467         long btreeHeaderOffset = writeBtreeHeader( btree, btreeHeader );
1468 
1469         // Now, if this is a new B-tree, add it to the B-tree of B-trees
1470         if ( treeType != INTERNAL_BTREE )
1471         {
1472             // Add the btree into the map of managed B-trees
1473             managedBtrees.put( name, ( BTree<Object, Object> ) btree );
1474             
1475             // And in the Map of currentBtreeHeaders and newBtreeHeaders
1476             currentBTreeHeaders.put( name, btreeHeader );
1477             newBTreeHeaders.put( name, btreeHeader );
1478 
1479             // We can safely increment the number of managed B-trees
1480             nbBtree++;
1481 
1482             // Create the new NameRevision
1483             NameRevision nameRevision = new NameRevision( name, 0L );
1484 
1485             // Inject it into the B-tree of B-tree
1486             btreeOfBtrees.insert( nameRevision, btreeHeaderOffset );
1487         }
1488     }
1489 
1490 
1491     /**
1492      * Serialize a new Page. It will contain the following data :<br/>
1493      * <ul>
1494      * <li>the revision : a long</li>
1495      * <li>the number of elements : an int (if <= 0, it's a Node, otherwise it's a Leaf)</li>
1496      * <li>the size of the values/keys when serialized
1497      * <li>the keys : an array of serialized keys</li>
1498      * <li>the values : an array of references to the children pageIO offset (stored as long)
1499      * if it's a Node, or a list of values if it's a Leaf</li>
1500      * <li></li>
1501      * </ul>
1502      *
1503      * @param revision The node revision
1504      * @param keys The keys to serialize
1505      * @param children The references to the children
1506      * @return An array of pages containing the serialized node
1507      * @throws IOException
1508      */
1509     private <K, V> PageIO[] serializePage( BTree<K, V> btree, long revision, Page<K, V> page ) throws IOException
1510     {
1511         int nbElems = page.getNbElems();
1512 
1513         boolean isNotSubTree = ( btree.getType() != BTreeTypeEnum.PERSISTED_SUB );
1514         
1515         if ( nbElems == 0 )
1516         {
1517             return serializeRootPage( revision );
1518         }
1519         else
1520         {
1521             // Prepare a list of byte[] that will contain the serialized page
1522             int nbBuffers = 1 + 1 + 1 + nbElems * 3;
1523             int dataSize = 0;
1524             int serializedSize = 0;
1525 
1526             if ( page.isNode() )
1527             {
1528                 // A Node has one more value to store
1529                 nbBuffers++;
1530             }
1531 
1532             // Now, we can create the list with the right size
1533             List<byte[]> serializedData = new ArrayList<byte[]>( nbBuffers );
1534 
1535             // The revision
1536             byte[] buffer = LongSerializer.serialize( revision );
1537             serializedData.add( buffer );
1538             serializedSize += buffer.length;
1539 
1540             // The number of elements
1541             // Make it a negative value if it's a Node
1542             int pageNbElems = nbElems;
1543 
1544             if ( page.isNode() )
1545             {
1546                 pageNbElems = -nbElems;
1547             }
1548 
1549             buffer = IntSerializer.serialize( pageNbElems );
1550             serializedData.add( buffer );
1551             serializedSize += buffer.length;
1552 
1553             // Iterate on the keys and values. We first serialize the value, then the key
1554             // until we are done with all of them. If we are serializing a page, we have
1555             // to serialize one more value
1556             for ( int pos = 0; pos < nbElems; pos++ )
1557             {
1558                 // Start with the value
1559                 if ( page.isNode() )
1560                 {
1561                     dataSize += serializeNodeValue( ( PersistedNode<K, V> ) page, pos, serializedData );
1562                     dataSize += serializeNodeKey( ( PersistedNode<K, V> ) page, pos, serializedData );
1563                 }
1564                 else
1565                 {
1566                     if ( isNotSubTree )
1567                     {
1568                         dataSize += serializeLeafValue( ( PersistedLeaf<K, V> ) page, pos, serializedData );
1569                     }
1570                     
1571                     dataSize += serializeLeafKey( ( PersistedLeaf<K, V> ) page, pos, serializedData );
1572                 }
1573             }
1574 
1575             // Nodes have one more value to serialize
1576             if ( page.isNode() )
1577             {
1578                 dataSize += serializeNodeValue( ( PersistedNode<K, V> ) page, nbElems, serializedData );
1579             }
1580 
1581             // Store the data size
1582             buffer = IntSerializer.serialize( dataSize );
1583             serializedData.add( 2, buffer );
1584             serializedSize += buffer.length;
1585 
1586             serializedSize += dataSize;
1587 
1588             // We are done. Allocate the pages we need to store the data
1589             PageIO[] pageIos = getFreePageIOs( serializedSize );
1590 
1591             // And store the data into those pages
1592             long position = 0L;
1593 
1594             for ( byte[] bytes : serializedData )
1595             {
1596                 position = storeRaw( position, bytes, pageIos );
1597             }
1598 
1599             return pageIos;
1600         }
1601     }
1602 
1603 
1604     /**
1605      * Serialize a Node's key
1606      */
1607     private <K, V> int serializeNodeKey( PersistedNode<K, V> node, int pos, List<byte[]> serializedData )
1608     {
1609         KeyHolder<K> holder = node.getKeyHolder( pos );
1610         byte[] buffer = ( ( PersistedKeyHolder<K> ) holder ).getRaw();
1611 
1612         // We have to store the serialized key length
1613         byte[] length = IntSerializer.serialize( buffer.length );
1614         serializedData.add( length );
1615 
1616         // And store the serialized key now if not null
1617         if ( buffer.length != 0 )
1618         {
1619             serializedData.add( buffer );
1620         }
1621 
1622         return buffer.length + INT_SIZE;
1623     }
1624 
1625 
1626     /**
1627      * Serialize a Node's Value. We store the two offsets of the child page.
1628      */
1629     private <K, V> int serializeNodeValue( PersistedNode<K, V> node, int pos, List<byte[]> serializedData )
1630         throws IOException
1631     {
1632         // For a node, we just store the children's offsets
1633         Page<K, V> child = node.getReference( pos );
1634 
1635         // The first offset
1636         byte[] buffer = LongSerializer.serialize( ( ( AbstractPage<K, V> ) child ).getOffset() );
1637         serializedData.add( buffer );
1638         int dataSize = buffer.length;
1639 
1640         // The last offset
1641         buffer = LongSerializer.serialize( ( ( AbstractPage<K, V> ) child ).getLastOffset() );
1642         serializedData.add( buffer );
1643         dataSize += buffer.length;
1644 
1645         return dataSize;
1646     }
1647 
1648 
1649     /**
1650      * Serialize a Leaf's key
1651      */
1652     private <K, V> int serializeLeafKey( PersistedLeaf<K, V> leaf, int pos, List<byte[]> serializedData )
1653     {
1654         int dataSize = 0;
1655         KeyHolder<K> keyHolder = leaf.getKeyHolder( pos );
1656         byte[] keyData = ( ( PersistedKeyHolder<K> ) keyHolder ).getRaw();
1657 
1658         if ( keyData != null )
1659         {
1660             // We have to store the serialized key length
1661             byte[] length = IntSerializer.serialize( keyData.length );
1662             serializedData.add( length );
1663 
1664             // And the key data
1665             serializedData.add( keyData );
1666             dataSize += keyData.length + INT_SIZE;
1667         }
1668         else
1669         {
1670             serializedData.add( IntSerializer.serialize( 0 ) );
1671             dataSize += INT_SIZE;
1672         }
1673 
1674         return dataSize;
1675     }
1676 
1677 
1678     /**
1679      * Serialize a Leaf's Value.
1680      */
1681     private <K, V> int serializeLeafValue( PersistedLeaf<K, V> leaf, int pos, List<byte[]> serializedData )
1682         throws IOException
1683     {
1684         // The value can be an Array or a sub-btree, but we don't care
1685         // we just iterate on all the values
1686         ValueHolder<V> valueHolder = leaf.getValue( pos );
1687         int dataSize = 0;
1688         int nbValues = valueHolder.size();
1689 
1690         if ( !valueHolder.isSubBtree() )
1691         {
1692             // Write the nb elements first
1693             byte[] buffer = IntSerializer.serialize( nbValues );
1694             serializedData.add( buffer );
1695             dataSize = INT_SIZE;
1696 
1697             // We have a serialized value. Just flush it
1698             byte[] data = ( ( PersistedValueHolder<V> ) valueHolder ).getRaw();
1699             dataSize += data.length;
1700 
1701             // Store the data size
1702             buffer = IntSerializer.serialize( data.length );
1703             serializedData.add( buffer );
1704             dataSize += INT_SIZE;
1705 
1706             // and add the data if it's not 0
1707             if ( data.length > 0 )
1708             {
1709                 serializedData.add( data );
1710             }
1711         }
1712         else
1713         {
1714             if ( nbValues == 0 )
1715             {
1716                 // No value.
1717                 byte[] buffer = IntSerializer.serialize( nbValues );
1718                 serializedData.add( buffer );
1719 
1720                 return buffer.length;
1721             }
1722 
1723             if ( valueHolder.isSubBtree() )
1724             {
1725                 // Store the nbVlues as a negative number. We add 1 so that 0 is not confused with an Array value
1726                 byte[] buffer = IntSerializer.serialize( -( nbValues + 1 ) );
1727                 serializedData.add( buffer );
1728                 dataSize += buffer.length;
1729 
1730                 // the B-tree offset
1731                 buffer = LongSerializer.serialize( ( ( PersistedValueHolder<V> ) valueHolder ).getOffset() );
1732                 serializedData.add( buffer );
1733                 dataSize += buffer.length;
1734             }
1735             else
1736             {
1737                 // This is an array, store the nb of values as a positive number
1738                 byte[] buffer = IntSerializer.serialize( nbValues );
1739                 serializedData.add( buffer );
1740                 dataSize += buffer.length;
1741 
1742                 // Now store each value
1743                 byte[] data = ( ( PersistedValueHolder<V> ) valueHolder ).getRaw();
1744                 buffer = IntSerializer.serialize( data.length );
1745                 serializedData.add( buffer );
1746                 dataSize += buffer.length;
1747 
1748                 if ( data.length > 0 )
1749                 {
1750                     serializedData.add( data );
1751                 }
1752 
1753                 dataSize += data.length;
1754             }
1755         }
1756 
1757         return dataSize;
1758     }
1759 
1760 
1761     /**
1762      * Write a root page with no elements in it
1763      */
1764     private PageIO[] serializeRootPage( long revision ) throws IOException
1765     {
1766         // We will have 1 single page if we have no elements
1767         PageIO[] pageIos = new PageIO[1];
1768 
1769         // This is either a new root page or a new page that will be filled later
1770         PageIO newPage = fetchNewPage();
1771 
1772         // We need first to create a byte[] that will contain all the data
1773         // For the root page, this is easy, as we only have to store the revision,
1774         // and the number of elements, which is 0.
1775         long position = 0L;
1776 
1777         position = store( position, revision, newPage );
1778         position = store( position, 0, newPage );
1779 
1780         // Update the page size now
1781         newPage.setSize( ( int ) position );
1782 
1783         // Insert the result into the array of PageIO
1784         pageIos[0] = newPage;
1785 
1786         return pageIos;
1787     }
1788 
1789 
1790     /**
1791      * Update the RecordManager header, injecting the following data :
1792      *
1793      * <pre>
1794      * +---------------------+
1795      * | PageSize            | 4 bytes : The size of a physical page (default to 4096)
1796      * +---------------------+
1797      * | NbTree              | 4 bytes : The number of managed B-trees (at least 1)
1798      * +---------------------+
1799      * | FirstFree           | 8 bytes : The offset of the first free page
1800      * +---------------------+
1801      * | current BoB offset  | 8 bytes : The offset of the current B-tree of B-trees
1802      * +---------------------+
1803      * | previous BoB offset | 8 bytes : The offset of the previous B-tree of B-trees
1804      * +---------------------+
1805      * | current CP offset   | 8 bytes : The offset of the current CopiedPages B-tree
1806      * +---------------------+
1807      * | previous CP offset  | 8 bytes : The offset of the previous CopiedPages B-tree
1808      * +---------------------+
1809      * </pre>
1810      */
1811     public void updateRecordManagerHeader()
1812     {
1813         // The page size
1814         int position = writeData( RECORD_MANAGER_HEADER_BYTES, 0, pageSize );
1815 
1816         // The number of managed B-tree
1817         position = writeData( RECORD_MANAGER_HEADER_BYTES, position, nbBtree );
1818 
1819         // The first free page
1820         position = writeData( RECORD_MANAGER_HEADER_BYTES, position, firstFreePage );
1821 
1822         // The offset of the current B-tree of B-trees
1823         position = writeData( RECORD_MANAGER_HEADER_BYTES, position, currentBtreeOfBtreesOffset );
1824 
1825         // The offset of the copied pages B-tree
1826         position = writeData( RECORD_MANAGER_HEADER_BYTES, position, previousBtreeOfBtreesOffset );
1827 
1828         // Write the RecordManager header on disk
1829         RECORD_MANAGER_HEADER_BUFFER.put( RECORD_MANAGER_HEADER_BYTES );
1830         RECORD_MANAGER_HEADER_BUFFER.flip();
1831 
1832         LOG.debug( "Update RM header" );
1833 
1834         if ( LOG_PAGES.isDebugEnabled() )
1835         {
1836             StringBuilder sb = new StringBuilder();
1837 
1838             sb.append( "First free page     : 0x" ).append( Long.toHexString( firstFreePage ) ).append( "\n" );
1839             sb.append( "Current BOB header  : 0x" ).append( Long.toHexString( currentBtreeOfBtreesOffset ) ).append( "\n" );
1840             sb.append( "Previous BOB header : 0x" ).append( Long.toHexString( previousBtreeOfBtreesOffset ) ).append( "\n" );
1841 
1842             if ( firstFreePage != NO_PAGE )
1843             {
1844                 long freePage = firstFreePage;
1845                 sb.append( "free pages list : " );
1846 
1847                 boolean isFirst = true;
1848 
1849                 while ( freePage != NO_PAGE )
1850                 {
1851                     if ( isFirst )
1852                     {
1853                         isFirst = false;
1854                     }
1855                     else
1856                     {
1857                         sb.append( " -> " );
1858                     }
1859 
1860                     sb.append( "0x" ).append( Long.toHexString( freePage ) );
1861 
1862                     try
1863                     {
1864                         PageIO[] freePageIO = readPageIOs( freePage, 8 );
1865 
1866                         freePage = freePageIO[0].getNextPage();
1867                     }
1868                     catch ( EndOfFileExceededException e )
1869                     {
1870                         // TODO Auto-generated catch block
1871                         e.printStackTrace();
1872                     }
1873                     catch ( IOException e )
1874                     {
1875                         // TODO Auto-generated catch block
1876                         e.printStackTrace();
1877                     }
1878                 }
1879 
1880             }
1881 
1882             LOG_PAGES.debug( "Update RM Header : \n{}", sb.toString() );
1883         }
1884 
1885         try
1886         {
1887             fileChannel.write( RECORD_MANAGER_HEADER_BUFFER, 0 );
1888         }
1889         catch ( IOException ioe )
1890         {
1891             throw new FileException( ioe.getMessage() );
1892         }
1893 
1894         RECORD_MANAGER_HEADER_BUFFER.clear();
1895 
1896         // Reset the old versions
1897         previousBtreeOfBtreesOffset = -1L;
1898 
1899         nbUpdateRMHeader.incrementAndGet();
1900     }
1901 
1902 
1903     /**
1904      * Update the RecordManager header, injecting the following data :
1905      *
1906      * <pre>
1907      * +---------------------+
1908      * | PageSize            | 4 bytes : The size of a physical page (default to 4096)
1909      * +---------------------+
1910      * | NbTree              | 4 bytes : The number of managed B-trees (at least 1)
1911      * +---------------------+
1912      * | FirstFree           | 8 bytes : The offset of the first free page
1913      * +---------------------+
1914      * | current BoB offset  | 8 bytes : The offset of the current B-tree of B-trees
1915      * +---------------------+
1916      * | previous BoB offset | 8 bytes : The offset of the previous B-tree of B-trees
1917      * +---------------------+
1918      * | current CP offset   | 8 bytes : The offset of the current CopiedPages B-tree
1919      * +---------------------+
1920      * | previous CP offset  | 8 bytes : The offset of the previous CopiedPages B-tree
1921      * +---------------------+
1922      * </pre>
1923      */
1924     public void updateRecordManagerHeader( long newBtreeOfBtreesOffset, long newCopiedPageBtreeOffset )
1925     {
1926         if ( newBtreeOfBtreesOffset != -1L )
1927         {
1928             previousBtreeOfBtreesOffset = currentBtreeOfBtreesOffset;
1929             currentBtreeOfBtreesOffset = newBtreeOfBtreesOffset;
1930         }
1931     }
1932 
1933 
1934     /**
1935      * Inject an int into a byte[] at a given position.
1936      */
1937     private int writeData( byte[] buffer, int position, int value )
1938     {
1939         RECORD_MANAGER_HEADER_BYTES[position] = ( byte ) ( value >>> 24 );
1940         RECORD_MANAGER_HEADER_BYTES[position+1] = ( byte ) ( value >>> 16 );
1941         RECORD_MANAGER_HEADER_BYTES[position+2] = ( byte ) ( value >>> 8 );
1942         RECORD_MANAGER_HEADER_BYTES[position+3] = ( byte ) ( value );
1943 
1944         return position + 4;
1945     }
1946 
1947 
1948     /**
1949      * Inject a long into a byte[] at a given position.
1950      */
1951     private int writeData( byte[] buffer, int position, long value )
1952     {
1953         RECORD_MANAGER_HEADER_BYTES[position] = ( byte ) ( value >>> 56 );
1954         RECORD_MANAGER_HEADER_BYTES[position+1] = ( byte ) ( value >>> 48 );
1955         RECORD_MANAGER_HEADER_BYTES[position+2] = ( byte ) ( value >>> 40 );
1956         RECORD_MANAGER_HEADER_BYTES[position+3] = ( byte ) ( value >>> 32 );
1957         RECORD_MANAGER_HEADER_BYTES[position+4] = ( byte ) ( value >>> 24 );
1958         RECORD_MANAGER_HEADER_BYTES[position+5] = ( byte ) ( value >>> 16 );
1959         RECORD_MANAGER_HEADER_BYTES[position+6] = ( byte ) ( value >>> 8 );
1960         RECORD_MANAGER_HEADER_BYTES[position+7] = ( byte ) ( value );
1961 
1962         return position + 8;
1963     }
1964 
1965 
1966     /**
1967      * Add a new <btree, revision> tuple into the B-tree of B-trees.
1968      *
1969      * @param name The B-tree name
1970      * @param revision The B-tree revision
1971      * @param btreeHeaderOffset The B-tree offset
1972      * @throws IOException If the update failed
1973      */
1974     /* no qualifier */ <K, V> void addInBtreeOfBtrees( String name, long revision, long btreeHeaderOffset ) throws IOException
1975     {
1976         checkOffset( btreeHeaderOffset );
1977         NameRevision nameRevision = new NameRevision( name, revision );
1978 
1979         btreeOfBtrees.insert( nameRevision, btreeHeaderOffset );
1980 
1981         // Update the B-tree of B-trees offset
1982         currentBtreeOfBtreesOffset = getNewBTreeHeader( BTREE_OF_BTREES_NAME ).getBTreeHeaderOffset();
1983     }
1984 
1985 
1986     /**
1987      * Add a new <btree, revision> tuple into the CopiedPages B-tree.
1988      *
1989      * @param name The B-tree name
1990      * @param revision The B-tree revision
1991      * @param btreeHeaderOffset The B-tree offset
1992      * @throws IOException If the update failed
1993      */
1994     /* no qualifier */ <K, V> void addInCopiedPagesBtree( String name, long revision, List<Page<K, V>> pages ) throws IOException
1995     {
1996         RevisionName revisionName = new RevisionName( revision, name );
1997 
1998         long[] pageOffsets = new long[pages.size()];
1999         int pos = 0;
2000 
2001         for ( Page<K, V> page : pages )
2002         {
2003             pageOffsets[pos++] = ((AbstractPage<K, V>)page).getOffset();
2004         }
2005 
2006         copiedPageMap.put( revisionName, pageOffsets );
2007     }
2008 
2009 
2010     /**
2011      * Internal method used to update the B-tree of B-trees offset
2012      * @param btreeOfBtreesOffset The new offset
2013      */
2014     /* no qualifier */ void setBtreeOfBtreesOffset( long btreeOfBtreesOffset )
2015     {
2016         checkOffset( btreeOfBtreesOffset );
2017         this.currentBtreeOfBtreesOffset = btreeOfBtreesOffset;
2018     }
2019 
2020 
2021     /**
2022      * Write the B-tree header on disk. We will write the following informations :
2023      * <pre>
2024      * +------------+
2025      * | revision   | The B-tree revision
2026      * +------------+
2027      * | nbElems    | The B-tree number of elements
2028      * +------------+
2029      * | rootPage   | The root page offset
2030      * +------------+
2031      * | BtreeInfo  | The B-tree info offset
2032      * +------------+
2033      * </pre>
2034      * @param btree The B-tree which header has to be written
2035      * @param btreeInfoOffset The offset of the B-tree informations
2036      * @return The B-tree header offset
2037      * @throws IOException If we weren't able to write the B-tree header
2038      */
2039     /* no qualifier */ <K, V> long writeBtreeHeader( BTree<K, V> btree, BTreeHeader<K, V> btreeHeader ) throws IOException
2040     {
2041         int bufferSize =
2042             LONG_SIZE +                     // The revision
2043             LONG_SIZE +                     // the number of element
2044             LONG_SIZE +                     // The root page offset
2045             LONG_SIZE;                      // The B-tree info page offset
2046 
2047         // Get the pageIOs we need to store the data. We may need more than one.
2048         PageIO[] btreeHeaderPageIos = getFreePageIOs( bufferSize );
2049 
2050         // Store the B-tree header Offset into the B-tree
2051         long btreeHeaderOffset = btreeHeaderPageIos[0].getOffset();
2052 
2053         // Now store the B-tree data in the pages :
2054         // - the B-tree revision
2055         // - the B-tree number of elements
2056         // - the B-tree root page offset
2057         // - the B-tree info page offset
2058         // Starts at 0
2059         long position = 0L;
2060 
2061         // The B-tree current revision
2062         position = store( position, btreeHeader.getRevision(), btreeHeaderPageIos );
2063 
2064         // The nb elems in the tree
2065         position = store( position, btreeHeader.getNbElems(), btreeHeaderPageIos );
2066 
2067 
2068         // Now, we can inject the B-tree rootPage offset into the B-tree header
2069         position = store( position, btreeHeader.getRootPageOffset(), btreeHeaderPageIos );
2070 
2071         // The B-tree info page offset
2072         position = store( position, ((PersistedBTree<K, V>)btree).getBtreeInfoOffset(), btreeHeaderPageIos );
2073 
2074         // And flush the pages to disk now
2075         LOG.debug( "Flushing the newly managed '{}' btree header", btree.getName() );
2076 
2077         if ( LOG_PAGES.isDebugEnabled() )
2078         {
2079             LOG_PAGES.debug( "Writing BTreeHeader revision {} for {}", btreeHeader.getRevision(), btree.getName() );
2080             StringBuilder sb = new StringBuilder();
2081 
2082             sb.append( "Offset : " ).append( Long.toHexString( btreeHeaderOffset ) ).append( "\n" );
2083             sb.append( "    Revision : " ).append( btreeHeader.getRevision() ).append( "\n" );
2084             sb.append( "    NbElems  : " ).append( btreeHeader.getNbElems() ).append( "\n" );
2085             sb.append( "    RootPage : 0x" ).append( Long.toHexString( btreeHeader.getRootPageOffset() ) ).append( "\n" );
2086             sb.append( "    Info     : 0x" ).append( Long.toHexString( ((PersistedBTree<K, V>)btree).getBtreeInfoOffset() ) ).append( "\n" );
2087 
2088             LOG_PAGES.debug( "Btree Header[{}]\n{}", btreeHeader.getRevision(), sb.toString() );
2089         }
2090 
2091         flushPages( btreeHeaderPageIos );
2092 
2093         btreeHeader.setBTreeHeaderOffset( btreeHeaderOffset );
2094 
2095         return btreeHeaderOffset;
2096     }
2097 
2098 
2099     /**
2100      * Write the B-tree informations on disk. We will write the following informations :
2101      * <pre>
2102      * +------------+
2103      * | pageSize   | The B-tree page size (ie, the number of elements per page max)
2104      * +------------+
2105      * | nameSize   | The B-tree name size
2106      * +------------+
2107      * | name       | The B-tree name
2108      * +------------+
2109      * | keySerSize | The keySerializer FQCN size
2110      * +------------+
2111      * | keySerFQCN | The keySerializer FQCN
2112      * +------------+
2113      * | valSerSize | The Value serializer FQCN size
2114      * +------------+
2115      * | valSerKQCN | The valueSerializer FQCN
2116      * +------------+
2117      * | dups       | The flags that tell if the dups are allowed
2118      * +------------+
2119      * </pre>
2120      * @param btree The B-tree which header has to be written
2121      * @return The B-tree header offset
2122      * @throws IOException If we weren't able to write the B-tree header
2123      */
2124     private <K, V> long writeBtreeInfo( BTree<K, V> btree ) throws IOException
2125     {
2126         // We will add the newly managed B-tree at the end of the header.
2127         byte[] btreeNameBytes = Strings.getBytesUtf8( btree.getName() );
2128         byte[] keySerializerBytes = Strings.getBytesUtf8( btree.getKeySerializerFQCN() );
2129         byte[] valueSerializerBytes = Strings.getBytesUtf8( btree.getValueSerializerFQCN() );
2130 
2131         int bufferSize =
2132             INT_SIZE +                      // The page size
2133             INT_SIZE +                      // The name size
2134             btreeNameBytes.length +         // The name
2135             INT_SIZE +                      // The keySerializerBytes size
2136             keySerializerBytes.length +     // The keySerializerBytes
2137             INT_SIZE +                      // The valueSerializerBytes size
2138             valueSerializerBytes.length +   // The valueSerializerBytes
2139             INT_SIZE;                       // The allowDuplicates flag
2140 
2141         // Get the pageIOs we need to store the data. We may need more than one.
2142         PageIO[] btreeHeaderPageIos = getFreePageIOs( bufferSize );
2143 
2144         // Keep the B-tree header Offset into the B-tree
2145         long btreeInfoOffset = btreeHeaderPageIos[0].getOffset();
2146 
2147         // Now store the B-tree information data in the pages :
2148         // - the B-tree page size
2149         // - the B-tree name
2150         // - the keySerializer FQCN
2151         // - the valueSerializer FQCN
2152         // - the flags that tell if the dups are allowed
2153         // Starts at 0
2154         long position = 0L;
2155 
2156         // The B-tree page size
2157         position = store( position, btree.getPageSize(), btreeHeaderPageIos );
2158 
2159         // The tree name
2160         position = store( position, btreeNameBytes, btreeHeaderPageIos );
2161 
2162         // The keySerializer FQCN
2163         position = store( position, keySerializerBytes, btreeHeaderPageIos );
2164 
2165         // The valueSerialier FQCN
2166         position = store( position, valueSerializerBytes, btreeHeaderPageIos );
2167 
2168         // The allowDuplicates flag
2169         position = store( position, ( btree.isAllowDuplicates() ? 1 : 0 ), btreeHeaderPageIos );
2170 
2171         // And flush the pages to disk now
2172         LOG.debug( "Flushing the newly managed '{}' btree header", btree.getName() );
2173         flushPages( btreeHeaderPageIos );
2174 
2175         return btreeInfoOffset;
2176     }
2177 
2178 
2179     /**
2180      * Update the B-tree header after a B-tree modification. This will make the latest modification
2181      * visible.<br/>
2182      * We update the following fields :
2183      * <ul>
2184      * <li>the revision</li>
2185      * <li>the number of elements</li>
2186      * <li>the B-tree root page offset</li>
2187      * </ul>
2188      * <br/>
2189      * As a result, a new version of the BtreHeader will be created, which will replace the previous
2190      * B-tree header
2191      * @param btree TheB-tree to update
2192      * @param btreeHeaderOffset The offset of the modified btree header
2193      * @return The offset of the new B-tree Header
2194      * @throws IOException If we weren't able to write the file on disk
2195      * @throws EndOfFileExceededException If we tried to write after the end of the file
2196      */
2197     /* no qualifier */ <K, V> long updateBtreeHeader( BTree<K, V> btree, long btreeHeaderOffset )
2198         throws EndOfFileExceededException, IOException
2199     {
2200         return updateBtreeHeader( btree, btreeHeaderOffset, false );
2201     }
2202 
2203 
2204     /**
2205      * Update the B-tree header after a B-tree modification. This will make the latest modification
2206      * visible.<br/>
2207      * We update the following fields :
2208      * <ul>
2209      * <li>the revision</li>
2210      * <li>the number of elements</li>
2211      * <li>the reference to the current B-tree revisions</li>
2212      * <li>the reference to the old B-tree revisions</li>
2213      * </ul>
2214      * <br/>
2215      * As a result, we new version of the BtreHeader will be created
2216      * @param btree The B-tree to update
2217      * @param btreeHeaderOffset The offset of the modified btree header
2218      * @return The offset of the new B-tree Header if it has changed (ie, when the onPlace flag is set to true)
2219      * @throws IOException
2220      * @throws EndOfFileExceededException
2221      */
2222     /* no qualifier */ <K, V> void updateBtreeHeaderOnPlace( BTree<K, V> btree, long btreeHeaderOffset )
2223         throws EndOfFileExceededException,
2224         IOException
2225     {
2226         updateBtreeHeader( btree, btreeHeaderOffset, true );
2227     }
2228 
2229 
2230     /**
2231      * Update the B-tree header after a B-tree modification. This will make the latest modification
2232      * visible.<br/>
2233      * We update the following fields :
2234      * <ul>
2235      * <li>the revision</li>
2236      * <li>the number of elements</li>
2237      * <li>the reference to the current B-tree revisions</li>
2238      * <li>the reference to the old B-tree revisions</li>
2239      * </ul>
2240      * <br/>
2241      * As a result, a new version of the BtreHeader will be created, which may replace the previous
2242      * B-tree header (if the onPlace flag is set to true) or a new set of pageIos will contain the new
2243      * version.
2244      *
2245      * @param btree The B-tree to update
2246      * @param rootPageOffset The offset of the modified rootPage
2247      * @param onPlace Tells if we modify the B-tree on place, or if we create a copy
2248      * @return The offset of the new B-tree Header if it has changed (ie, when the onPlace flag is set to true)
2249      * @throws EndOfFileExceededException If we tried to write after the end of the file
2250      * @throws IOException If tehre were some error while writing the data on disk
2251      */
2252     private <K, V> long updateBtreeHeader( BTree<K, V> btree, long btreeHeaderOffset, boolean onPlace )
2253         throws EndOfFileExceededException, IOException
2254     {
2255         // Read the pageIOs associated with this B-tree
2256         PageIO[] pageIos;
2257         long newBtreeHeaderOffset = NO_PAGE;
2258         long offset = ( ( PersistedBTree<K, V> ) btree ).getBtreeOffset();
2259 
2260         if ( onPlace )
2261         {
2262             // We just have to update the existing BTreeHeader
2263             long headerSize = LONG_SIZE + LONG_SIZE + LONG_SIZE;
2264 
2265             pageIos = readPageIOs( offset, headerSize );
2266 
2267             // Now, update the revision
2268             long position = 0;
2269 
2270             position = store( position, btree.getRevision(), pageIos );
2271             position = store( position, btree.getNbElems(), pageIos );
2272             position = store( position, btreeHeaderOffset, pageIos );
2273 
2274             // Write the pages on disk
2275             if ( LOG.isDebugEnabled() )
2276             {
2277                 LOG.debug( "-----> Flushing the '{}' B-treeHeader", btree.getName() );
2278                 LOG.debug( "  revision : " + btree.getRevision() + ", NbElems : " + btree.getNbElems() + ", btreeHeader offset : 0x"
2279                     + Long.toHexString( btreeHeaderOffset ) );
2280             }
2281 
2282             // Get new place on disk to store the modified BTreeHeader if it's not onPlace
2283             // Rewrite the pages at the same place
2284             LOG.debug( "Rewriting the B-treeHeader on place for B-tree " + btree.getName() );
2285             flushPages( pageIos );
2286         }
2287         else
2288         {
2289             // We have to read and copy the existing BTreeHeader and to create a new one
2290             pageIos = readPageIOs( offset, Long.MAX_VALUE );
2291 
2292             // Now, copy every read page
2293             PageIO[] newPageIOs = new PageIO[pageIos.length];
2294             int pos = 0;
2295 
2296             for ( PageIO pageIo : pageIos )
2297             {
2298                 // Fetch a free page
2299                 newPageIOs[pos] = fetchNewPage();
2300 
2301                 // keep a track of the allocated and copied pages so that we can
2302                 // free them when we do a commit or rollback, if the btree is an management one
2303                 if ( ( btree.getType() == BTreeTypeEnum.BTREE_OF_BTREES ) || ( btree.getType() == BTreeTypeEnum.COPIED_PAGES_BTREE ) )
2304                 {
2305                     freedPages.add( pageIo );
2306                     allocatedPages.add( newPageIOs[pos] );
2307                 }
2308 
2309                 pageIo.copy( newPageIOs[pos] );
2310 
2311                 if ( pos > 0 )
2312                 {
2313                     newPageIOs[pos - 1].setNextPage( newPageIOs[pos].getOffset() );
2314                 }
2315 
2316                 pos++;
2317             }
2318 
2319             // store the new btree header offset
2320             // and update the revision
2321             long position = 0;
2322 
2323             position = store( position, btree.getRevision(), newPageIOs );
2324             position = store( position, btree.getNbElems(), newPageIOs );
2325             position = store( position, btreeHeaderOffset, newPageIOs );
2326 
2327             // Get new place on disk to store the modified BTreeHeader if it's not onPlace
2328             // Flush the new B-treeHeader on disk
2329             LOG.debug( "Rewriting the B-treeHeader on place for B-tree " + btree.getName() );
2330             flushPages( newPageIOs );
2331 
2332             newBtreeHeaderOffset = newPageIOs[0].getOffset();
2333         }
2334 
2335         nbUpdateBtreeHeader.incrementAndGet();
2336 
2337         if ( LOG_CHECK.isDebugEnabled() )
2338         {
2339             MavibotInspector.check( this );
2340         }
2341 
2342         return newBtreeHeaderOffset;
2343     }
2344 
2345 
2346     /**
2347      * Write the pages on disk, either at the end of the file, or at
2348      * the position they were taken from.
2349      *
2350      * @param pageIos The list of pages to write
2351      * @throws IOException If the write failed
2352      */
2353     private void flushPages( PageIO... pageIos ) throws IOException
2354     {
2355         if ( LOG.isDebugEnabled() )
2356         {
2357             for ( PageIO pageIo : pageIos )
2358             {
2359                 dump( pageIo );
2360             }
2361         }
2362 
2363         for ( PageIO pageIo : pageIos )
2364         {
2365             pageIo.getData().rewind();
2366 
2367             if ( fileChannel.size() < ( pageIo.getOffset() + pageSize ) )
2368             {
2369                 LOG.debug( "Adding a page at the end of the file" );
2370                 // This is a page we have to add to the file
2371                 fileChannel.write( pageIo.getData(), fileChannel.size() );
2372                 //fileChannel.force( false );
2373             }
2374             else
2375             {
2376                 LOG.debug( "Writing a page at position {}", pageIo.getOffset() );
2377                 fileChannel.write( pageIo.getData(), pageIo.getOffset() );
2378                 //fileChannel.force( false );
2379             }
2380 
2381             nbUpdatePageIOs.incrementAndGet();
2382 
2383             pageIo.getData().rewind();
2384         }
2385     }
2386 
2387 
2388     /**
2389      * Compute the page in which we will store data given an offset, when
2390      * we have a list of pages.
2391      *
2392      * @param offset The position in the data
2393      * @return The page number in which the offset will start
2394      */
2395     private int computePageNb( long offset )
2396     {
2397         long pageNb = 0;
2398 
2399         offset -= pageSize - LINK_SIZE - PAGE_SIZE;
2400 
2401         if ( offset < 0 )
2402         {
2403             return ( int ) pageNb;
2404         }
2405 
2406         pageNb = 1 + offset / ( pageSize - LINK_SIZE );
2407 
2408         return ( int ) pageNb;
2409     }
2410 
2411 
2412     /**
2413      * Stores a byte[] into one ore more pageIO (depending if the long is stored
2414      * across a boundary or not)
2415      *
2416      * @param position The position in a virtual byte[] if all the pages were contiguous
2417      * @param bytes The byte[] to serialize
2418      * @param pageIos The pageIOs we have to store the data in
2419      * @return The new offset
2420      */
2421     private long store( long position, byte[] bytes, PageIO... pageIos )
2422     {
2423         if ( bytes != null )
2424         {
2425             // Write the bytes length
2426             position = store( position, bytes.length, pageIos );
2427 
2428             // Compute the page in which we will store the data given the
2429             // current position
2430             int pageNb = computePageNb( position );
2431 
2432             // Get back the buffer in this page
2433             ByteBuffer pageData = pageIos[pageNb].getData();
2434 
2435             // Compute the position in the current page
2436             int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
2437 
2438             // Compute the remaining size in the page
2439             int remaining = pageData.capacity() - pagePos;
2440             int nbStored = bytes.length;
2441 
2442             // And now, write the bytes until we have none
2443             while ( nbStored > 0 )
2444             {
2445                 if ( remaining > nbStored )
2446                 {
2447                     pageData.mark();
2448                     pageData.position( pagePos );
2449                     pageData.put( bytes, bytes.length - nbStored, nbStored );
2450                     pageData.reset();
2451                     nbStored = 0;
2452                 }
2453                 else
2454                 {
2455                     pageData.mark();
2456                     pageData.position( pagePos );
2457                     pageData.put( bytes, bytes.length - nbStored, remaining );
2458                     pageData.reset();
2459                     pageNb++;
2460                     pageData = pageIos[pageNb].getData();
2461                     pagePos = LINK_SIZE;
2462                     nbStored -= remaining;
2463                     remaining = pageData.capacity() - pagePos;
2464                 }
2465             }
2466 
2467             // We are done
2468             position += bytes.length;
2469         }
2470         else
2471         {
2472             // No bytes : write 0 and return
2473             position = store( position, 0, pageIos );
2474         }
2475 
2476         return position;
2477     }
2478 
2479 
2480     /**
2481      * Stores a byte[] into one ore more pageIO (depending if the long is stored
2482      * across a boundary or not). We don't add the byte[] size, it's already present
2483      * in the received byte[].
2484      *
2485      * @param position The position in a virtual byte[] if all the pages were contiguous
2486      * @param bytes The byte[] to serialize
2487      * @param pageIos The pageIOs we have to store the data in
2488      * @return The new offset
2489      */
2490     private long storeRaw( long position, byte[] bytes, PageIO... pageIos )
2491     {
2492         if ( bytes != null )
2493         {
2494             // Compute the page in which we will store the data given the
2495             // current position
2496             int pageNb = computePageNb( position );
2497 
2498             // Get back the buffer in this page
2499             ByteBuffer pageData = pageIos[pageNb].getData();
2500 
2501             // Compute the position in the current page
2502             int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
2503 
2504             // Compute the remaining size in the page
2505             int remaining = pageData.capacity() - pagePos;
2506             int nbStored = bytes.length;
2507 
2508             // And now, write the bytes until we have none
2509             while ( nbStored > 0 )
2510             {
2511                 if ( remaining > nbStored )
2512                 {
2513                     pageData.mark();
2514                     pageData.position( pagePos );
2515                     pageData.put( bytes, bytes.length - nbStored, nbStored );
2516                     pageData.reset();
2517                     nbStored = 0;
2518                 }
2519                 else
2520                 {
2521                     pageData.mark();
2522                     pageData.position( pagePos );
2523                     pageData.put( bytes, bytes.length - nbStored, remaining );
2524                     pageData.reset();
2525                     pageNb++;
2526 
2527                     if ( pageNb == pageIos.length )
2528                     {
2529                         // We can stop here : we have reach the end of the page
2530                         break;
2531                     }
2532 
2533                     pageData = pageIos[pageNb].getData();
2534                     pagePos = LINK_SIZE;
2535                     nbStored -= remaining;
2536                     remaining = pageData.capacity() - pagePos;
2537                 }
2538             }
2539 
2540             // We are done
2541             position += bytes.length;
2542         }
2543         else
2544         {
2545             // No bytes : write 0 and return
2546             position = store( position, 0, pageIos );
2547         }
2548 
2549         return position;
2550     }
2551 
2552 
2553     /**
2554      * Stores an Integer into one ore more pageIO (depending if the int is stored
2555      * across a boundary or not)
2556      *
2557      * @param position The position in a virtual byte[] if all the pages were contiguous
2558      * @param value The int to serialize
2559      * @param pageIos The pageIOs we have to store the data in
2560      * @return The new offset
2561      */
2562     private long store( long position, int value, PageIO... pageIos )
2563     {
2564         // Compute the page in which we will store the data given the
2565         // current position
2566         int pageNb = computePageNb( position );
2567 
2568         // Compute the position in the current page
2569         int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
2570 
2571         // Get back the buffer in this page
2572         ByteBuffer pageData = pageIos[pageNb].getData();
2573 
2574         // Compute the remaining size in the page
2575         int remaining = pageData.capacity() - pagePos;
2576 
2577         if ( remaining < INT_SIZE )
2578         {
2579             // We have to copy the serialized length on two pages
2580 
2581             switch ( remaining )
2582             {
2583                 case 3:
2584                     pageData.put( pagePos + 2, ( byte ) ( value >>> 8 ) );
2585                     // Fallthrough !!!
2586 
2587                 case 2:
2588                     pageData.put( pagePos + 1, ( byte ) ( value >>> 16 ) );
2589                     // Fallthrough !!!
2590 
2591                 case 1:
2592                     pageData.put( pagePos, ( byte ) ( value >>> 24 ) );
2593                     break;
2594             }
2595 
2596             // Now deal with the next page
2597             pageData = pageIos[pageNb + 1].getData();
2598             pagePos = LINK_SIZE;
2599 
2600             switch ( remaining )
2601             {
2602                 case 1:
2603                     pageData.put( pagePos, ( byte ) ( value >>> 16 ) );
2604                     // fallthrough !!!
2605 
2606                 case 2:
2607                     pageData.put( pagePos + 2 - remaining, ( byte ) ( value >>> 8 ) );
2608                     // fallthrough !!!
2609 
2610                 case 3:
2611                     pageData.put( pagePos + 3 - remaining, ( byte ) ( value ) );
2612                     break;
2613             }
2614         }
2615         else
2616         {
2617             // Store the value in the page at the selected position
2618             pageData.putInt( pagePos, value );
2619         }
2620 
2621         // Increment the position to reflect the addition of an Int (4 bytes)
2622         position += INT_SIZE;
2623 
2624         return position;
2625     }
2626 
2627 
2628     /**
2629      * Stores a Long into one ore more pageIO (depending if the long is stored
2630      * across a boundary or not)
2631      *
2632      * @param position The position in a virtual byte[] if all the pages were contiguous
2633      * @param value The long to serialize
2634      * @param pageIos The pageIOs we have to store the data in
2635      * @return The new offset
2636      */
2637     private long store( long position, long value, PageIO... pageIos )
2638     {
2639         // Compute the page in which we will store the data given the
2640         // current position
2641         int pageNb = computePageNb( position );
2642 
2643         // Compute the position in the current page
2644         int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
2645 
2646         // Get back the buffer in this page
2647         ByteBuffer pageData = pageIos[pageNb].getData();
2648 
2649         // Compute the remaining size in the page
2650         int remaining = pageData.capacity() - pagePos;
2651 
2652         if ( remaining < LONG_SIZE )
2653         {
2654             // We have to copy the serialized length on two pages
2655 
2656             switch ( remaining )
2657             {
2658                 case 7:
2659                     pageData.put( pagePos + 6, ( byte ) ( value >>> 8 ) );
2660                     // Fallthrough !!!
2661 
2662                 case 6:
2663                     pageData.put( pagePos + 5, ( byte ) ( value >>> 16 ) );
2664                     // Fallthrough !!!
2665 
2666                 case 5:
2667                     pageData.put( pagePos + 4, ( byte ) ( value >>> 24 ) );
2668                     // Fallthrough !!!
2669 
2670                 case 4:
2671                     pageData.put( pagePos + 3, ( byte ) ( value >>> 32 ) );
2672                     // Fallthrough !!!
2673 
2674                 case 3:
2675                     pageData.put( pagePos + 2, ( byte ) ( value >>> 40 ) );
2676                     // Fallthrough !!!
2677 
2678                 case 2:
2679                     pageData.put( pagePos + 1, ( byte ) ( value >>> 48 ) );
2680                     // Fallthrough !!!
2681 
2682                 case 1:
2683                     pageData.put( pagePos, ( byte ) ( value >>> 56 ) );
2684                     break;
2685             }
2686 
2687             // Now deal with the next page
2688             pageData = pageIos[pageNb + 1].getData();
2689             pagePos = LINK_SIZE;
2690 
2691             switch ( remaining )
2692             {
2693                 case 1:
2694                     pageData.put( pagePos, ( byte ) ( value >>> 48 ) );
2695                     // fallthrough !!!
2696 
2697                 case 2:
2698                     pageData.put( pagePos + 2 - remaining, ( byte ) ( value >>> 40 ) );
2699                     // fallthrough !!!
2700 
2701                 case 3:
2702                     pageData.put( pagePos + 3 - remaining, ( byte ) ( value >>> 32 ) );
2703                     // fallthrough !!!
2704 
2705                 case 4:
2706                     pageData.put( pagePos + 4 - remaining, ( byte ) ( value >>> 24 ) );
2707                     // fallthrough !!!
2708 
2709                 case 5:
2710                     pageData.put( pagePos + 5 - remaining, ( byte ) ( value >>> 16 ) );
2711                     // fallthrough !!!
2712 
2713                 case 6:
2714                     pageData.put( pagePos + 6 - remaining, ( byte ) ( value >>> 8 ) );
2715                     // fallthrough !!!
2716 
2717                 case 7:
2718                     pageData.put( pagePos + 7 - remaining, ( byte ) ( value ) );
2719                     break;
2720             }
2721         }
2722         else
2723         {
2724             // Store the value in the page at the selected position
2725             pageData.putLong( pagePos, value );
2726         }
2727 
2728         // Increment the position to reflect the addition of an Long (8 bytes)
2729         position += LONG_SIZE;
2730 
2731         return position;
2732     }
2733 
2734 
2735     /**
2736      * Write the page in a serialized form.
2737      *
2738      * @param btree The persistedBtree we will create a new PageHolder for
2739      * @param newPage The page to write on disk
2740      * @param newRevision The page's revision
2741      * @return A PageHolder containing the copied page
2742      * @throws IOException If the page can't be written on disk
2743      */
2744     /* No qualifier*/<K, V> PageHolder<K, V> writePage( BTree<K, V> btree, Page<K, V> newPage,
2745         long newRevision ) throws IOException
2746     {
2747         // We first need to save the new page on disk
2748         PageIO[] pageIos = serializePage( btree, newRevision, newPage );
2749 
2750         if ( LOG_PAGES.isDebugEnabled() )
2751         {
2752             LOG_PAGES.debug( "Write data for '{}' btree", btree.getName()  );
2753 
2754             logPageIos( pageIos );
2755         }
2756 
2757         // Write the page on disk
2758         flushPages( pageIos );
2759 
2760         // Build the resulting reference
2761         long offset = pageIos[0].getOffset();
2762         long lastOffset = pageIos[pageIos.length - 1].getOffset();
2763         PersistedPageHolder<K, V> pageHolder = new PersistedPageHolder<K, V>( btree, newPage, offset,
2764             lastOffset );
2765 
2766         return pageHolder;
2767     }
2768 
2769 
2770     /* No qualifier */ static void logPageIos( PageIO[] pageIos )
2771     {
2772         int pageNb = 0;
2773 
2774         for ( PageIO pageIo : pageIos )
2775         {
2776             StringBuilder sb = new StringBuilder();
2777             sb.append( "PageIO[" ).append( pageNb ).append( "]:0x" );
2778             sb.append( Long.toHexString( pageIo.getOffset() ) ).append( "/");
2779             sb.append( pageIo.getSize() );
2780             pageNb++;
2781 
2782             ByteBuffer data = pageIo.getData();
2783 
2784             int position = data.position();
2785             int dataLength = (int)pageIo.getSize() + 12;
2786             
2787             if ( dataLength > data.limit() )
2788             {
2789                 dataLength = data.limit();
2790             }
2791             
2792             byte[] bytes = new byte[dataLength];
2793 
2794             data.get( bytes );
2795             data.position( position );
2796             int pos = 0;
2797 
2798             for ( byte b : bytes )
2799             {
2800                 int mod = pos%16;
2801 
2802                 switch ( mod )
2803                 {
2804                     case 0:
2805                         sb.append( "\n    " );
2806                         // No break
2807                     case 4:
2808                     case 8:
2809                     case 12:
2810                         sb.append( " " );
2811                     case 1:
2812                     case 2:
2813                     case 3:
2814                     case 5:
2815                     case 6:
2816                     case 7:
2817                     case 9:
2818                     case 10:
2819                     case 11:
2820                     case 13:
2821                     case 14:
2822                     case 15:
2823                         sb.append( Strings.dumpByte( b ) ).append( " " );
2824                 }
2825                 pos++;
2826             }
2827 
2828             LOG_PAGES.debug( sb.toString() );
2829         }
2830     }
2831 
2832 
2833     /**
2834      * Compute the number of pages needed to store some specific size of data.
2835      *
2836      * @param dataSize The size of the data we want to store in pages
2837      * @return The number of pages needed
2838      */
2839     private int computeNbPages( int dataSize )
2840     {
2841         if ( dataSize <= 0 )
2842         {
2843             return 0;
2844         }
2845 
2846         // Compute the number of pages needed.
2847         // Considering that each page can contain PageSize bytes,
2848         // but that the first 8 bytes are used for links and we
2849         // use 4 bytes to store the data size, the number of needed
2850         // pages is :
2851         // NbPages = ( (dataSize - (PageSize - 8 - 4 )) / (PageSize - 8) ) + 1
2852         // NbPages += ( if (dataSize - (PageSize - 8 - 4 )) % (PageSize - 8) > 0 : 1 : 0 )
2853         int availableSize = ( pageSize - LONG_SIZE );
2854         int nbNeededPages = 1;
2855 
2856         // Compute the number of pages that will be full but the first page
2857         if ( dataSize > availableSize - INT_SIZE )
2858         {
2859             int remainingSize = dataSize - ( availableSize - INT_SIZE );
2860             nbNeededPages += remainingSize / availableSize;
2861             int remain = remainingSize % availableSize;
2862 
2863             if ( remain > 0 )
2864             {
2865                 nbNeededPages++;
2866             }
2867         }
2868 
2869         return nbNeededPages;
2870     }
2871 
2872 
2873     /**
2874      * Get as many pages as needed to store the data of the given size. The returned
2875      * PageIOs are all linked together.
2876      *
2877      * @param dataSize The data size
2878      * @return An array of pages, enough to store the full data
2879      */
2880     private PageIO[] getFreePageIOs( int dataSize ) throws IOException
2881     {
2882         if ( dataSize == 0 )
2883         {
2884             return new PageIO[]
2885                 {};
2886         }
2887 
2888         int nbNeededPages = computeNbPages( dataSize );
2889 
2890         PageIO[] pageIOs = new PageIO[nbNeededPages];
2891 
2892         // The first page : set the size
2893         pageIOs[0] = fetchNewPage();
2894         pageIOs[0].setSize( dataSize );
2895 
2896         for ( int i = 1; i < nbNeededPages; i++ )
2897         {
2898             pageIOs[i] = fetchNewPage();
2899 
2900             // Create the link
2901             pageIOs[i - 1].setNextPage( pageIOs[i].getOffset() );
2902         }
2903 
2904         return pageIOs;
2905     }
2906 
2907 
2908     /**
2909      * Return a new Page. We take one of the existing free pages, or we create
2910      * a new page at the end of the file.
2911      *
2912      * @return The fetched PageIO
2913      */
2914     private PageIO fetchNewPage() throws IOException
2915     {
2916         //dumpFreePages( firstFreePage );
2917 
2918         if ( firstFreePage == NO_PAGE )
2919         {
2920             nbCreatedPages.incrementAndGet();
2921 
2922             // We don't have any free page. Reclaim some new page at the end
2923             // of the file
2924             PageIO newPage = new PageIO( endOfFileOffset );
2925 
2926             endOfFileOffset += pageSize;
2927 
2928             ByteBuffer data = ByteBuffer.allocateDirect( pageSize );
2929 
2930             newPage.setData( data );
2931             newPage.setNextPage( NO_PAGE );
2932             newPage.setSize( 0 );
2933 
2934             LOG.debug( "Requiring a new page at offset {}", newPage.getOffset() );
2935 
2936             return newPage;
2937         }
2938         else
2939         {
2940             nbReusedPages.incrementAndGet();
2941 
2942             freePageLock.lock();
2943             
2944             // We have some existing free page. Fetch it from disk
2945             PageIO pageIo = fetchPage( firstFreePage );
2946 
2947             // Update the firstFreePage pointer
2948             firstFreePage = pageIo.getNextPage();
2949 
2950             freePageLock.unlock();
2951             
2952             // overwrite the data of old page
2953             ByteBuffer data = ByteBuffer.allocateDirect( pageSize );
2954             pageIo.setData( data );
2955 
2956             pageIo.setNextPage( NO_PAGE );
2957             pageIo.setSize( 0 );
2958 
2959             LOG.debug( "Reused page at offset {}", pageIo.getOffset() );
2960 
2961             return pageIo;
2962         }
2963     }
2964 
2965 
2966     /**
2967      * fetch a page from disk, knowing its position in the file.
2968      *
2969      * @param offset The position in the file
2970      * @return The found page
2971      */
2972     /* no qualifier */ PageIO fetchPage( long offset ) throws IOException, EndOfFileExceededException
2973     {
2974         checkOffset( offset );
2975 
2976         if ( fileChannel.size() < offset + pageSize )
2977         {
2978             // Error : we are past the end of the file
2979             throw new EndOfFileExceededException( "We are fetching a page on " + offset +
2980                 " when the file's size is " + fileChannel.size() );
2981         }
2982         else
2983         {
2984             // Read the page
2985             fileChannel.position( offset );
2986 
2987             ByteBuffer data = ByteBuffer.allocate( pageSize );
2988             fileChannel.read( data );
2989             data.rewind();
2990 
2991             PageIO readPage = new PageIO( offset );
2992             readPage.setData( data );
2993 
2994             return readPage;
2995         }
2996     }
2997 
2998 
2999     /**
3000      * @return the pageSize
3001      */
3002     public int getPageSize()
3003     {
3004         return pageSize;
3005     }
3006 
3007 
3008     /**
3009      * Set the page size, ie the number of bytes a page can store.
3010      *
3011      * @param pageSize The number of bytes for a page
3012      */
3013     /* no qualifier */ void setPageSize( int pageSize )
3014     {
3015         if ( this.pageSize >= 13 )
3016         {
3017             this.pageSize = pageSize;
3018         }
3019         else
3020         {
3021             this.pageSize = DEFAULT_PAGE_SIZE;
3022         }
3023     }
3024 
3025 
3026     /**
3027      * Close the RecordManager and flush everything on disk
3028      */
3029     public void close() throws IOException
3030     {
3031         beginTransaction();
3032 
3033         // Close all the managed B-trees
3034         for ( BTree<Object, Object> tree : managedBtrees.values() )
3035         {
3036             tree.close();
3037         }
3038 
3039         // Close the management B-trees
3040         btreeOfBtrees.close();
3041 
3042         managedBtrees.clear();
3043 
3044         // Write the data
3045         fileChannel.force( true );
3046 
3047         // And close the channel
3048         fileChannel.close();
3049 
3050         reclaimer.storeCopiedPageMap( file.getParentFile() );
3051         
3052         commit();
3053     }
3054 
3055 
3056     /** Hex chars */
3057     private static final byte[] HEX_CHAR = new byte[]
3058         { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
3059 
3060 
3061     public static String dump( byte octet )
3062     {
3063         return new String( new byte[]
3064             { HEX_CHAR[( octet & 0x00F0 ) >> 4], HEX_CHAR[octet & 0x000F] } );
3065     }
3066 
3067 
3068     /**
3069      * Dump a pageIO
3070      */
3071     private void dump( PageIO pageIo )
3072     {
3073         ByteBuffer buffer = pageIo.getData();
3074         buffer.mark();
3075         byte[] longBuffer = new byte[LONG_SIZE];
3076         byte[] intBuffer = new byte[INT_SIZE];
3077 
3078         // get the next page offset
3079         buffer.get( longBuffer );
3080         long nextOffset = LongSerializer.deserialize( longBuffer );
3081 
3082         // Get the data size
3083         buffer.get( intBuffer );
3084         int size = IntSerializer.deserialize( intBuffer );
3085 
3086         buffer.reset();
3087 
3088         System.out.println( "PageIO[" + Long.toHexString( pageIo.getOffset() ) + "], size = " + size + ", NEXT PageIO:"
3089             + Long.toHexString( nextOffset ) );
3090         System.out.println( " 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F " );
3091         System.out.println( "+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+" );
3092 
3093         for ( int i = 0; i < buffer.limit(); i += 16 )
3094         {
3095             System.out.print( "|" );
3096 
3097             for ( int j = 0; j < 16; j++ )
3098             {
3099                 System.out.print( dump( buffer.get() ) );
3100 
3101                 if ( j == 15 )
3102                 {
3103                     System.out.println( "|" );
3104                 }
3105                 else
3106                 {
3107                     System.out.print( " " );
3108                 }
3109             }
3110         }
3111 
3112         System.out.println( "+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+" );
3113 
3114         buffer.reset();
3115     }
3116 
3117 
3118     /**
3119      * Dump the RecordManager file
3120      * @throws IOException
3121      */
3122     public void dump()
3123     {
3124         System.out.println( "/---------------------------- Dump ----------------------------\\" );
3125 
3126         try
3127         {
3128             RandomAccessFile randomFile = new RandomAccessFile( file, "r" );
3129             FileChannel fileChannel = randomFile.getChannel();
3130 
3131             ByteBuffer recordManagerHeader = ByteBuffer.allocate( RECORD_MANAGER_HEADER_SIZE );
3132 
3133             // load the RecordManager header
3134             fileChannel.read( recordManagerHeader );
3135 
3136             recordManagerHeader.rewind();
3137 
3138             // The page size
3139             long fileSize = fileChannel.size();
3140             int pageSize = recordManagerHeader.getInt();
3141             long nbPages = fileSize / pageSize;
3142 
3143             // The number of managed B-trees
3144             int nbBtree = recordManagerHeader.getInt();
3145 
3146             // The first free page
3147             long firstFreePage = recordManagerHeader.getLong();
3148 
3149             // The current B-tree of B-trees
3150             long currentBtreeOfBtreesPage = recordManagerHeader.getLong();
3151 
3152             // The previous B-tree of B-trees
3153             long previousBtreeOfBtreesPage = recordManagerHeader.getLong();
3154 
3155             // The current CopiedPages B-tree
3156             long currentCopiedPagesBtreePage = recordManagerHeader.getLong();
3157 
3158             // The previous CopiedPages B-tree
3159             long previousCopiedPagesBtreePage = recordManagerHeader.getLong();
3160 
3161             System.out.println( "  RecordManager" );
3162             System.out.println( "  -------------" );
3163             System.out.println( "  Size = 0x" + Long.toHexString( fileSize ) );
3164             System.out.println( "  NbPages = " + nbPages );
3165             System.out.println( "    Header " );
3166             System.out.println( "      page size : " + pageSize );
3167             System.out.println( "      nbTree : " + nbBtree );
3168             System.out.println( "      firstFreePage : 0x" + Long.toHexString( firstFreePage ) );
3169             System.out.println( "      current BOB : 0x" + Long.toHexString( currentBtreeOfBtreesPage ) );
3170             System.out.println( "      previous BOB : 0x" + Long.toHexString( previousBtreeOfBtreesPage ) );
3171             System.out.println( "      current CopiedPages : 0x" + Long.toHexString( currentCopiedPagesBtreePage ) );
3172             System.out.println( "      previous CopiedPages : 0x" + Long.toHexString( previousCopiedPagesBtreePage ) );
3173 
3174             // Dump the Free pages list
3175             dumpFreePages( firstFreePage );
3176 
3177             // Dump the B-tree of B-trees
3178             dumpBtreeHeader( currentBtreeOfBtreesPage );
3179 
3180             // Dump the previous B-tree of B-trees if any
3181             if ( previousBtreeOfBtreesPage != NO_PAGE )
3182             {
3183                 dumpBtreeHeader( previousBtreeOfBtreesPage );
3184             }
3185 
3186             // Dump the CopiedPages B-tree
3187             dumpBtreeHeader( currentCopiedPagesBtreePage );
3188 
3189 
3190             // Dump the previous B-tree of B-trees if any
3191             if ( previousCopiedPagesBtreePage != NO_PAGE )
3192             {
3193                 dumpBtreeHeader( previousCopiedPagesBtreePage );
3194             }
3195 
3196             // Dump all the user's B-tree
3197             randomFile.close();
3198             System.out.println( "\\---------------------------- Dump ----------------------------/" );
3199         }
3200         catch ( IOException ioe )
3201         {
3202             System.out.println( "Exception while dumping the file : " + ioe.getMessage() );
3203         }
3204     }
3205 
3206 
3207     /**
3208      * Dump the free pages
3209      */
3210     private void dumpFreePages( long freePageOffset ) throws EndOfFileExceededException, IOException
3211     {
3212         System.out.println( "\n  FreePages : " );
3213         int pageNb = 1;
3214 
3215         while ( freePageOffset != NO_PAGE )
3216         {
3217             PageIO pageIo = fetchPage( freePageOffset );
3218 
3219             System.out.println( "    freePage[" + pageNb + "] : 0x" + Long.toHexString( pageIo.getOffset() ) );
3220 
3221             freePageOffset = pageIo.getNextPage();
3222             pageNb++;
3223         }
3224     }
3225 
3226 
3227     /**
3228      * Dump a B-tree Header
3229      */
3230     private long dumpBtreeHeader( long btreeOffset ) throws EndOfFileExceededException, IOException
3231     {
3232         // First read the B-tree header
3233         PageIO[] pageIos = readPageIOs( btreeOffset, Long.MAX_VALUE );
3234 
3235         long dataPos = 0L;
3236 
3237         // The B-tree current revision
3238         long revision = readLong( pageIos, dataPos );
3239         dataPos += LONG_SIZE;
3240 
3241         // The nb elems in the tree
3242         long nbElems = readLong( pageIos, dataPos );
3243         dataPos += LONG_SIZE;
3244 
3245         // The B-tree rootPage offset
3246         long rootPageOffset = readLong( pageIos, dataPos );
3247         dataPos += LONG_SIZE;
3248 
3249         // The B-tree page size
3250         int btreePageSize = readInt( pageIos, dataPos );
3251         dataPos += INT_SIZE;
3252 
3253         // The tree name
3254         ByteBuffer btreeNameBytes = readBytes( pageIos, dataPos );
3255         dataPos += INT_SIZE + btreeNameBytes.limit();
3256         String btreeName = Strings.utf8ToString( btreeNameBytes );
3257 
3258         // The keySerializer FQCN
3259         ByteBuffer keySerializerBytes = readBytes( pageIos, dataPos );
3260         dataPos += INT_SIZE + keySerializerBytes.limit();
3261 
3262         String keySerializerFqcn = "";
3263 
3264         if ( keySerializerBytes != null )
3265         {
3266             keySerializerFqcn = Strings.utf8ToString( keySerializerBytes );
3267         }
3268 
3269         // The valueSerialier FQCN
3270         ByteBuffer valueSerializerBytes = readBytes( pageIos, dataPos );
3271 
3272         String valueSerializerFqcn = "";
3273         dataPos += INT_SIZE + valueSerializerBytes.limit();
3274 
3275         if ( valueSerializerBytes != null )
3276         {
3277             valueSerializerFqcn = Strings.utf8ToString( valueSerializerBytes );
3278         }
3279 
3280         // The B-tree allowDuplicates flag
3281         int allowDuplicates = readInt( pageIos, dataPos );
3282         boolean dupsAllowed = allowDuplicates != 0;
3283 
3284         dataPos += INT_SIZE;
3285 
3286 //        System.out.println( "\n  B-Tree " + btreeName );
3287 //        System.out.println( "  ------------------------- " );
3288 
3289 //        System.out.println( "    nbPageIOs[" + pageIos.length + "] = " + pageIoList );
3290         if ( LOG.isDebugEnabled() )
3291         {
3292             StringBuilder sb = new StringBuilder();
3293             boolean isFirst = true;
3294 
3295             for ( PageIO pageIo : pageIos )
3296             {
3297                 if ( isFirst )
3298                 {
3299                     isFirst = false;
3300                 }
3301                 else
3302                 {
3303                     sb.append( ", " );
3304                 }
3305 
3306                 sb.append( "0x" ).append( Long.toHexString( pageIo.getOffset() ) );
3307             }
3308 
3309             String pageIoList = sb.toString();
3310 
3311             LOG.debug( "    PageIOs[{}] = {}", pageIos.length, pageIoList );
3312 
3313 //        System.out.println( "    dataSize = "+ pageIos[0].getSize() );
3314             LOG.debug( "    dataSize = {}", pageIos[0].getSize() );
3315 
3316             LOG.debug( "    B-tree '{}'", btreeName );
3317             LOG.debug( "    revision : {}", revision );
3318             LOG.debug( "    nbElems : {}", nbElems );
3319             LOG.debug( "    rootPageOffset : 0x{}", Long.toHexString( rootPageOffset ) );
3320             LOG.debug( "    B-tree page size : {}", btreePageSize );
3321             LOG.debug( "    keySerializer : '{}'", keySerializerFqcn );
3322             LOG.debug( "    valueSerializer : '{}'", valueSerializerFqcn );
3323             LOG.debug( "    dups allowed : {}", dupsAllowed );
3324 //
3325 //        System.out.println( "    B-tree '" + btreeName + "'" );
3326 //        System.out.println( "    revision : " + revision );
3327 //        System.out.println( "    nbElems : " + nbElems );
3328 //        System.out.println( "    rootPageOffset : 0x" + Long.toHexString( rootPageOffset ) );
3329 //        System.out.println( "    B-tree page size : " + btreePageSize );
3330 //        System.out.println( "    keySerializer : " + keySerializerFqcn );
3331 //        System.out.println( "    valueSerializer : " + valueSerializerFqcn );
3332 //        System.out.println( "    dups allowed : " + dupsAllowed );
3333         }
3334 
3335         return rootPageOffset;
3336     }
3337 
3338 
3339     /**
3340      * Get the number of managed trees. We don't count the CopiedPage B-tree and the B-tree of B-trees
3341      *
3342      * @return The number of managed B-trees
3343      */
3344     public int getNbManagedTrees()
3345     {
3346         return nbBtree;
3347     }
3348 
3349 
3350     /**
3351      * Get the managed B-trees. We don't return the CopiedPage B-tree nor the B-tree of B-trees.
3352      *
3353      * @return The managed B-trees
3354      */
3355     public Set<String> getManagedTrees()
3356     {
3357         Set<String> btrees = new HashSet<String>( managedBtrees.keySet() );
3358 
3359         return btrees;
3360     }
3361 
3362 
3363     /**
3364      * Stores the copied pages into the CopiedPages B-tree
3365      *
3366      * @param name The B-tree name
3367      * @param revision The revision
3368      * @param copiedPages The pages that have been copied while creating this revision
3369      * @throws IOException If we weren't able to store the data on disk
3370      */
3371     /* No Qualifier */ void storeCopiedPages( String name, long revision, long[] copiedPages ) throws IOException
3372     {
3373         RevisionName revisionName = new RevisionName( revision, name );
3374 
3375         copiedPageMap.put( revisionName, copiedPages );
3376     }
3377 
3378 
3379     /**
3380      * Store a reference to an old rootPage into the Revision B-tree
3381      *
3382      * @param btree The B-tree we want to keep an old RootPage for
3383      * @param rootPage The old rootPage
3384      * @throws IOException If we have an issue while writing on disk
3385      */
3386     /* No qualifier */<K, V> void storeRootPage( BTree<K, V> btree, Page<K, V> rootPage ) throws IOException
3387     {
3388         if ( !isKeepRevisions() )
3389         {
3390             return;
3391         }
3392 
3393         NameRevision nameRevision = new NameRevision( btree.getName(), rootPage.getRevision() );
3394 
3395         ( ( AbstractBTree<NameRevision, Long> ) btreeOfBtrees ).insert( nameRevision,
3396             ( ( AbstractPage<K, V> ) rootPage ).getOffset(), 0 );
3397 
3398         if ( LOG_CHECK.isDebugEnabled() )
3399         {
3400             MavibotInspector.check( this );
3401         }
3402     }
3403 
3404 
3405     /**
3406      * Fetch the rootPage of a given B-tree for a given revision.
3407      *
3408      * @param btree The B-tree we are interested in
3409      * @param revision The revision we want to get back
3410      * @return The rootPage for this B-tree and this revision, if any
3411      * @throws KeyNotFoundException If we can't find the rootPage for this revision and this B-tree
3412      * @throws IOException If we had an ise while accessing the data on disk
3413      */
3414     /* No qualifier */<K, V> Page<K, V> getRootPage( BTree<K, V> btree, long revision ) throws KeyNotFoundException,
3415         IOException
3416     {
3417         if ( btree.getRevision() == revision )
3418         {
3419             // We are asking for the current revision
3420             return btree.getRootPage();
3421         }
3422 
3423         // Get the B-tree header offset
3424         NameRevision nameRevision = new NameRevision( btree.getName(), revision );
3425         long btreeHeaderOffset = btreeOfBtrees.get( nameRevision );
3426 
3427         // get the B-tree rootPage
3428         Page<K, V> btreeRoot = readRootPage( btree, btreeHeaderOffset );
3429 
3430         return btreeRoot;
3431     }
3432 
3433 
3434     /**
3435      * Read a root page from the B-tree header offset
3436      */
3437     private <K, V> Page<K, V> readRootPage( BTree<K, V> btree, long btreeHeaderOffset ) throws EndOfFileExceededException, IOException
3438     {
3439         // Read the B-tree header pages on disk
3440         PageIO[] btreeHeaderPageIos = readPageIOs( btreeHeaderOffset, Long.MAX_VALUE );
3441         long dataPos = LONG_SIZE + LONG_SIZE;
3442 
3443         // The B-tree rootPage offset
3444         long rootPageOffset = readLong( btreeHeaderPageIos, dataPos );
3445 
3446         // Read the rootPage pages on disk
3447         PageIO[] rootPageIos = readPageIOs( rootPageOffset, Long.MAX_VALUE );
3448 
3449         // Now, convert it to a Page
3450         Page<K, V> btreeRoot = readPage( btree, rootPageIos );
3451 
3452         return btreeRoot;
3453     }
3454 
3455 
3456     /**
3457      * Get one managed trees, knowing its name.
3458      *
3459      * @param name The B-tree name we are looking for
3460      * @return The managed B-trees
3461      */
3462     public <K, V> BTree<K, V> getManagedTree( String name )
3463     {
3464         return ( BTree<K, V> ) managedBtrees.get( name );
3465     }
3466 
3467 
3468     /**
3469      * Move a list of pages to the free page list. A logical page is associated with one
3470      * or more physical PageIOs, which are on the disk. We have to move all those PagIO instances
3471      * to the free list, and do the same in memory (we try to keep a reference to a set of
3472      * free pages.
3473      *
3474      * @param btree The B-tree which were owning the pages
3475      * @param revision The current revision
3476      * @param pages The pages to free
3477      * @throws IOException If we had a problem while updating the file
3478      * @throws EndOfFileExceededException If we tried to write after the end of the file
3479      */
3480     /* Package protected */<K, V> void freePages( BTree<K, V> btree, long revision, List<Page<K, V>> pages )
3481         throws EndOfFileExceededException, IOException
3482     {
3483         if ( ( pages == null ) || pages.isEmpty() )
3484         {
3485             return;
3486         }
3487 
3488         if ( !keepRevisions )
3489         {
3490             // if the B-tree doesn't keep revisions, we can safely move
3491             // the pages to the freed page list.
3492             if ( LOG.isDebugEnabled() )
3493             {
3494                 LOG.debug( "Freeing the following pages :" );
3495 
3496                 for ( Page<K, V> page : pages )
3497                 {
3498                     LOG.debug(  "    {}", page );
3499                 }
3500             }
3501 
3502             for ( Page<K, V> page : pages )
3503             {
3504                 long pageOffset = ((AbstractPage<K, V>)page).getOffset();
3505 
3506                 PageIO[] pageIos = readPageIOs( pageOffset, Long.MAX_VALUE );
3507 
3508                 for ( PageIO pageIo : pageIos )
3509                 {
3510                     freedPages.add( pageIo );
3511                 }
3512             }
3513         }
3514         else
3515         {
3516             // We are keeping revisions of standard B-trees, so we move the pages to the CopiedPages B-tree
3517             // but only for non managed B-trees
3518             if ( LOG.isDebugEnabled() )
3519             {
3520                 LOG.debug( "Moving the following pages to the CopiedBtree :" );
3521 
3522                 for ( Page<K, V> page : pages )
3523                 {
3524                     LOG.debug(  "    {}", page );
3525                 }
3526             }
3527 
3528             long[] pageOffsets = new long[pages.size()];
3529             int pos = 0;
3530 
3531             for ( Page<K, V> page : pages )
3532             {
3533                 pageOffsets[pos++] = ((AbstractPage<K, V>)page).offset;
3534             }
3535 
3536             if ( ( btree.getType() != BTreeTypeEnum.BTREE_OF_BTREES ) && ( btree.getType() != BTreeTypeEnum.COPIED_PAGES_BTREE ) )
3537             {
3538                 // Deal with standard B-trees
3539                 RevisionName revisionName = new RevisionName( revision, btree.getName() );
3540 
3541                 copiedPageMap.put( revisionName, pageOffsets );
3542             }
3543             else
3544             {
3545                 // Managed B-trees : we simply free the copied pages
3546                 for ( long pageOffset : pageOffsets )
3547                 {
3548                     PageIO[] pageIos = readPageIOs( pageOffset, Long.MAX_VALUE );
3549 
3550                     for ( PageIO pageIo : pageIos )
3551                     {
3552                         freedPages.add( pageIo );
3553                     }
3554                 }
3555             }
3556         }
3557     }
3558 
3559 
3560     /**
3561      * Add a PageIO to the list of free PageIOs
3562      *
3563      * @param pageIo The page to free
3564      * @throws IOException If we weren't capable of updating the file
3565      */
3566     private void free( PageIO pageIo ) throws IOException
3567     {
3568         freePageLock.lock();
3569         
3570         // We add the Page's PageIOs before the
3571         // existing free pages.
3572         // Link it to the first free page
3573         pageIo.setNextPage( firstFreePage );
3574 
3575         LOG.debug( "Flushing the first free page" );
3576 
3577         // And flush it to disk
3578         //FIXME can be flushed last after releasing the lock
3579         flushPages( pageIo );
3580 
3581         // We can update the firstFreePage offset
3582         firstFreePage = pageIo.getOffset();
3583         
3584         freePageLock.unlock();
3585     }
3586 
3587 
3588     /**
3589      * Add an array of PageIOs to the list of free PageIOs
3590      *
3591      * @param offsets The offsets of the pages whose associated PageIOs will be fetched and freed.
3592      * @throws IOException If we weren't capable of updating the file
3593      */
3594     public void free( long[] offsets ) throws IOException
3595     {
3596         List<PageIO> pageIos = new ArrayList<PageIO>();
3597         int pageIndex = 0;
3598         for( int i=0; i < offsets.length; i++ )
3599         {
3600             PageIO[] ios = readPageIOs( offsets[i], Long.MAX_VALUE );
3601             for( PageIO io : ios )
3602             {
3603                 pageIos.add( io );
3604                 
3605                 if( pageIndex > 0 )
3606                 {
3607                     pageIos.get( pageIndex - 1 ).setNextPage( io.getOffset() );
3608                 }
3609                 
3610                 pageIndex++;
3611             }
3612         }
3613 
3614         freePageLock.lock();
3615         
3616         // We add the Page's PageIOs before the
3617         // existing free pages.
3618         // Link it to the first free page
3619         pageIos.get( pageIndex -1 ).setNextPage( firstFreePage );
3620 
3621         LOG.debug( "Flushing the first free page" );
3622 
3623         // And flush it to disk
3624         //FIXME can be flushed last after releasing the lock
3625         flushPages( pageIos.toArray( new PageIO[0] ) );
3626 
3627         // We can update the firstFreePage offset
3628         firstFreePage = pageIos.get( 0 ).getOffset();
3629         
3630         freePageLock.unlock();
3631     }
3632 
3633     
3634     /**
3635      * @return the keepRevisions flag
3636      */
3637     public boolean isKeepRevisions()
3638     {
3639         return keepRevisions;
3640     }
3641 
3642 
3643     /**
3644      * @param keepRevisions the keepRevisions flag to set
3645      */
3646     public void setKeepRevisions( boolean keepRevisions )
3647     {
3648         this.keepRevisions = keepRevisions;
3649     }
3650 
3651 
3652     /**
3653      * Creates a B-tree and automatically adds it to the list of managed btrees
3654      *
3655      * @param name the name of the B-tree
3656      * @param keySerializer key serializer
3657      * @param valueSerializer value serializer
3658      * @param allowDuplicates flag for allowing duplicate keys
3659      * @return a managed B-tree
3660      * @throws IOException If we weren't able to update the file on disk
3661      * @throws BTreeAlreadyManagedException If the B-tree is already managed
3662      */
3663     @SuppressWarnings("all")
3664     public <K, V> BTree<K, V> addBTree( String name, ElementSerializer<K> keySerializer,
3665         ElementSerializer<V> valueSerializer, boolean allowDuplicates )
3666             throws IOException, BTreeAlreadyManagedException
3667     {
3668         PersistedBTreeConfiguration config = new PersistedBTreeConfiguration();
3669 
3670         config.setName( name );
3671         config.setKeySerializer( keySerializer );
3672         config.setValueSerializer( valueSerializer );
3673         config.setAllowDuplicates( allowDuplicates );
3674 
3675         BTree btree = new PersistedBTree( config );
3676         manage( btree );
3677 
3678         if ( LOG_CHECK.isDebugEnabled() )
3679         {
3680             MavibotInspector.check( this );
3681         }
3682 
3683         return btree;
3684     }
3685 
3686     
3687     /**
3688      * Add a newly closd transaction into the closed transaction queue
3689      */
3690     /* no qualifier */ <K, V> void releaseTransaction( ReadTransaction<K, V> readTransaction )
3691     {
3692         RevisionName revisionName = new RevisionName( 
3693             readTransaction.getRevision(), 
3694             readTransaction.getBtreeHeader().getBtree().getName() );
3695         //closedTransactionsQueue.add( revisionName );
3696     }
3697     
3698     
3699     /**
3700      * Get the current BTreeHeader for a given Btree. It might not exist
3701      */
3702     public BTreeHeader getBTreeHeader( String name )
3703     {
3704         // Get a lock
3705         btreeHeadersLock.readLock().lock();
3706         
3707         // get the current BTree Header for this BTree and revision
3708         BTreeHeader<?, ?> btreeHeader = currentBTreeHeaders.get( name );
3709         
3710         // And unlock 
3711         btreeHeadersLock.readLock().unlock();
3712 
3713         return btreeHeader;
3714     }
3715     
3716     
3717     /**
3718      * Get the new BTreeHeader for a given Btree. It might not exist
3719      */
3720     public BTreeHeader getNewBTreeHeader( String name )
3721     {
3722         // get the current BTree Header for this BTree and revision
3723         BTreeHeader<?, ?> btreeHeader = newBTreeHeaders.get( name );
3724 
3725         return btreeHeader;
3726     }
3727     
3728     
3729     /**
3730      * {@inheritDoc}
3731      */
3732     public void updateNewBTreeHeaders( BTreeHeader btreeHeader )
3733     {
3734         newBTreeHeaders.put( btreeHeader.getBtree().getName(), btreeHeader );
3735     }
3736     
3737     
3738     /**
3739      * Swap the current BtreeHeader map with the new one. This method will only
3740      * be called in a single trhead, when the current transaction will be committed.
3741      */
3742     private void swapCurrentBtreeHeaders()
3743     {
3744         // Copy the reference to the current BtreeHeader Map
3745         Map<String, BTreeHeader<?, ?>> tmp = currentBTreeHeaders;
3746         
3747         // Get a write lock
3748         btreeHeadersLock.writeLock().lock();
3749 
3750         // Swap the new BTreeHeader Map
3751         currentBTreeHeaders = newBTreeHeaders;
3752         
3753         // And unlock 
3754         btreeHeadersLock.writeLock().unlock();
3755 
3756         // Last, not least, clear the Map and reinject the latest revision in it
3757         tmp.clear();
3758         tmp.putAll( currentBTreeHeaders );
3759 
3760         // And update the new BTreeHeader map
3761         newBTreeHeaders = tmp;
3762     }
3763     
3764     
3765     /**
3766      * revert the new BTreeHeaders Map to the current BTreeHeader Map. This method
3767      * is called when we have to rollback a transaction.
3768      */
3769     private void revertBtreeHeaders()
3770     {
3771         // Clean up teh new BTreeHeaders Map
3772         newBTreeHeaders.clear();
3773         
3774         // Reinject the latest revision in it
3775         newBTreeHeaders.putAll( currentBTreeHeaders );
3776     }
3777 
3778     
3779     /**
3780      * Loads a B-tree holding the values of a duplicate key
3781      * This tree is also called as dups tree or sub tree
3782      *
3783      * @param offset the offset of the B-tree header
3784      * @return the deserialized B-tree
3785      */
3786     /* No qualifier */<K, V> BTree<V, V> loadDupsBtree( long btreeHeaderOffset, BTree<K, V> parentBtree )
3787     {
3788         try
3789         {
3790             PageIO[] pageIos = readPageIOs( btreeHeaderOffset, Long.MAX_VALUE );
3791 
3792             BTree<V, V> subBtree = BTreeFactory.<V, V> createPersistedBTree( BTreeTypeEnum.PERSISTED_SUB );
3793             loadBtree( pageIos, subBtree, parentBtree );
3794             
3795 
3796             return subBtree;
3797         }
3798         catch ( Exception e )
3799         {
3800             // should not happen
3801             throw new BTreeCreationException( e );
3802         }
3803     }
3804 
3805 
3806     /**
3807      * @see Object#toString()
3808      */
3809     public String toString()
3810     {
3811         StringBuilder sb = new StringBuilder();
3812 
3813         sb.append( "RM free pages : [" );
3814 
3815         if ( firstFreePage != NO_PAGE )
3816         {
3817             long current = firstFreePage;
3818             boolean isFirst = true;
3819 
3820             while ( current != NO_PAGE )
3821             {
3822                 if ( isFirst )
3823                 {
3824                     isFirst = false;
3825                 }
3826                 else
3827                 {
3828                     sb.append( ", " );
3829                 }
3830 
3831                 PageIO pageIo;
3832 
3833                 try
3834                 {
3835                     pageIo = fetchPage( current );
3836                     sb.append( pageIo.getOffset() );
3837                     current = pageIo.getNextPage();
3838                 }
3839                 catch ( EndOfFileExceededException e )
3840                 {
3841                     e.printStackTrace();
3842                 }
3843                 catch ( IOException e )
3844                 {
3845                     e.printStackTrace();
3846                 }
3847 
3848             }
3849         }
3850 
3851         sb.append( "]" );
3852 
3853         return sb.toString();
3854     }
3855 }