View Javadoc
1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one
3    *  or more contributor license agreements.  See the NOTICE file
4    *  distributed with this work for additional information
5    *  regarding copyright ownership.  The ASF licenses this file
6    *  to you under the Apache License, Version 2.0 (the
7    *  "License"); you may not use this file except in compliance
8    *  with the License.  You may obtain a copy of the License at
9    *
10   *    http://www.apache.org/licenses/LICENSE-2.0
11   *
12   *  Unless required by applicable law or agreed to in writing,
13   *  software distributed under the License is distributed on an
14   *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   *  KIND, either express or implied.  See the License for the
16   *  specific language governing permissions and limitations
17   *  under the License.
18   *
19   */
20  package org.apache.directory.mavibot.btree;
21  
22  
23  import java.io.File;
24  import java.io.IOException;
25  import java.io.RandomAccessFile;
26  import java.nio.ByteBuffer;
27  import java.nio.channels.FileChannel;
28  import java.util.ArrayList;
29  import java.util.HashMap;
30  import java.util.HashSet;
31  import java.util.LinkedHashMap;
32  import java.util.List;
33  import java.util.Map;
34  import java.util.Queue;
35  import java.util.Set;
36  import java.util.concurrent.LinkedBlockingQueue;
37  import java.util.concurrent.atomic.AtomicLong;
38  import java.util.concurrent.locks.Lock;
39  import java.util.concurrent.locks.ReadWriteLock;
40  import java.util.concurrent.locks.ReentrantLock;
41  import java.util.concurrent.locks.ReentrantReadWriteLock;
42  
43  import org.apache.directory.mavibot.btree.exception.BTreeAlreadyManagedException;
44  import org.apache.directory.mavibot.btree.exception.BTreeCreationException;
45  import org.apache.directory.mavibot.btree.exception.EndOfFileExceededException;
46  import org.apache.directory.mavibot.btree.exception.FileException;
47  import org.apache.directory.mavibot.btree.exception.InvalidOffsetException;
48  import org.apache.directory.mavibot.btree.exception.KeyNotFoundException;
49  import org.apache.directory.mavibot.btree.exception.RecordManagerException;
50  import org.apache.directory.mavibot.btree.serializer.ElementSerializer;
51  import org.apache.directory.mavibot.btree.serializer.IntSerializer;
52  import org.apache.directory.mavibot.btree.serializer.LongArraySerializer;
53  import org.apache.directory.mavibot.btree.serializer.LongSerializer;
54  import org.apache.directory.mavibot.btree.util.Strings;
55  import org.slf4j.Logger;
56  import org.slf4j.LoggerFactory;
57  
58  
59  /**
60   * The RecordManager is used to manage the file in which we will store the B-trees.
61   * A RecordManager will manage more than one B-tree.<br/>
62   *
63   * It stores data in fixed size pages (default size is 512 bytes), which may be linked one to
64   * the other if the data we want to store is too big for a page.
65   *
66   * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
67   */
68  public class RecordManager extends AbstractTransactionManager
69  {
70      /** The Logger used by this class */
71      protected static final Logger LOG = LoggerFactory.getLogger( RecordManager.class );
72  
73      /** A dedicated logger, registered under the LOG_PAGES name */
74      protected static final Logger LOG_PAGES = LoggerFactory.getLogger( "org.apache.directory.mavibot.LOG_PAGES" );
75  
76      /** A dedicated logger for the check */
77      protected static final Logger LOG_CHECK = LoggerFactory.getLogger( "org.apache.directory.mavibot.LOG_CHECK" );
78  
79      /** The associated file */
80      private File file;
81  
82      /** The channel used to read and write data */
83      /* no qualifier */ FileChannel fileChannel;
84  
85      /** The number of managed B-trees */
86      /* no qualifier */ int nbBtree;
87  
88      /** The offset of the first free page */
89      /* no qualifier */ long firstFreePage;
90  
91      /** The list of available free pages */
92      List<PageIO> freePages = new ArrayList<PageIO>();
93  
94      /** Some counters to track the number of free pages */
95      public AtomicLong nbFreedPages = new AtomicLong( 0 );
96      public AtomicLong nbCreatedPages = new AtomicLong( 0 );
97      public AtomicLong nbReusedPages = new AtomicLong( 0 );
98      public AtomicLong nbUpdateRMHeader = new AtomicLong( 0 );
99      public AtomicLong nbUpdateBtreeHeader = new AtomicLong( 0 );
100     public AtomicLong nbUpdatePageIOs = new AtomicLong( 0 );
101 
102     /** The offset of the end of the file */
103     private long endOfFileOffset;
104 
105     /**
106      * A B-tree used to manage the pages that have been copied in a new version.
107      * Those pages can be reclaimed when the associated version is dead.
108      **/
109     private BTree<RevisionName, long[]> copiedPageBtree;
110 
111     /** A constant for an offset on a non existing page */
112     public static final long NO_PAGE = -1L;
113 
114     /** The number of elements we can store in a page */
115     private static final int PAGE_SIZE = 4;
116 
117     /** The size of the link to next page */
118     private static final int LINK_SIZE = 8;
119 
120     /** Some constants : the size in bytes of a byte, an int and a long */
121     private static final int BYTE_SIZE = 1;
122     /* no qualifier */ static final int INT_SIZE = 4;
123     /* no qualifier */ static final int LONG_SIZE = 8;
124 
125     /** The default page size */
126     public static final int DEFAULT_PAGE_SIZE = 512;
127 
128     /** The minimal page size. Can't be below 64, as we have to store many things in the RMHeader */
129     private static final int MIN_PAGE_SIZE = 64;
130 
131     /** The RecordManager header size. NOTE : this static field is reassigned by the constructor with the instance page size */
132     /* no qualifier */ static int RECORD_MANAGER_HEADER_SIZE = DEFAULT_PAGE_SIZE;
133 
134     /** A buffer used to store the RecordManager header, allocated by the constructor */
135     private ByteBuffer RECORD_MANAGER_HEADER_BUFFER;
136 
137     /** A byte array used to store the RecordManager header, allocated by the constructor */
138     private byte[] RECORD_MANAGER_HEADER_BYTES;
139 
140     /** The length of an Offset, as a negative value (0xFFFFFFF8, which is -8 when read as a big-endian int) */
141     private  byte[] LONG_LENGTH = new byte[]
142         { ( byte ) 0xFF, ( byte ) 0xFF, ( byte ) 0xFF, ( byte ) 0xF8 };
143 
144     /** The RecordManager underlying page size. */
145     /* no qualifier */ int pageSize = DEFAULT_PAGE_SIZE;
146 
147     /** The set of managed B-trees */
148     private Map<String, BTree<Object, Object>> managedBtrees;
149     
150     /** The queue of recently closed transactions */
151     private Queue<RevisionName> closedTransactionsQueue = new LinkedBlockingQueue<RevisionName>();
152 
153     /** The default file name */
154     private static final String DEFAULT_FILE_NAME = "mavibot.db";
155 
156     /** A flag set to true if we want to keep old revisions */
157     private boolean keepRevisions;
158 
159     /** A flag used by internal btrees */
160     public static final boolean INTERNAL_BTREE = true;
161 
162     /** A flag used by normal (non internal) btrees */
163     public static final boolean NORMAL_BTREE = false;
164 
165     /** The B-tree of B-trees */
166     private BTree<NameRevision, Long> btreeOfBtrees;
167 
168     /** The B-tree of B-trees management btree name */
169     /* no qualifier */ static final String BTREE_OF_BTREES_NAME = "_btree_of_btrees_";
170 
171     /** The CopiedPages management btree name */
172     /* no qualifier */ static final String COPIED_PAGE_BTREE_NAME = "_copiedPageBtree_";
173 
174     /** The current B-tree of B-trees header offset */
175     /* no qualifier */ long currentBtreeOfBtreesOffset;
176 
177     /** The previous B-tree of B-trees header offset */
178     private long previousBtreeOfBtreesOffset = NO_PAGE;
179 
180     /** The offset on the current copied pages B-tree */
181     /* no qualifier */ long currentCopiedPagesBtreeOffset = NO_PAGE;
182 
183     /** The offset on the previous copied pages B-tree */
184     private long previousCopiedPagesBtreeOffset = NO_PAGE;
185 
186     /** A lock to protect the transaction handling */
187     private Lock transactionLock = new ReentrantLock();
188     
189     /** A ThreadLocalStorage used to store the current transaction level (null until a transaction is started on the thread) */
190     private static final ThreadLocal<Integer> context = new ThreadLocal<Integer>();
191 
192     /** The list of PageIO that can be freed after a commit */
193     List<PageIO> freedPages = new ArrayList<PageIO>();
194 
195     /** The list of PageIO that can be freed after a rollback */
196     private List<PageIO> allocatedPages = new ArrayList<PageIO>();
197     
198     /** A Map keeping the latest revisions for each managed BTree */
199     private Map<String, BTreeHeader<?, ?>> currentBTreeHeaders = new HashMap<String, BTreeHeader<?, ?>>();
200 
201     /** A Map storing the new revisions when some changes have been made in some BTrees */
202     private Map<String, BTreeHeader<?, ?>> newBTreeHeaders = new HashMap<String, BTreeHeader<?, ?>>();
203     
204     /** A lock to protect the BtreeHeader maps */
205     private ReadWriteLock btreeHeadersLock = new ReentrantReadWriteLock();
206     
207     /** A value stored into the transaction context for rollbacked transactions */
208     private static final int ROLLBACKED_TXN = 0;
209     
/**
 * Creates a RecordManager working on the given file, with the default page
 * size ({@link #DEFAULT_PAGE_SIZE} bytes). This simply delegates to
 * {@link #RecordManager(String, int)}. If a folder is provided, the file
 * used is the one named mavibot.db in this folder.
 *
 * @param fileName The file name, or a folder name
 */
public RecordManager( String fileName )
{
    this( fileName, DEFAULT_PAGE_SIZE );
}
221 
222 
223     /**
224      * Create a Record manager which will either create the underlying file
225      * or load an existing one. If a folder is provider, then we will create
226      * a file with a default name : mavibot.db
227      *
228      * @param name The file name, or a folder name
229      * @param pageSize the size of a page on disk, in bytes
230      */
231     public RecordManager( String fileName, int pageSize )
232     {
233         managedBtrees = new LinkedHashMap<String, BTree<Object, Object>>();
234 
235         if ( pageSize < MIN_PAGE_SIZE )
236         {
237             this.pageSize = MIN_PAGE_SIZE;
238         }
239         else
240         {
241             this.pageSize = pageSize;
242         }
243 
244         RECORD_MANAGER_HEADER_BUFFER = ByteBuffer.allocate( this.pageSize );
245         RECORD_MANAGER_HEADER_BYTES = new byte[this.pageSize];
246         RECORD_MANAGER_HEADER_SIZE = this.pageSize;
247 
248         // Open the file or create it
249         File tmpFile = new File( fileName );
250 
251         if ( tmpFile.isDirectory() )
252         {
253             // It's a directory. Check that we don't have an existing mavibot file
254             tmpFile = new File( tmpFile, DEFAULT_FILE_NAME );
255         }
256 
257         // We have to create a new file, if it does not already exist
258         boolean isNewFile = createFile( tmpFile );
259 
260         try
261         {
262             RandomAccessFile randomFile = new RandomAccessFile( file, "rw" );
263             fileChannel = randomFile.getChannel();
264 
265             // get the current end of file offset
266             endOfFileOffset = fileChannel.size();
267 
268             if ( isNewFile )
269             {
270                 initRecordManager();
271             }
272             else
273             {
274                 loadRecordManager();
275             }
276         }
277         catch ( Exception e )
278         {
279             LOG.error( "Error while initializing the RecordManager : {}", e.getMessage() );
280             LOG.error( "", e );
281             throw new RecordManagerException( e );
282         }
283     }
284 
285 
286     /**
287      * Create the mavibot file if it does not exist
288      */
289     private boolean createFile( File mavibotFile )
290     {
291         try
292         {
293             boolean creation = mavibotFile.createNewFile();
294 
295             file = mavibotFile;
296 
297             if ( mavibotFile.length() == 0 )
298             {
299                 return true;
300             }
301             else
302             {
303                 return creation;
304             }
305         }
306         catch ( IOException ioe )
307         {
308             LOG.error( "Cannot create the file {}", mavibotFile.getName() );
309             return false;
310         }
311     }
312 
313 
314     /**
315      * We will create a brand new RecordManager file, containing nothing, but the RecordManager header,
316      * a B-tree to manage the old revisions we want to keep and
317      * a B-tree used to manage pages associated with old versions.
318      * <br/>
319      * The RecordManager header contains the following details :
320      * <pre>
321      * +--------------------------+
322      * | PageSize                 | 4 bytes : The size of a physical page (default to 4096)
323      * +--------------------------+
324      * |  NbTree                  | 4 bytes : The number of managed B-trees (at least 1)
325      * +--------------------------+
326      * | FirstFree                | 8 bytes : The offset of the first free page
327      * +--------------------------+
328      * | current BoB offset       | 8 bytes : The offset of the current BoB
329      * +--------------------------+
330      * | previous BoB offset      | 8 bytes : The offset of the previous BoB
331      * +--------------------------+
332      * | current CP btree offset  | 8 bytes : The offset of the current BoB
333      * +--------------------------+
334      * | previous CP btree offset | 8 bytes : The offset of the previous BoB
335      * +--------------------------+
336      * </pre>
337      *
338      * We then store the B-tree managing the pages that have been copied when we have added
339      * or deleted an element in the B-tree. They are associated with a version.
340      *
341      * Last, we add the bTree that keep a track on each revision we can have access to.
342      */
343     private void initRecordManager() throws IOException
344     {
345         // Create a new Header
346         nbBtree = 0;
347         firstFreePage = NO_PAGE;
348         currentBtreeOfBtreesOffset = 0L;
349 
350         updateRecordManagerHeader();
351 
352         // Set the offset of the end of the file
353         endOfFileOffset = fileChannel.size();
354 
355         // First, create the btree of btrees <NameRevision, Long>
356         createBtreeOfBtrees();
357 
358         // Now, initialize the Copied Page B-tree
359         createCopiedPagesBtree();
360 
361         // Inject these B-trees into the RecordManager. They are internal B-trees.
362         try
363         {
364             manage( btreeOfBtrees, INTERNAL_BTREE );
365 
366             currentBtreeOfBtreesOffset = ((PersistedBTree<NameRevision, Long>)btreeOfBtrees).getBtreeHeader().getBTreeHeaderOffset();
367             updateRecordManagerHeader();
368             
369             // Inject the BtreeOfBtrees into the currentBtreeHeaders map
370             currentBTreeHeaders.put( BTREE_OF_BTREES_NAME,  ((PersistedBTree<NameRevision, Long>)btreeOfBtrees).getBtreeHeader() );
371             newBTreeHeaders.put( BTREE_OF_BTREES_NAME,  ((PersistedBTree<NameRevision, Long>)btreeOfBtrees).getBtreeHeader() );
372 
373             // The FreePage B-tree
374             manage( copiedPageBtree, INTERNAL_BTREE );
375 
376             currentCopiedPagesBtreeOffset = ((PersistedBTree<RevisionName, long[]>)copiedPageBtree).getBtreeHeader().getBTreeHeaderOffset();
377             updateRecordManagerHeader();
378             
379             // Inject the CopiedPagesBTree into the currentBtreeHeaders map
380             currentBTreeHeaders.put( COPIED_PAGE_BTREE_NAME, ((PersistedBTree<RevisionName, long[]>)copiedPageBtree).getBtreeHeader() );
381             newBTreeHeaders.put( COPIED_PAGE_BTREE_NAME, ((PersistedBTree<RevisionName, long[]>)copiedPageBtree).getBtreeHeader() );
382         }
383         catch ( BTreeAlreadyManagedException btame )
384         {
385             // Can't happen here.
386         }
387 
388         // We are all set ! Verify the file
389         if ( LOG_CHECK.isDebugEnabled() )
390         {
391             MavibotInspector.check( this );
392         }
393 
394     }
395 
396 
397     /**
398      * Create the B-treeOfBtrees
399      */
400     private void createBtreeOfBtrees()
401     {
402         PersistedBTreeConfiguration<NameRevision, Long> configuration = new PersistedBTreeConfiguration<NameRevision, Long>();
403         configuration.setKeySerializer( NameRevisionSerializer.INSTANCE );
404         configuration.setName( BTREE_OF_BTREES_NAME );
405         configuration.setValueSerializer( LongSerializer.INSTANCE );
406         configuration.setBtreeType( BTreeTypeEnum.BTREE_OF_BTREES );
407         configuration.setCacheSize( PersistedBTree.DEFAULT_CACHE_SIZE );
408 
409         btreeOfBtrees = BTreeFactory.createPersistedBTree( configuration );
410     }
411 
412 
413     /**
414      * Create the CopiedPagesBtree
415      */
416     private void createCopiedPagesBtree()
417     {
418         PersistedBTreeConfiguration<RevisionName, long[]> configuration = new PersistedBTreeConfiguration<RevisionName, long[]>();
419         configuration.setKeySerializer( RevisionNameSerializer.INSTANCE );
420         configuration.setName( COPIED_PAGE_BTREE_NAME );
421         configuration.setValueSerializer( LongArraySerializer.INSTANCE );
422         configuration.setBtreeType( BTreeTypeEnum.COPIED_PAGES_BTREE );
423         configuration.setCacheSize( PersistedBTree.DEFAULT_CACHE_SIZE );
424 
425         copiedPageBtree = BTreeFactory.createPersistedBTree( configuration );
426     }
427 
428 
429     /**
430      * Load the BTrees from the disk.
431      *
432      * @throws InstantiationException
433      * @throws IllegalAccessException
434      * @throws ClassNotFoundException
435      * @throws NoSuchFieldException
436      * @throws SecurityException
437      * @throws IllegalArgumentException
438      */
439     private void loadRecordManager() throws IOException, ClassNotFoundException, IllegalAccessException,
440         InstantiationException, IllegalArgumentException, SecurityException, NoSuchFieldException, KeyNotFoundException
441     {
442         if ( fileChannel.size() != 0 )
443         {
444             ByteBuffer recordManagerHeader = ByteBuffer.allocate( RECORD_MANAGER_HEADER_SIZE );
445 
446             // The file exists, we have to load the data now
447             fileChannel.read( recordManagerHeader );
448 
449             recordManagerHeader.rewind();
450 
451             // read the RecordManager Header :
452             // +---------------------+
453             // | PageSize            | 4 bytes : The size of a physical page (default to 4096)
454             // +---------------------+
455             // | NbTree              | 4 bytes : The number of managed B-trees (at least 1)
456             // +---------------------+
457             // | FirstFree           | 8 bytes : The offset of the first free page
458             // +---------------------+
459             // | current BoB offset  | 8 bytes : The offset of the current B-tree of B-trees
460             // +---------------------+
461             // | previous BoB offset | 8 bytes : The offset of the previous B-tree of B-trees
462             // +---------------------+
463             // | current CP offset   | 8 bytes : The offset of the current Copied Pages B-tree
464             // +---------------------+
465             // | previous CP offset  | 8 bytes : The offset of the previous Copied Pages B-tree
466             // +---------------------+
467 
468             // The page size
469             pageSize = recordManagerHeader.getInt();
470 
471             // The number of managed B-trees
472             nbBtree = recordManagerHeader.getInt();
473 
474             // The first and last free page
475             firstFreePage = recordManagerHeader.getLong();
476 
477             // The current BOB offset
478             currentBtreeOfBtreesOffset = recordManagerHeader.getLong();
479 
480             // The previous BOB offset
481             previousBtreeOfBtreesOffset = recordManagerHeader.getLong();
482 
483             // The current Copied Pages B-tree offset
484             currentCopiedPagesBtreeOffset = recordManagerHeader.getLong();
485 
486             // The previous Copied Pages B-tree offset
487             previousCopiedPagesBtreeOffset = recordManagerHeader.getLong();
488 
489             // read the B-tree of B-trees
490             PageIO[] bobHeaderPageIos = readPageIOs( currentBtreeOfBtreesOffset, Long.MAX_VALUE );
491 
492             btreeOfBtrees = BTreeFactory.<NameRevision, Long> createPersistedBTree( BTreeTypeEnum.BTREE_OF_BTREES );
493             //BTreeFactory.<NameRevision, Long> setBtreeHeaderOffset( ( PersistedBTree<NameRevision, Long> )btreeOfBtrees, currentBtreeOfBtreesOffset );
494 
495             loadBtree( bobHeaderPageIos, btreeOfBtrees );
496 
497             // read the copied page B-tree
498             PageIO[] copiedPagesPageIos = readPageIOs( currentCopiedPagesBtreeOffset, Long.MAX_VALUE );
499 
500             copiedPageBtree = BTreeFactory.<RevisionName, long[]> createPersistedBTree( BTreeTypeEnum.COPIED_PAGES_BTREE );
501             //( ( PersistedBTree<RevisionName, long[]> ) copiedPageBtree ).setBtreeHeaderOffset( currentCopiedPagesBtreeOffset );
502 
503             loadBtree( copiedPagesPageIos, copiedPageBtree );
504 
505             // Now, read all the B-trees from the btree of btrees
506             TupleCursor<NameRevision, Long> btreeCursor = btreeOfBtrees.browse();
507             Map<String, Long> loadedBtrees = new HashMap<String, Long>();
508 
509             // loop on all the btrees we have, and keep only the latest revision
510             long currentRevision = -1L;
511 
512             while ( btreeCursor.hasNext() )
513             {
514                 Tuple<NameRevision, Long> btreeTuple = btreeCursor.next();
515                 NameRevision nameRevision = btreeTuple.getKey();
516                 long btreeOffset = btreeTuple.getValue();
517                 long revision = nameRevision.getValue();
518 
519                 // Check if we already have processed this B-tree
520                 Long loadedBtreeRevision = loadedBtrees.get( nameRevision.getName() );
521 
522                 if ( loadedBtreeRevision != null )
523                 {
524                     // The btree has already been loaded. The revision is necessarily higher
525                     if ( revision > currentRevision )
526                     {
527                         // We have a newer revision : switch to the new revision (we keep the offset atm)
528                         loadedBtrees.put( nameRevision.getName(), btreeOffset );
529                         currentRevision = revision;
530                     }
531                 }
532                 else
533                 {
534                     // This is a new B-tree
535                     loadedBtrees.put( nameRevision.getName(), btreeOffset );
536                     currentRevision = nameRevision.getRevision();
537                 }
538             }
539 
540             // TODO : clean up the old revisions...
541 
542 
543             // Now, we can load the real btrees using the offsets
544             for ( String btreeName : loadedBtrees.keySet() )
545             {
546                 long btreeOffset = loadedBtrees.get( btreeName );
547 
548                 PageIO[] btreePageIos = readPageIOs( btreeOffset, Long.MAX_VALUE );
549 
550                 BTree<?, ?> btree = BTreeFactory.<NameRevision, Long> createPersistedBTree();
551                 //( ( PersistedBTree<NameRevision, Long> ) btree ).setBtreeHeaderOffset( btreeOffset );
552                 loadBtree( btreePageIos, btree );
553 
554                 // Add the btree into the map of managed B-trees
555                 managedBtrees.put( btreeName, ( BTree<Object, Object> ) btree );
556             }
557 
558             // We are done ! Let's finish with the last initialization parts
559             endOfFileOffset = fileChannel.size();
560         }
561     }
562 
563 
564     /**
565      * Starts a transaction
566      */
567     public void beginTransaction()
568     {
569         // First, take the lock
570         transactionLock.lock();
571         
572         // Now, check the TLS state
573         incrementTxnLevel();
574     }
575 
576 
577     /**
578      * Commits a transaction
579      */
580     public void commit()
581     {
582         if ( !fileChannel.isOpen() )
583         {
584             // The file has been closed, nothing remains to commit, let's get out
585             transactionLock.unlock();
586             
587             // Still we have to decrement the TransactionLevel
588             decrementTxnLevel();
589             
590             return;
591         }
592 
593         int nbTxnStarted = context.get();
594         
595         switch ( nbTxnStarted )
596         {
597             case ROLLBACKED_TXN :
598                 // The transaction was rollbacked, quit immediatelly
599                 transactionLock.unlock();
600                 
601                 return;
602             
603             case 1 :
604                 // We are done with the transaction, we can update the RMHeader and swap the BTreeHeaders
605                 // First update the RMHeader to be sure that we have a way to restore from a crash
606                 updateRecordManagerHeader();
607                 
608                 // Swap the BtreeHeaders maps
609                 swapCurrentBtreeHeaders();
610         
611                 // We can now free pages
612                 for ( PageIO pageIo : freedPages )
613                 {
614                     try
615                     {
616                         free( pageIo );
617                     }
618                     catch ( IOException ioe )
619                     {
620                         throw new RecordManagerException( ioe.getMessage() );
621                     }
622                 }
623         
624                 // Release the allocated and freed pages list
625                 freedPages.clear();
626                 allocatedPages.clear();
627         
628                 // And update the RMHeader again, removing the old references to BOB and CPB b-tree headers
629                 // here, we have to erase the old references to keep only the new ones.
630                 updateRecordManagerHeader();
631                 
632                 // And decrement the number of started transactions
633                 decrementTxnLevel();
634 
635                 // Finally, release the global lock
636                 transactionLock.unlock();
637                 
638                 return;
639                 
640             default :
641                 // We are inner an existing transaction. Just update the necessary elements
642                 // Update the RMHeader to be sure that we have a way to restore from a crash
643                 updateRecordManagerHeader();
644                 
645                 // Swap the BtreeHeaders maps
646                 //swapCurrentBtreeHeaders();
647         
648                 // We can now free pages
649                 for ( PageIO pageIo : freedPages )
650                 {
651                     try
652                     {
653                         free( pageIo );
654                     }
655                     catch ( IOException ioe )
656                     {
657                         throw new RecordManagerException( ioe.getMessage() );
658                     }
659                 }
660         
661                 // Release the allocated and freed pages list
662                 freedPages.clear();
663                 allocatedPages.clear();
664         
665                 // And update the RMHeader again, removing the old references to BOB and CPB b-tree headers
666                 // here, we have to erase the old references to keep only the new ones.
667                 updateRecordManagerHeader();
668                 
669                 // And decrement the number of started transactions
670                 decrementTxnLevel();
671 
672                 // Finally, release the global lock
673                 transactionLock.unlock();
674                 return;
675         }
676     }
677     
678     
679     public boolean isContextOk()
680     {
681         return ( context == null ? true : ( context.get() == 0 ) );
682     }
683     
684     /**
685      * Increment the transactionLevel
686      */
687     private void incrementTxnLevel()
688     {
689         Integer nbTxnLevel = context.get();
690         
691         if ( nbTxnLevel == null )
692         {
693             context.set( 1 );
694         }
695         else
696         {
697             // And increment the counter of inner txn.
698             context.set( nbTxnLevel + 1 );
699         }
700         
701         /*
702         System.out.println( "Incrementing : " + context.get() );
703         
704         if ( context.get() == 0 )
705         {
706             System.out.println( "-------------" );
707         }
708         */
709     }
710     
711     
712     /**
713      * Decrement the transactionLevel
714      */
715     private void decrementTxnLevel()
716     {
717         int nbTxnStarted = context.get();
718 
719         context.set(  nbTxnStarted - 1 );
720         
721         //System.out.println( "Incrementing : " + context.get() );
722     }
723 
724 
725     /**
726      * Rollback a transaction
727      */
728     public void rollback()
729     {
730         // Reset the counter
731         context.set( ROLLBACKED_TXN );
732 
733         // We can now free allocated pages, this is the end of the transaction
734         for ( PageIO pageIo : allocatedPages )
735         {
736             try
737             {
738                 free( pageIo );
739             }
740             catch ( IOException ioe )
741             {
742                 throw new RecordManagerException( ioe.getMessage() );
743             }
744         }
745 
746         // Release the allocated and freed pages list
747         freedPages.clear();
748         allocatedPages.clear();
749 
750         // And update the RMHeader
751         updateRecordManagerHeader();
752         
753         // And restore the BTreeHeaders new Map to the current state
754         revertBtreeHeaders();
755 
756         transactionLock.unlock();
757     }
758 
759 
760     /**
761      * Reads all the PageIOs that are linked to the page at the given position, including
762      * the first page.
763      *
764      * @param position The position of the first page
765      * @param limit The maximum bytes to read. Set this value to -1 when the size is unknown.
766      * @return An array of pages
767      */
768     /*no qualifier*/ PageIO[] readPageIOs( long position, long limit ) throws IOException, EndOfFileExceededException
769     {
770         LOG.debug( "Read PageIOs at position {}", position );
771 
772         if ( limit <= 0 )
773         {
774             limit = Long.MAX_VALUE;
775         }
776 
777         PageIO firstPage = fetchPage( position );
778         firstPage.setSize();
779         List<PageIO> listPages = new ArrayList<PageIO>();
780         listPages.add( firstPage );
781         long dataRead = pageSize - LONG_SIZE - INT_SIZE;
782 
783         // Iterate on the pages, if needed
784         long nextPage = firstPage.getNextPage();
785 
786         if ( ( dataRead < limit ) && ( nextPage != NO_PAGE ) )
787         {
788             while ( dataRead < limit )
789             {
790                 PageIO page = fetchPage( nextPage );
791                 listPages.add( page );
792                 nextPage = page.getNextPage();
793                 dataRead += pageSize - LONG_SIZE;
794 
795                 if ( nextPage == NO_PAGE )
796                 {
797                     page.setNextPage( NO_PAGE );
798                     break;
799                 }
800             }
801         }
802 
803         LOG.debug( "Nb of PageIOs read : {}", listPages.size() );
804 
805         // Return
806         return listPages.toArray( new PageIO[]
807             {} );
808     }
809 
810 
811     /**
812      * Check the offset to be sure it's a valid one :
813      * <ul>
814      * <li>It's >= 0</li>
815      * <li>It's below the end of the file</li>
816      * <li>It's a multipl of the pageSize
817      * </ul>
818      * @param offset The offset to check
819      * @throws InvalidOffsetException If the offset is not valid
820      */
821     private void checkOffset( long offset )
822     {
823         if ( ( offset < 0 ) || ( offset > endOfFileOffset ) || ( ( offset % pageSize ) != 0 ) )
824         {
825             throw new InvalidOffsetException( "Bad Offset : " + offset );
826         }
827     }
828 
829 
830     /**
831      * Read a B-tree from the disk. The meta-data are at the given position in the list of pages.
832      * We load a B-tree in two steps : first, we load the B-tree header, then the common informations
833      *
834      * @param pageIos The list of pages containing the meta-data
835      * @param btree The B-tree we have to initialize
836      * @throws InstantiationException
837      * @throws IllegalAccessException
838      * @throws ClassNotFoundException
839      * @throws NoSuchFieldException
840      * @throws SecurityException
841      * @throws IllegalArgumentException
842      */
843     private <K, V> void loadBtree( PageIO[] pageIos, BTree<K, V> btree ) throws EndOfFileExceededException,
844         IOException, ClassNotFoundException, IllegalAccessException, InstantiationException, IllegalArgumentException, SecurityException, NoSuchFieldException
845     {
846         loadBtree( pageIos, btree, null );
847     }
848 
849 
850     /**
851      * Read a B-tree from the disk. The meta-data are at the given position in the list of pages.
852      * We load a B-tree in two steps : first, we load the B-tree header, then the common informations
853      *
854      * @param pageIos The list of pages containing the meta-data
855      * @param btree The B-tree we have to initialize
856      * @throws InstantiationException
857      * @throws IllegalAccessException
858      * @throws ClassNotFoundException
859      * @throws NoSuchFieldException
860      * @throws SecurityException
861      * @throws IllegalArgumentException
862      */
863     /* no qualifier */ <K, V> void loadBtree( PageIO[] pageIos, BTree btree, BTree<K, V> parentBTree ) throws EndOfFileExceededException,
864         IOException, ClassNotFoundException, IllegalAccessException, InstantiationException, IllegalArgumentException, SecurityException, NoSuchFieldException
865     {
866         long dataPos = 0L;
867 
868         // Process the B-tree header
869         BTreeHeader<K, V> btreeHeader = new BTreeHeader<K, V>();
870         btreeHeader.setBtree( btree );
871 
872         // The BtreeHeader offset
873         btreeHeader.setBTreeHeaderOffset( pageIos[0].getOffset() );
874 
875         // The B-tree current revision
876         long revision = readLong( pageIos, dataPos );
877         btreeHeader.setRevision( revision );
878         dataPos += LONG_SIZE;
879 
880         // The nb elems in the tree
881         long nbElems = readLong( pageIos, dataPos );
882         btreeHeader.setNbElems( nbElems );
883         dataPos += LONG_SIZE;
884 
885         // The B-tree rootPage offset
886         long rootPageOffset = readLong( pageIos, dataPos );
887         btreeHeader.setRootPageOffset( rootPageOffset );
888         dataPos += LONG_SIZE;
889 
890         // The B-tree information offset
891         long btreeInfoOffset = readLong( pageIos, dataPos );
892 
893         // Now, process the common informations
894         PageIO[] infoPageIos = readPageIOs( btreeInfoOffset, Long.MAX_VALUE );
895         ((PersistedBTree<K, V>)btree).setBtreeInfoOffset( infoPageIos[0].getOffset() );
896         dataPos = 0L;
897 
898         // The B-tree page size
899         int btreePageSize = readInt( infoPageIos, dataPos );
900         BTreeFactory.setPageSize( btree, btreePageSize );
901         dataPos += INT_SIZE;
902 
903         // The tree name
904         ByteBuffer btreeNameBytes = readBytes( infoPageIos, dataPos );
905         dataPos += INT_SIZE + btreeNameBytes.limit();
906         String btreeName = Strings.utf8ToString( btreeNameBytes );
907         BTreeFactory.setName( btree, btreeName );
908 
909         // The keySerializer FQCN
910         ByteBuffer keySerializerBytes = readBytes( infoPageIos, dataPos );
911         dataPos += INT_SIZE + keySerializerBytes.limit();
912 
913         String keySerializerFqcn = "";
914 
915         if ( keySerializerBytes != null )
916         {
917             keySerializerFqcn = Strings.utf8ToString( keySerializerBytes );
918         }
919 
920         BTreeFactory.setKeySerializer( btree, keySerializerFqcn );
921 
922         // The valueSerialier FQCN
923         ByteBuffer valueSerializerBytes = readBytes( infoPageIos, dataPos );
924 
925         String valueSerializerFqcn = "";
926         dataPos += INT_SIZE + valueSerializerBytes.limit();
927 
928         if ( valueSerializerBytes != null )
929         {
930             valueSerializerFqcn = Strings.utf8ToString( valueSerializerBytes );
931         }
932 
933         BTreeFactory.setValueSerializer( btree, valueSerializerFqcn );
934 
935         // The B-tree allowDuplicates flag
936         int allowDuplicates = readInt( infoPageIos, dataPos );
937         ( ( PersistedBTree<K, V> ) btree ).setAllowDuplicates( allowDuplicates != 0 );
938         dataPos += INT_SIZE;
939 
940         // Set the recordManager in the btree
941         ( ( PersistedBTree<K, V> ) btree ).setRecordManager( this );
942 
943         // Set the current revision to the one stored in the B-tree header
944         // Here, we have to tell the BTree to keep this revision in the
945         // btreeRevisions Map, thus the 'true' parameter at the end.
946         ((PersistedBTree<K, V>)btree).storeRevision( btreeHeader, true );
947 
948         // Now, init the B-tree
949         ( ( PersistedBTree<K, V> ) btree ).init( parentBTree );
950         
951         // Update the BtreeHeaders Maps
952         currentBTreeHeaders.put( btree.getName(), ( ( PersistedBTree<K, V> ) btree ).getBtreeHeader() );
953         newBTreeHeaders.put( btree.getName(), ( ( PersistedBTree<K, V> ) btree ).getBtreeHeader() );
954 
955         // Read the rootPage pages on disk
956         PageIO[] rootPageIos = readPageIOs( rootPageOffset, Long.MAX_VALUE );
957 
958         Page<K, V> btreeRoot = readPage( btree, rootPageIos );
959         BTreeFactory.setRecordManager( btree, this );
960 
961         BTreeFactory.setRootPage( btree, btreeRoot );
962     }
963 
964 
965     /**
966      * Deserialize a Page from a B-tree at a give position
967      *
968      * @param btree The B-tree we want to read a Page from
969      * @param offset The position in the file for this page
970      * @return The read page
971      * @throws EndOfFileExceededException If we have reached the end of the file while reading the page
972      */
973     public <K, V> Page<K, V> deserialize( BTree<K, V> btree, long offset ) throws EndOfFileExceededException,
974         IOException
975     {
976         checkOffset( offset );
977         PageIO[] rootPageIos = readPageIOs( offset, Long.MAX_VALUE );
978 
979         Page<K, V> page = readPage( btree, rootPageIos );
980 
981         return page;
982     }
983 
984 
985     /**
986      * Read a page from some PageIO for a given B-tree
987      * @param btree The B-tree we want to read a page for
988      * @param pageIos The PageIO containing the raw data
989      * @return The read Page if successful
990      * @throws IOException If the deserialization failed
991      */
992     private <K, V> Page<K, V> readPage( BTree<K, V> btree, PageIO[] pageIos ) throws IOException
993     {
994         // Deserialize the rootPage now
995         long position = 0L;
996 
997         // The revision
998         long revision = readLong( pageIos, position );
999         position += LONG_SIZE;
1000 
1001         // The number of elements in the page
1002         int nbElems = readInt( pageIos, position );
1003         position += INT_SIZE;
1004 
1005         // The size of the data containing the keys and values
1006         Page<K, V> page = null;
1007 
1008         // Reads the bytes containing all the keys and values, if we have some
1009         // We read  big blog of data into  ByteBuffer, then we will process
1010         // this ByteBuffer
1011         ByteBuffer byteBuffer = readBytes( pageIos, position );
1012 
1013         // Now, deserialize the data block. If the number of elements
1014         // is positive, it's a Leaf, otherwise it's a Node
1015         // Note that only a leaf can have 0 elements, and it's the root page then.
1016         if ( nbElems >= 0 )
1017         {
1018             // It's a leaf
1019             page = readLeafKeysAndValues( btree, nbElems, revision, byteBuffer, pageIos );
1020         }
1021         else
1022         {
1023             // It's a node
1024             page = readNodeKeysAndValues( btree, -nbElems, revision, byteBuffer, pageIos );
1025         }
1026 
1027         ( ( AbstractPage<K, V> ) page ).setOffset( pageIos[0].getOffset() );
1028         if ( pageIos.length > 1 )
1029         {
1030             ( ( AbstractPage<K, V> ) page ).setLastOffset( pageIos[pageIos.length - 1].getOffset() );
1031         }
1032 
1033         return page;
1034     }
1035 
1036 
1037     /**
1038      * Deserialize a Leaf from some PageIOs
1039      */
1040     private <K, V> PersistedLeaf<K, V> readLeafKeysAndValues( BTree<K, V> btree, int nbElems, long revision,
1041         ByteBuffer byteBuffer, PageIO[] pageIos )
1042     {
1043         // Its a leaf, create it
1044         PersistedLeaf<K, V> leaf = ( PersistedLeaf<K, V> ) BTreeFactory.createLeaf( btree, revision, nbElems );
1045 
1046         // Store the page offset on disk
1047         leaf.setOffset( pageIos[0].getOffset() );
1048         leaf.setLastOffset( pageIos[pageIos.length - 1].getOffset() );
1049 
1050         int[] keyLengths = new int[nbElems];
1051         int[] valueLengths = new int[nbElems];
1052 
1053         boolean isNotSubTree = ( btree.getType() != BTreeTypeEnum.PERSISTED_SUB );
1054         
1055         // Read each key and value
1056         for ( int i = 0; i < nbElems; i++ )
1057         {
1058             if ( isNotSubTree )
1059             {
1060                 // Read the number of values
1061                 int nbValues = byteBuffer.getInt();
1062                 PersistedValueHolder<V> valueHolder = null;
1063                 
1064                 if ( nbValues < 0 )
1065                 {
1066                     // This is a sub-btree
1067                     byte[] btreeOffsetBytes = new byte[LONG_SIZE];
1068                     byteBuffer.get( btreeOffsetBytes );
1069                     
1070                     // Create the valueHolder. As the number of values is negative, we have to switch
1071                     // to a positive value but as we start at -1 for 0 value, add 1.
1072                     valueHolder = new PersistedValueHolder<V>( btree, 1 - nbValues, btreeOffsetBytes );
1073                 }
1074                 else
1075                 {
1076                     // This is an array
1077                     // Read the value's array length
1078                     valueLengths[i] = byteBuffer.getInt();
1079                     
1080                     // This is an Array of values, read the byte[] associated with it
1081                     byte[] arrayBytes = new byte[valueLengths[i]];
1082                     byteBuffer.get( arrayBytes );
1083                     valueHolder = new PersistedValueHolder<V>( btree, nbValues, arrayBytes );
1084                 }
1085                 
1086                 BTreeFactory.setValue( btree, leaf, i, valueHolder );
1087             }
1088 
1089             keyLengths[i] = byteBuffer.getInt();
1090             byte[] data = new byte[keyLengths[i]];
1091             byteBuffer.get( data );
1092             BTreeFactory.setKey( btree, leaf, i, data );
1093         }
1094 
1095         return leaf;
1096     }
1097 
1098 
1099     /**
1100      * Deserialize a Node from some PageIos
1101      */
1102     private <K, V> PersistedNode<K, V> readNodeKeysAndValues( BTree<K, V> btree, int nbElems, long revision,
1103         ByteBuffer byteBuffer, PageIO[] pageIos ) throws IOException
1104     {
1105         PersistedNode<K, V> node = ( PersistedNode<K, V> ) BTreeFactory.createNode( btree, revision, nbElems );
1106 
1107         // Read each value and key
1108         for ( int i = 0; i < nbElems; i++ )
1109         {
1110             // This is an Offset
1111             long offset = LongSerializer.INSTANCE.deserialize( byteBuffer );
1112             long lastOffset = LongSerializer.INSTANCE.deserialize( byteBuffer );
1113 
1114             PersistedPageHolder<K, V> valueHolder = new PersistedPageHolder<K, V>( btree, null, offset, lastOffset );
1115             node.setValue( i, valueHolder );
1116 
1117             // Read the key length
1118             int keyLength = byteBuffer.getInt();
1119 
1120             int currentPosition = byteBuffer.position();
1121 
1122             // and the key value
1123             K key = btree.getKeySerializer().deserialize( byteBuffer );
1124 
1125             // Set the new position now
1126             byteBuffer.position( currentPosition + keyLength );
1127 
1128             BTreeFactory.setKey( btree, node, i, key );
1129         }
1130 
1131         // and read the last value, as it's a node
1132         long offset = LongSerializer.INSTANCE.deserialize( byteBuffer );
1133         long lastOffset = LongSerializer.INSTANCE.deserialize( byteBuffer );
1134 
1135         PersistedPageHolder<K, V> valueHolder = new PersistedPageHolder<K, V>( btree, null, offset, lastOffset );
1136         node.setValue( nbElems, valueHolder );
1137 
1138         return node;
1139     }
1140 
1141 
1142     /**
1143      * Read a byte[] from pages.
1144      *
1145      * @param pageIos The pages we want to read the byte[] from
1146      * @param position The position in the data stored in those pages
1147      * @return The byte[] we have read
1148      */
1149     /* no qualifier */ ByteBuffer readBytes( PageIO[] pageIos, long position )
1150     {
1151         // Read the byte[] length first
1152         int length = readInt( pageIos, position );
1153         position += INT_SIZE;
1154 
1155         // Compute the page in which we will store the data given the
1156         // current position
1157         int pageNb = computePageNb( position );
1158 
1159         // Compute the position in the current page
1160         int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
1161 
1162         ByteBuffer pageData = pageIos[pageNb].getData();
1163         int remaining = pageData.capacity() - pagePos;
1164 
1165         if ( length == 0 )
1166         {
1167             // No bytes to read : return null;
1168             return null;
1169         }
1170         else
1171         {
1172             ByteBuffer bytes = ByteBuffer.allocate( length );
1173 
1174             while ( length > 0 )
1175             {
1176                 if ( length <= remaining )
1177                 {
1178                     pageData.mark();
1179                     pageData.position( pagePos );
1180                     int oldLimit = pageData.limit();
1181                     pageData.limit( pagePos + length );
1182                     bytes.put( pageData );
1183                     pageData.limit( oldLimit );
1184                     pageData.reset();
1185                     bytes.rewind();
1186 
1187                     return bytes;
1188                 }
1189 
1190                 pageData.mark();
1191                 pageData.position( pagePos );
1192                 int oldLimit = pageData.limit();
1193                 pageData.limit( pagePos + remaining );
1194                 bytes.put( pageData );
1195                 pageData.limit( oldLimit );
1196                 pageData.reset();
1197                 pageNb++;
1198                 pagePos = LINK_SIZE;
1199                 pageData = pageIos[pageNb].getData();
1200                 length -= remaining;
1201                 remaining = pageData.capacity() - pagePos;
1202             }
1203 
1204             bytes.rewind();
1205 
1206             return bytes;
1207         }
1208     }
1209 
1210 
1211     /**
1212      * Read an int from pages
1213      * @param pageIos The pages we want to read the int from
1214      * @param position The position in the data stored in those pages
1215      * @return The int we have read
1216      */
1217     /* no qualifier */ int readInt( PageIO[] pageIos, long position )
1218     {
1219         // Compute the page in which we will store the data given the
1220         // current position
1221         int pageNb = computePageNb( position );
1222 
1223         // Compute the position in the current page
1224         int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
1225 
1226         ByteBuffer pageData = pageIos[pageNb].getData();
1227         int remaining = pageData.capacity() - pagePos;
1228         int value = 0;
1229 
1230         if ( remaining >= INT_SIZE )
1231         {
1232             value = pageData.getInt( pagePos );
1233         }
1234         else
1235         {
1236             value = 0;
1237 
1238             switch ( remaining )
1239             {
1240                 case 3:
1241                     value += ( ( pageData.get( pagePos + 2 ) & 0x00FF ) << 8 );
1242                     // Fallthrough !!!
1243 
1244                 case 2:
1245                     value += ( ( pageData.get( pagePos + 1 ) & 0x00FF ) << 16 );
1246                     // Fallthrough !!!
1247 
1248                 case 1:
1249                     value += ( pageData.get( pagePos ) << 24 );
1250                     break;
1251             }
1252 
1253             // Now deal with the next page
1254             pageData = pageIos[pageNb + 1].getData();
1255             pagePos = LINK_SIZE;
1256 
1257             switch ( remaining )
1258             {
1259                 case 1:
1260                     value += ( pageData.get( pagePos ) & 0x00FF ) << 16;
1261                     // fallthrough !!!
1262 
1263                 case 2:
1264                     value += ( pageData.get( pagePos + 2 - remaining ) & 0x00FF ) << 8;
1265                     // fallthrough !!!
1266 
1267                 case 3:
1268                     value += ( pageData.get( pagePos + 3 - remaining ) & 0x00FF );
1269                     break;
1270             }
1271         }
1272 
1273         return value;
1274     }
1275 
1276 
1277     /**
1278      * Read a byte from pages
1279      * @param pageIos The pages we want to read the byte from
1280      * @param position The position in the data stored in those pages
1281      * @return The byte we have read
1282      */
1283     private byte readByte( PageIO[] pageIos, long position )
1284     {
1285         // Compute the page in which we will store the data given the
1286         // current position
1287         int pageNb = computePageNb( position );
1288 
1289         // Compute the position in the current page
1290         int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
1291 
1292         ByteBuffer pageData = pageIos[pageNb].getData();
1293         byte value = 0;
1294 
1295         value = pageData.get( pagePos );
1296 
1297         return value;
1298     }
1299 
1300 
1301     /**
1302      * Read a long from pages
1303      * @param pageIos The pages we want to read the long from
1304      * @param position The position in the data stored in those pages
1305      * @return The long we have read
1306      */
1307     /* no qualifier */ long readLong( PageIO[] pageIos, long position )
1308     {
1309         // Compute the page in which we will store the data given the
1310         // current position
1311         int pageNb = computePageNb( position );
1312 
1313         // Compute the position in the current page
1314         int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
1315 
1316         ByteBuffer pageData = pageIos[pageNb].getData();
1317         int remaining = pageData.capacity() - pagePos;
1318         long value = 0L;
1319 
1320         if ( remaining >= LONG_SIZE )
1321         {
1322             value = pageData.getLong( pagePos );
1323         }
1324         else
1325         {
1326             switch ( remaining )
1327             {
1328                 case 7:
1329                     value += ( ( ( long ) pageData.get( pagePos + 6 ) & 0x00FF ) << 8 );
1330                     // Fallthrough !!!
1331 
1332                 case 6:
1333                     value += ( ( ( long ) pageData.get( pagePos + 5 ) & 0x00FF ) << 16 );
1334                     // Fallthrough !!!
1335 
1336                 case 5:
1337                     value += ( ( ( long ) pageData.get( pagePos + 4 ) & 0x00FF ) << 24 );
1338                     // Fallthrough !!!
1339 
1340                 case 4:
1341                     value += ( ( ( long ) pageData.get( pagePos + 3 ) & 0x00FF ) << 32 );
1342                     // Fallthrough !!!
1343 
1344                 case 3:
1345                     value += ( ( ( long ) pageData.get( pagePos + 2 ) & 0x00FF ) << 40 );
1346                     // Fallthrough !!!
1347 
1348                 case 2:
1349                     value += ( ( ( long ) pageData.get( pagePos + 1 ) & 0x00FF ) << 48 );
1350                     // Fallthrough !!!
1351 
1352                 case 1:
1353                     value += ( ( long ) pageData.get( pagePos ) << 56 );
1354                     break;
1355             }
1356 
1357             // Now deal with the next page
1358             pageData = pageIos[pageNb + 1].getData();
1359             pagePos = LINK_SIZE;
1360 
1361             switch ( remaining )
1362             {
1363                 case 1:
1364                     value += ( ( long ) pageData.get( pagePos ) & 0x00FF ) << 48;
1365                     // fallthrough !!!
1366 
1367                 case 2:
1368                     value += ( ( long ) pageData.get( pagePos + 2 - remaining ) & 0x00FF ) << 40;
1369                     // fallthrough !!!
1370 
1371                 case 3:
1372                     value += ( ( long ) pageData.get( pagePos + 3 - remaining ) & 0x00FF ) << 32;
1373                     // fallthrough !!!
1374 
1375                 case 4:
1376                     value += ( ( long ) pageData.get( pagePos + 4 - remaining ) & 0x00FF ) << 24;
1377                     // fallthrough !!!
1378 
1379                 case 5:
1380                     value += ( ( long ) pageData.get( pagePos + 5 - remaining ) & 0x00FF ) << 16;
1381                     // fallthrough !!!
1382 
1383                 case 6:
1384                     value += ( ( long ) pageData.get( pagePos + 6 - remaining ) & 0x00FF ) << 8;
1385                     // fallthrough !!!
1386 
1387                 case 7:
1388                     value += ( ( long ) pageData.get( pagePos + 7 - remaining ) & 0x00FF );
1389                     break;
1390             }
1391         }
1392 
1393         return value;
1394     }
1395 
1396 
1397     /**
1398      * Manage a B-tree. The btree will be added and managed by this RecordManager. We will create a
1399      * new RootPage for this added B-tree, which will contain no data.<br/>
1400      * This method is threadsafe.
1401      *
1402      * @param btree The new B-tree to manage.
1403      * @throws BTreeAlreadyManagedException if the B-tree is already managed
1404      * @throws IOException if there was a problem while accessing the file
1405      */
1406     public synchronized <K, V> void manage( BTree<K, V> btree ) throws BTreeAlreadyManagedException, IOException
1407     {
1408         beginTransaction();
1409 
1410         manage( ( BTree<Object, Object> ) btree, NORMAL_BTREE );
1411 
1412         commit();
1413     }
1414 
1415 
1416     /**
1417      * Managing a btree is a matter of storing an reference to the managed B-tree in the B-tree Of B-trees.
1418      * We store a tuple of NameRevision (where revision is 0L) and a offset to the B-tree header.
1419      * At the same time, we keep a track of the managed B-trees in a Map.
1420      *
1421      * @param btree The new B-tree to manage.
1422      * @param treeType flag indicating if this is an internal tree
1423      *
1424      * @throws BTreeAlreadyManagedException If the B-tree is already managed
1425      * @throws IOException
1426      */
1427     public synchronized <K, V> void manage( BTree<K, V> btree, boolean treeType )
1428         throws BTreeAlreadyManagedException, IOException
1429     {
1430         LOG.debug( "Managing the btree {} which is an internam tree : {}", btree.getName(), treeType );
1431         BTreeFactory.setRecordManager( btree, this );
1432 
1433         String name = btree.getName();
1434 
1435         if ( managedBtrees.containsKey( name ) )
1436         {
1437             // There is already a B-tree with this name in the recordManager...
1438             LOG.error( "There is already a B-tree named '{}' managed by this recordManager", name );
1439             throw new BTreeAlreadyManagedException( name );
1440         }
1441 
1442         // Now, write the B-tree informations
1443         long btreeInfoOffset = writeBtreeInfo( btree );
1444         BTreeHeader<K, V> btreeHeader = ((AbstractBTree<K,V>)btree).getBtreeHeader();
1445         ((PersistedBTree<K, V>)btree).setBtreeInfoOffset( btreeInfoOffset );
1446 
1447         // Serialize the B-tree root page
1448         Page<K, V> rootPage = btreeHeader.getRootPage();
1449 
1450         PageIO[] rootPageIos = serializePage( btree, btreeHeader.getRevision(), rootPage );
1451 
1452         // Get the reference on the first page
1453         long rootPageOffset =  rootPageIos[0].getOffset();
1454 
1455         // Store the rootPageOffset into the Btree header and into the rootPage
1456         btreeHeader.setRootPageOffset( rootPageOffset );
1457         ( ( PersistedLeaf<K, V> ) rootPage ).setOffset( rootPageOffset );
1458 
1459         LOG.debug( "Flushing the newly managed '{}' btree rootpage", btree.getName() );
1460         flushPages( rootPageIos );
1461 
1462         // And the B-tree header
1463         long btreeHeaderOffset = writeBtreeHeader( btree, btreeHeader );
1464 
1465         // Now, if this is a new B-tree, add it to the B-tree of B-trees
1466         if ( treeType != INTERNAL_BTREE )
1467         {
1468             // Add the btree into the map of managed B-trees
1469             managedBtrees.put( name, ( BTree<Object, Object> ) btree );
1470             
1471             // And in the Map of currentBtreeHeaders and newBtreeHeaders
1472             currentBTreeHeaders.put( name, btreeHeader );
1473             newBTreeHeaders.put( name, btreeHeader );
1474 
1475             // We can safely increment the number of managed B-trees
1476             nbBtree++;
1477 
1478             // Create the new NameRevision
1479             NameRevision nameRevision = new NameRevision( name, 0L );
1480 
1481             // Inject it into the B-tree of B-tree
1482             btreeOfBtrees.insert( nameRevision, btreeHeaderOffset );
1483         }
1484     }
1485 
1486 
1487     /**
1488      * Serialize a new Page. It will contain the following data :<br/>
1489      * <ul>
1490      * <li>the revision : a long</li>
1491      * <li>the number of elements : an int (if <= 0, it's a Node, otherwise it's a Leaf)</li>
1492      * <li>the size of the values/keys when serialized
1493      * <li>the keys : an array of serialized keys</li>
1494      * <li>the values : an array of references to the children pageIO offset (stored as long)
1495      * if it's a Node, or a list of values if it's a Leaf</li>
1496      * <li></li>
1497      * </ul>
1498      *
1499      * @param revision The node revision
1500      * @param keys The keys to serialize
1501      * @param children The references to the children
1502      * @return An array of pages containing the serialized node
1503      * @throws IOException
1504      */
1505     private <K, V> PageIO[] serializePage( BTree<K, V> btree, long revision, Page<K, V> page ) throws IOException
1506     {
1507         int nbElems = page.getNbElems();
1508 
1509         boolean isNotSubTree = ( btree.getType() != BTreeTypeEnum.PERSISTED_SUB );
1510         
1511         if ( nbElems == 0 )
1512         {
1513             return serializeRootPage( revision );
1514         }
1515         else
1516         {
1517             // Prepare a list of byte[] that will contain the serialized page
1518             int nbBuffers = 1 + 1 + 1 + nbElems * 3;
1519             int dataSize = 0;
1520             int serializedSize = 0;
1521 
1522             if ( page.isNode() )
1523             {
1524                 // A Node has one more value to store
1525                 nbBuffers++;
1526             }
1527 
1528             // Now, we can create the list with the right size
1529             List<byte[]> serializedData = new ArrayList<byte[]>( nbBuffers );
1530 
1531             // The revision
1532             byte[] buffer = LongSerializer.serialize( revision );
1533             serializedData.add( buffer );
1534             serializedSize += buffer.length;
1535 
1536             // The number of elements
1537             // Make it a negative value if it's a Node
1538             int pageNbElems = nbElems;
1539 
1540             if ( page.isNode() )
1541             {
1542                 pageNbElems = -nbElems;
1543             }
1544 
1545             buffer = IntSerializer.serialize( pageNbElems );
1546             serializedData.add( buffer );
1547             serializedSize += buffer.length;
1548 
1549             // Iterate on the keys and values. We first serialize the value, then the key
1550             // until we are done with all of them. If we are serializing a page, we have
1551             // to serialize one more value
1552             for ( int pos = 0; pos < nbElems; pos++ )
1553             {
1554                 // Start with the value
1555                 if ( page.isNode() )
1556                 {
1557                     dataSize += serializeNodeValue( ( PersistedNode<K, V> ) page, pos, serializedData );
1558                     dataSize += serializeNodeKey( ( PersistedNode<K, V> ) page, pos, serializedData );
1559                 }
1560                 else
1561                 {
1562                     if ( isNotSubTree )
1563                     {
1564                         dataSize += serializeLeafValue( ( PersistedLeaf<K, V> ) page, pos, serializedData );
1565                     }
1566                     
1567                     dataSize += serializeLeafKey( ( PersistedLeaf<K, V> ) page, pos, serializedData );
1568                 }
1569             }
1570 
1571             // Nodes have one more value to serialize
1572             if ( page.isNode() )
1573             {
1574                 dataSize += serializeNodeValue( ( PersistedNode<K, V> ) page, nbElems, serializedData );
1575             }
1576 
1577             // Store the data size
1578             buffer = IntSerializer.serialize( dataSize );
1579             serializedData.add( 2, buffer );
1580             serializedSize += buffer.length;
1581 
1582             serializedSize += dataSize;
1583 
1584             // We are done. Allocate the pages we need to store the data
1585             PageIO[] pageIos = getFreePageIOs( serializedSize );
1586 
1587             // And store the data into those pages
1588             long position = 0L;
1589 
1590             for ( byte[] bytes : serializedData )
1591             {
1592                 position = storeRaw( position, bytes, pageIos );
1593             }
1594 
1595             return pageIos;
1596         }
1597     }
1598 
1599 
1600     /**
1601      * Serialize a Node's key
1602      */
1603     private <K, V> int serializeNodeKey( PersistedNode<K, V> node, int pos, List<byte[]> serializedData )
1604     {
1605         KeyHolder<K> holder = node.getKeyHolder( pos );
1606         byte[] buffer = ( ( PersistedKeyHolder<K> ) holder ).getRaw();
1607 
1608         // We have to store the serialized key length
1609         byte[] length = IntSerializer.serialize( buffer.length );
1610         serializedData.add( length );
1611 
1612         // And store the serialized key now if not null
1613         if ( buffer.length != 0 )
1614         {
1615             serializedData.add( buffer );
1616         }
1617 
1618         return buffer.length + INT_SIZE;
1619     }
1620 
1621 
1622     /**
1623      * Serialize a Node's Value. We store the two offsets of the child page.
1624      */
1625     private <K, V> int serializeNodeValue( PersistedNode<K, V> node, int pos, List<byte[]> serializedData )
1626         throws IOException
1627     {
1628         // For a node, we just store the children's offsets
1629         Page<K, V> child = node.getReference( pos );
1630 
1631         // The first offset
1632         byte[] buffer = LongSerializer.serialize( ( ( AbstractPage<K, V> ) child ).getOffset() );
1633         serializedData.add( buffer );
1634         int dataSize = buffer.length;
1635 
1636         // The last offset
1637         buffer = LongSerializer.serialize( ( ( AbstractPage<K, V> ) child ).getLastOffset() );
1638         serializedData.add( buffer );
1639         dataSize += buffer.length;
1640 
1641         return dataSize;
1642     }
1643 
1644 
1645     /**
1646      * Serialize a Leaf's key
1647      */
1648     private <K, V> int serializeLeafKey( PersistedLeaf<K, V> leaf, int pos, List<byte[]> serializedData )
1649     {
1650         int dataSize = 0;
1651         KeyHolder<K> keyHolder = leaf.getKeyHolder( pos );
1652         byte[] keyData = ( ( PersistedKeyHolder<K> ) keyHolder ).getRaw();
1653 
1654         if ( keyData != null )
1655         {
1656             // We have to store the serialized key length
1657             byte[] length = IntSerializer.serialize( keyData.length );
1658             serializedData.add( length );
1659 
1660             // And the key data
1661             serializedData.add( keyData );
1662             dataSize += keyData.length + INT_SIZE;
1663         }
1664         else
1665         {
1666             serializedData.add( IntSerializer.serialize( 0 ) );
1667             dataSize += INT_SIZE;
1668         }
1669 
1670         return dataSize;
1671     }
1672 
1673 
1674     /**
1675      * Serialize a Leaf's Value.
1676      */
1677     private <K, V> int serializeLeafValue( PersistedLeaf<K, V> leaf, int pos, List<byte[]> serializedData )
1678         throws IOException
1679     {
1680         // The value can be an Array or a sub-btree, but we don't care
1681         // we just iterate on all the values
1682         ValueHolder<V> valueHolder = leaf.getValue( pos );
1683         int dataSize = 0;
1684         int nbValues = valueHolder.size();
1685 
1686         if ( !valueHolder.isSubBtree() )
1687         {
1688             // Write the nb elements first
1689             byte[] buffer = IntSerializer.serialize( nbValues );
1690             serializedData.add( buffer );
1691             dataSize = INT_SIZE;
1692 
1693             // We have a serialized value. Just flush it
1694             byte[] data = ( ( PersistedValueHolder<V> ) valueHolder ).getRaw();
1695             dataSize += data.length;
1696 
1697             // Store the data size
1698             buffer = IntSerializer.serialize( data.length );
1699             serializedData.add( buffer );
1700             dataSize += INT_SIZE;
1701 
1702             // and add the data if it's not 0
1703             if ( data.length > 0 )
1704             {
1705                 serializedData.add( data );
1706             }
1707         }
1708         else
1709         {
1710             if ( nbValues == 0 )
1711             {
1712                 // No value.
1713                 byte[] buffer = IntSerializer.serialize( nbValues );
1714                 serializedData.add( buffer );
1715 
1716                 return buffer.length;
1717             }
1718 
1719             if ( valueHolder.isSubBtree() )
1720             {
1721                 // Store the nbVlues as a negative number. We add 1 so that 0 is not confused with an Array value
1722                 byte[] buffer = IntSerializer.serialize( -( nbValues + 1 ) );
1723                 serializedData.add( buffer );
1724                 dataSize += buffer.length;
1725 
1726                 // the B-tree offset
1727                 buffer = LongSerializer.serialize( ( ( PersistedValueHolder<V> ) valueHolder ).getOffset() );
1728                 serializedData.add( buffer );
1729                 dataSize += buffer.length;
1730             }
1731             else
1732             {
1733                 // This is an array, store the nb of values as a positive number
1734                 byte[] buffer = IntSerializer.serialize( nbValues );
1735                 serializedData.add( buffer );
1736                 dataSize += buffer.length;
1737 
1738                 // Now store each value
1739                 byte[] data = ( ( PersistedValueHolder<V> ) valueHolder ).getRaw();
1740                 buffer = IntSerializer.serialize( data.length );
1741                 serializedData.add( buffer );
1742                 dataSize += buffer.length;
1743 
1744                 if ( data.length > 0 )
1745                 {
1746                     serializedData.add( data );
1747                 }
1748 
1749                 dataSize += data.length;
1750             }
1751         }
1752 
1753         return dataSize;
1754     }
1755 
1756 
1757     /**
1758      * Write a root page with no elements in it
1759      */
1760     private PageIO[] serializeRootPage( long revision ) throws IOException
1761     {
1762         // We will have 1 single page if we have no elements
1763         PageIO[] pageIos = new PageIO[1];
1764 
1765         // This is either a new root page or a new page that will be filled later
1766         PageIO newPage = fetchNewPage();
1767 
1768         // We need first to create a byte[] that will contain all the data
1769         // For the root page, this is easy, as we only have to store the revision,
1770         // and the number of elements, which is 0.
1771         long position = 0L;
1772 
1773         position = store( position, revision, newPage );
1774         position = store( position, 0, newPage );
1775 
1776         // Update the page size now
1777         newPage.setSize( ( int ) position );
1778 
1779         // Insert the result into the array of PageIO
1780         pageIos[0] = newPage;
1781 
1782         return pageIos;
1783     }
1784 
1785 
1786     /**
1787      * Update the RecordManager header, injecting the following data :
1788      *
1789      * <pre>
1790      * +---------------------+
1791      * | PageSize            | 4 bytes : The size of a physical page (default to 4096)
1792      * +---------------------+
1793      * | NbTree              | 4 bytes : The number of managed B-trees (at least 1)
1794      * +---------------------+
1795      * | FirstFree           | 8 bytes : The offset of the first free page
1796      * +---------------------+
1797      * | current BoB offset  | 8 bytes : The offset of the current B-tree of B-trees
1798      * +---------------------+
1799      * | previous BoB offset | 8 bytes : The offset of the previous B-tree of B-trees
1800      * +---------------------+
1801      * | current CP offset   | 8 bytes : The offset of the current CopiedPages B-tree
1802      * +---------------------+
1803      * | previous CP offset  | 8 bytes : The offset of the previous CopiedPages B-tree
1804      * +---------------------+
1805      * </pre>
1806      */
1807     public void updateRecordManagerHeader()
1808     {
1809         // The page size
1810         int position = writeData( RECORD_MANAGER_HEADER_BYTES, 0, pageSize );
1811 
1812         // The number of managed B-tree
1813         position = writeData( RECORD_MANAGER_HEADER_BYTES, position, nbBtree );
1814 
1815         // The first free page
1816         position = writeData( RECORD_MANAGER_HEADER_BYTES, position, firstFreePage );
1817 
1818         // The offset of the current B-tree of B-trees
1819         position = writeData( RECORD_MANAGER_HEADER_BYTES, position, currentBtreeOfBtreesOffset );
1820 
1821         // The offset of the copied pages B-tree
1822         position = writeData( RECORD_MANAGER_HEADER_BYTES, position, previousBtreeOfBtreesOffset );
1823 
1824         // The offset of the current B-tree of B-trees
1825         position = writeData( RECORD_MANAGER_HEADER_BYTES, position, currentCopiedPagesBtreeOffset );
1826 
1827         // The offset of the copied pages B-tree
1828         position = writeData( RECORD_MANAGER_HEADER_BYTES, position, previousCopiedPagesBtreeOffset );
1829 
1830         // Write the RecordManager header on disk
1831         RECORD_MANAGER_HEADER_BUFFER.put( RECORD_MANAGER_HEADER_BYTES );
1832         RECORD_MANAGER_HEADER_BUFFER.flip();
1833 
1834         LOG.debug( "Update RM header" );
1835 
1836         if ( LOG_PAGES.isDebugEnabled() )
1837         {
1838             StringBuilder sb = new StringBuilder();
1839 
1840             sb.append( "First free page     : 0x" ).append( Long.toHexString( firstFreePage ) ).append( "\n" );
1841             sb.append( "Current BOB header  : 0x" ).append( Long.toHexString( currentBtreeOfBtreesOffset ) ).append( "\n" );
1842             sb.append( "Previous BOB header : 0x" ).append( Long.toHexString( previousBtreeOfBtreesOffset ) ).append( "\n" );
1843             sb.append( "Current CPB header  : 0x" ).append( Long.toHexString( currentCopiedPagesBtreeOffset ) ).append( "\n" );
1844             sb.append( "Previous CPB header : 0x" ).append( Long.toHexString( previousCopiedPagesBtreeOffset ) ).append( "\n" );
1845 
1846             if ( firstFreePage != NO_PAGE )
1847             {
1848                 long freePage = firstFreePage;
1849                 sb.append( "free pages list : " );
1850 
1851                 boolean isFirst = true;
1852 
1853                 while ( freePage != NO_PAGE )
1854                 {
1855                     if ( isFirst )
1856                     {
1857                         isFirst = false;
1858                     }
1859                     else
1860                     {
1861                         sb.append( " -> " );
1862                     }
1863 
1864                     sb.append( "0x" ).append( Long.toHexString( freePage ) );
1865 
1866                     try
1867                     {
1868                         PageIO[] freePageIO = readPageIOs( freePage, 8 );
1869 
1870                         freePage = freePageIO[0].getNextPage();
1871                     }
1872                     catch ( EndOfFileExceededException e )
1873                     {
1874                         // TODO Auto-generated catch block
1875                         e.printStackTrace();
1876                     }
1877                     catch ( IOException e )
1878                     {
1879                         // TODO Auto-generated catch block
1880                         e.printStackTrace();
1881                     }
1882                 }
1883 
1884             }
1885 
1886             LOG_PAGES.debug( "Update RM Header : \n{}", sb.toString() );
1887         }
1888 
1889         try
1890         {
1891             fileChannel.write( RECORD_MANAGER_HEADER_BUFFER, 0 );
1892         }
1893         catch ( IOException ioe )
1894         {
1895             throw new FileException( ioe.getMessage() );
1896         }
1897 
1898         RECORD_MANAGER_HEADER_BUFFER.clear();
1899 
1900         // Reset the old versions
1901         previousBtreeOfBtreesOffset = -1L;
1902         previousCopiedPagesBtreeOffset = -1L;
1903 
1904         nbUpdateRMHeader.incrementAndGet();
1905     }
1906 
1907 
1908     /**
1909      * Update the RecordManager header, injecting the following data :
1910      *
1911      * <pre>
1912      * +---------------------+
1913      * | PageSize            | 4 bytes : The size of a physical page (default to 4096)
1914      * +---------------------+
1915      * | NbTree              | 4 bytes : The number of managed B-trees (at least 1)
1916      * +---------------------+
1917      * | FirstFree           | 8 bytes : The offset of the first free page
1918      * +---------------------+
1919      * | current BoB offset  | 8 bytes : The offset of the current B-tree of B-trees
1920      * +---------------------+
1921      * | previous BoB offset | 8 bytes : The offset of the previous B-tree of B-trees
1922      * +---------------------+
1923      * | current CP offset   | 8 bytes : The offset of the current CopiedPages B-tree
1924      * +---------------------+
1925      * | previous CP offset  | 8 bytes : The offset of the previous CopiedPages B-tree
1926      * +---------------------+
1927      * </pre>
1928      */
1929     public void updateRecordManagerHeader( long newBtreeOfBtreesOffset, long newCopiedPageBtreeOffset )
1930     {
1931         if ( newBtreeOfBtreesOffset != -1L )
1932         {
1933             previousBtreeOfBtreesOffset = currentBtreeOfBtreesOffset;
1934             currentBtreeOfBtreesOffset = newBtreeOfBtreesOffset;
1935         }
1936 
1937         if ( newCopiedPageBtreeOffset != -1L )
1938         {
1939             previousCopiedPagesBtreeOffset = currentCopiedPagesBtreeOffset;
1940             currentCopiedPagesBtreeOffset = newCopiedPageBtreeOffset;
1941         }
1942     }
1943 
1944 
1945     /**
1946      * Inject an int into a byte[] at a given position.
1947      */
1948     private int writeData( byte[] buffer, int position, int value )
1949     {
1950         RECORD_MANAGER_HEADER_BYTES[position] = ( byte ) ( value >>> 24 );
1951         RECORD_MANAGER_HEADER_BYTES[position+1] = ( byte ) ( value >>> 16 );
1952         RECORD_MANAGER_HEADER_BYTES[position+2] = ( byte ) ( value >>> 8 );
1953         RECORD_MANAGER_HEADER_BYTES[position+3] = ( byte ) ( value );
1954 
1955         return position + 4;
1956     }
1957 
1958 
1959     /**
1960      * Inject a long into a byte[] at a given position.
1961      */
1962     private int writeData( byte[] buffer, int position, long value )
1963     {
1964         RECORD_MANAGER_HEADER_BYTES[position] = ( byte ) ( value >>> 56 );
1965         RECORD_MANAGER_HEADER_BYTES[position+1] = ( byte ) ( value >>> 48 );
1966         RECORD_MANAGER_HEADER_BYTES[position+2] = ( byte ) ( value >>> 40 );
1967         RECORD_MANAGER_HEADER_BYTES[position+3] = ( byte ) ( value >>> 32 );
1968         RECORD_MANAGER_HEADER_BYTES[position+4] = ( byte ) ( value >>> 24 );
1969         RECORD_MANAGER_HEADER_BYTES[position+5] = ( byte ) ( value >>> 16 );
1970         RECORD_MANAGER_HEADER_BYTES[position+6] = ( byte ) ( value >>> 8 );
1971         RECORD_MANAGER_HEADER_BYTES[position+7] = ( byte ) ( value );
1972 
1973         return position + 8;
1974     }
1975 
1976 
1977     /**
1978      * Add a new <btree, revision> tuple into the B-tree of B-trees.
1979      *
1980      * @param name The B-tree name
1981      * @param revision The B-tree revision
1982      * @param btreeHeaderOffset The B-tree offset
1983      * @throws IOException If the update failed
1984      */
1985     /* no qualifier */ <K, V> void addInBtreeOfBtrees( String name, long revision, long btreeHeaderOffset ) throws IOException
1986     {
1987         checkOffset( btreeHeaderOffset );
1988         NameRevision nameRevision = new NameRevision( name, revision );
1989 
1990         btreeOfBtrees.insert( nameRevision, btreeHeaderOffset );
1991 
1992         // Update the B-tree of B-trees offset
1993         currentBtreeOfBtreesOffset = getNewBTreeHeader( BTREE_OF_BTREES_NAME ).getBTreeHeaderOffset();
1994     }
1995 
1996 
1997     /**
1998      * Add a new <btree, revision> tuple into the CopiedPages B-tree.
1999      *
2000      * @param name The B-tree name
2001      * @param revision The B-tree revision
2002      * @param btreeHeaderOffset The B-tree offset
2003      * @throws IOException If the update failed
2004      */
2005     /* no qualifier */ <K, V> void addInCopiedPagesBtree( String name, long revision, List<Page<K, V>> pages ) throws IOException
2006     {
2007         RevisionName revisionName = new RevisionName( revision, name );
2008 
2009         long[] pageOffsets = new long[pages.size()];
2010         int pos = 0;
2011 
2012         for ( Page<K, V> page : pages )
2013         {
2014             pageOffsets[pos++] = ((AbstractPage<K, V>)page).getOffset();
2015         }
2016 
2017         copiedPageBtree.insert( revisionName, pageOffsets );
2018 
2019         // Update the CopiedPageBtree offset
2020         currentCopiedPagesBtreeOffset = ((AbstractBTree<RevisionName, long[]>)copiedPageBtree).getBtreeHeader().getBTreeHeaderOffset();
2021     }
2022 
2023 
2024     /**
2025      * Internal method used to update the B-tree of B-trees offset
2026      * @param btreeOfBtreesOffset The new offset
2027      */
2028     /* no qualifier */ void setBtreeOfBtreesOffset( long btreeOfBtreesOffset )
2029     {
2030         checkOffset( btreeOfBtreesOffset );
2031         this.currentBtreeOfBtreesOffset = btreeOfBtreesOffset;
2032     }
2033 
2034 
2035     /**
2036      * Write the B-tree header on disk. We will write the following informations :
2037      * <pre>
2038      * +------------+
2039      * | revision   | The B-tree revision
2040      * +------------+
2041      * | nbElems    | The B-tree number of elements
2042      * +------------+
2043      * | rootPage   | The root page offset
2044      * +------------+
2045      * | BtreeInfo  | The B-tree info offset
2046      * +------------+
2047      * </pre>
2048      * @param btree The B-tree which header has to be written
2049      * @param btreeInfoOffset The offset of the B-tree informations
2050      * @return The B-tree header offset
2051      * @throws IOException If we weren't able to write the B-tree header
2052      */
2053     /* no qualifier */ <K, V> long writeBtreeHeader( BTree<K, V> btree, BTreeHeader<K, V> btreeHeader ) throws IOException
2054     {
2055         int bufferSize =
2056             LONG_SIZE +                     // The revision
2057             LONG_SIZE +                     // the number of element
2058             LONG_SIZE +                     // The root page offset
2059             LONG_SIZE;                      // The B-tree info page offset
2060 
2061         // Get the pageIOs we need to store the data. We may need more than one.
2062         PageIO[] btreeHeaderPageIos = getFreePageIOs( bufferSize );
2063 
2064         // Store the B-tree header Offset into the B-tree
2065         long btreeHeaderOffset = btreeHeaderPageIos[0].getOffset();
2066 
2067         // Now store the B-tree data in the pages :
2068         // - the B-tree revision
2069         // - the B-tree number of elements
2070         // - the B-tree root page offset
2071         // - the B-tree info page offset
2072         // Starts at 0
2073         long position = 0L;
2074 
2075         // The B-tree current revision
2076         position = store( position, btreeHeader.getRevision(), btreeHeaderPageIos );
2077 
2078         // The nb elems in the tree
2079         position = store( position, btreeHeader.getNbElems(), btreeHeaderPageIos );
2080 
2081 
2082         // Now, we can inject the B-tree rootPage offset into the B-tree header
2083         position = store( position, btreeHeader.getRootPageOffset(), btreeHeaderPageIos );
2084 
2085         // The B-tree info page offset
2086         position = store( position, ((PersistedBTree<K, V>)btree).getBtreeInfoOffset(), btreeHeaderPageIos );
2087 
2088         // And flush the pages to disk now
2089         LOG.debug( "Flushing the newly managed '{}' btree header", btree.getName() );
2090 
2091         if ( LOG_PAGES.isDebugEnabled() )
2092         {
2093             LOG_PAGES.debug( "Writing BTreeHeader revision {} for {}", btreeHeader.getRevision(), btree.getName() );
2094             StringBuilder sb = new StringBuilder();
2095 
2096             sb.append( "Offset : " ).append( Long.toHexString( btreeHeaderOffset ) ).append( "\n" );
2097             sb.append( "    Revision : " ).append( btreeHeader.getRevision() ).append( "\n" );
2098             sb.append( "    NbElems  : " ).append( btreeHeader.getNbElems() ).append( "\n" );
2099             sb.append( "    RootPage : 0x" ).append( Long.toHexString( btreeHeader.getRootPageOffset() ) ).append( "\n" );
2100             sb.append( "    Info     : 0x" ).append( Long.toHexString( ((PersistedBTree<K, V>)btree).getBtreeInfoOffset() ) ).append( "\n" );
2101 
2102             LOG_PAGES.debug( "Btree Header[{}]\n{}", btreeHeader.getRevision(), sb.toString() );
2103         }
2104 
2105         flushPages( btreeHeaderPageIos );
2106 
2107         btreeHeader.setBTreeHeaderOffset( btreeHeaderOffset );
2108 
2109         return btreeHeaderOffset;
2110     }
2111 
2112 
2113     /**
2114      * Write the B-tree informations on disk. We will write the following informations :
2115      * <pre>
2116      * +------------+
2117      * | pageSize   | The B-tree page size (ie, the number of elements per page max)
2118      * +------------+
2119      * | nameSize   | The B-tree name size
2120      * +------------+
2121      * | name       | The B-tree name
2122      * +------------+
2123      * | keySerSize | The keySerializer FQCN size
2124      * +------------+
2125      * | keySerFQCN | The keySerializer FQCN
2126      * +------------+
2127      * | valSerSize | The Value serializer FQCN size
2128      * +------------+
2129      * | valSerKQCN | The valueSerializer FQCN
2130      * +------------+
2131      * | dups       | The flags that tell if the dups are allowed
2132      * +------------+
2133      * </pre>
2134      * @param btree The B-tree which header has to be written
2135      * @return The B-tree header offset
2136      * @throws IOException If we weren't able to write the B-tree header
2137      */
2138     private <K, V> long writeBtreeInfo( BTree<K, V> btree ) throws IOException
2139     {
2140         // We will add the newly managed B-tree at the end of the header.
2141         byte[] btreeNameBytes = Strings.getBytesUtf8( btree.getName() );
2142         byte[] keySerializerBytes = Strings.getBytesUtf8( btree.getKeySerializerFQCN() );
2143         byte[] valueSerializerBytes = Strings.getBytesUtf8( btree.getValueSerializerFQCN() );
2144 
2145         int bufferSize =
2146             INT_SIZE +                      // The page size
2147             INT_SIZE +                      // The name size
2148             btreeNameBytes.length +         // The name
2149             INT_SIZE +                      // The keySerializerBytes size
2150             keySerializerBytes.length +     // The keySerializerBytes
2151             INT_SIZE +                      // The valueSerializerBytes size
2152             valueSerializerBytes.length +   // The valueSerializerBytes
2153             INT_SIZE;                       // The allowDuplicates flag
2154 
2155         // Get the pageIOs we need to store the data. We may need more than one.
2156         PageIO[] btreeHeaderPageIos = getFreePageIOs( bufferSize );
2157 
2158         // Keep the B-tree header Offset into the B-tree
2159         long btreeInfoOffset = btreeHeaderPageIos[0].getOffset();
2160 
2161         // Now store the B-tree information data in the pages :
2162         // - the B-tree page size
2163         // - the B-tree name
2164         // - the keySerializer FQCN
2165         // - the valueSerializer FQCN
2166         // - the flags that tell if the dups are allowed
2167         // Starts at 0
2168         long position = 0L;
2169 
2170         // The B-tree page size
2171         position = store( position, btree.getPageSize(), btreeHeaderPageIos );
2172 
2173         // The tree name
2174         position = store( position, btreeNameBytes, btreeHeaderPageIos );
2175 
2176         // The keySerializer FQCN
2177         position = store( position, keySerializerBytes, btreeHeaderPageIos );
2178 
2179         // The valueSerialier FQCN
2180         position = store( position, valueSerializerBytes, btreeHeaderPageIos );
2181 
2182         // The allowDuplicates flag
2183         position = store( position, ( btree.isAllowDuplicates() ? 1 : 0 ), btreeHeaderPageIos );
2184 
2185         // And flush the pages to disk now
2186         LOG.debug( "Flushing the newly managed '{}' btree header", btree.getName() );
2187         flushPages( btreeHeaderPageIos );
2188 
2189         return btreeInfoOffset;
2190     }
2191 
2192 
2193     /**
2194      * Update the B-tree header after a B-tree modification. This will make the latest modification
2195      * visible.<br/>
2196      * We update the following fields :
2197      * <ul>
2198      * <li>the revision</li>
2199      * <li>the number of elements</li>
2200      * <li>the B-tree root page offset</li>
2201      * </ul>
2202      * <br/>
2203      * As a result, a new version of the BtreHeader will be created, which will replace the previous
2204      * B-tree header
2205      * @param btree TheB-tree to update
2206      * @param btreeHeaderOffset The offset of the modified btree header
2207      * @return The offset of the new B-tree Header
2208      * @throws IOException If we weren't able to write the file on disk
2209      * @throws EndOfFileExceededException If we tried to write after the end of the file
2210      */
2211     /* no qualifier */ <K, V> long updateBtreeHeader( BTree<K, V> btree, long btreeHeaderOffset )
2212         throws EndOfFileExceededException, IOException
2213     {
2214         return updateBtreeHeader( btree, btreeHeaderOffset, false );
2215     }
2216 
2217 
2218     /**
2219      * Update the B-tree header after a B-tree modification. This will make the latest modification
2220      * visible.<br/>
2221      * We update the following fields :
2222      * <ul>
2223      * <li>the revision</li>
2224      * <li>the number of elements</li>
2225      * <li>the reference to the current B-tree revisions</li>
2226      * <li>the reference to the old B-tree revisions</li>
2227      * </ul>
2228      * <br/>
2229      * As a result, we new version of the BtreHeader will be created
2230      * @param btree The B-tree to update
2231      * @param btreeHeaderOffset The offset of the modified btree header
2232      * @return The offset of the new B-tree Header if it has changed (ie, when the onPlace flag is set to true)
2233      * @throws IOException
2234      * @throws EndOfFileExceededException
2235      */
2236     /* no qualifier */ <K, V> void updateBtreeHeaderOnPlace( BTree<K, V> btree, long btreeHeaderOffset )
2237         throws EndOfFileExceededException,
2238         IOException
2239     {
2240         updateBtreeHeader( btree, btreeHeaderOffset, true );
2241     }
2242 
2243 
2244     /**
2245      * Update the B-tree header after a B-tree modification. This will make the latest modification
2246      * visible.<br/>
2247      * We update the following fields :
2248      * <ul>
2249      * <li>the revision</li>
2250      * <li>the number of elements</li>
2251      * <li>the reference to the current B-tree revisions</li>
2252      * <li>the reference to the old B-tree revisions</li>
2253      * </ul>
2254      * <br/>
2255      * As a result, a new version of the BtreHeader will be created, which may replace the previous
2256      * B-tree header (if the onPlace flag is set to true) or a new set of pageIos will contain the new
2257      * version.
2258      *
2259      * @param btree The B-tree to update
2260      * @param rootPageOffset The offset of the modified rootPage
2261      * @param onPlace Tells if we modify the B-tree on place, or if we create a copy
2262      * @return The offset of the new B-tree Header if it has changed (ie, when the onPlace flag is set to true)
2263      * @throws EndOfFileExceededException If we tried to write after the end of the file
2264      * @throws IOException If tehre were some error while writing the data on disk
2265      */
2266     private <K, V> long updateBtreeHeader( BTree<K, V> btree, long btreeHeaderOffset, boolean onPlace )
2267         throws EndOfFileExceededException, IOException
2268     {
2269         // Read the pageIOs associated with this B-tree
2270         PageIO[] pageIos;
2271         long newBtreeHeaderOffset = NO_PAGE;
2272         long offset = ( ( PersistedBTree<K, V> ) btree ).getBtreeOffset();
2273 
2274         if ( onPlace )
2275         {
2276             // We just have to update the existing BTreeHeader
2277             long headerSize = LONG_SIZE + LONG_SIZE + LONG_SIZE;
2278 
2279             pageIos = readPageIOs( offset, headerSize );
2280 
2281             // Now, update the revision
2282             long position = 0;
2283 
2284             position = store( position, btree.getRevision(), pageIos );
2285             position = store( position, btree.getNbElems(), pageIos );
2286             position = store( position, btreeHeaderOffset, pageIos );
2287 
2288             // Write the pages on disk
2289             if ( LOG.isDebugEnabled() )
2290             {
2291                 LOG.debug( "-----> Flushing the '{}' B-treeHeader", btree.getName() );
2292                 LOG.debug( "  revision : " + btree.getRevision() + ", NbElems : " + btree.getNbElems() + ", btreeHeader offset : 0x"
2293                     + Long.toHexString( btreeHeaderOffset ) );
2294             }
2295 
2296             // Get new place on disk to store the modified BTreeHeader if it's not onPlace
2297             // Rewrite the pages at the same place
2298             LOG.debug( "Rewriting the B-treeHeader on place for B-tree " + btree.getName() );
2299             flushPages( pageIos );
2300         }
2301         else
2302         {
2303             // We have to read and copy the existing BTreeHeader and to create a new one
2304             pageIos = readPageIOs( offset, Long.MAX_VALUE );
2305 
2306             // Now, copy every read page
2307             PageIO[] newPageIOs = new PageIO[pageIos.length];
2308             int pos = 0;
2309 
2310             for ( PageIO pageIo : pageIos )
2311             {
2312                 // Fetch a free page
2313                 newPageIOs[pos] = fetchNewPage();
2314 
2315                 // keep a track of the allocated and copied pages so that we can
2316                 // free them when we do a commit or rollback, if the btree is an management one
2317                 if ( ( btree.getType() == BTreeTypeEnum.BTREE_OF_BTREES ) || ( btree.getType() == BTreeTypeEnum.COPIED_PAGES_BTREE ) )
2318                 {
2319                     freedPages.add( pageIo );
2320                     allocatedPages.add( newPageIOs[pos] );
2321                 }
2322 
2323                 pageIo.copy( newPageIOs[pos] );
2324 
2325                 if ( pos > 0 )
2326                 {
2327                     newPageIOs[pos - 1].setNextPage( newPageIOs[pos].getOffset() );
2328                 }
2329 
2330                 pos++;
2331             }
2332 
2333             // store the new btree header offset
2334             // and update the revision
2335             long position = 0;
2336 
2337             position = store( position, btree.getRevision(), newPageIOs );
2338             position = store( position, btree.getNbElems(), newPageIOs );
2339             position = store( position, btreeHeaderOffset, newPageIOs );
2340 
2341             // Get new place on disk to store the modified BTreeHeader if it's not onPlace
2342             // Flush the new B-treeHeader on disk
2343             LOG.debug( "Rewriting the B-treeHeader on place for B-tree " + btree.getName() );
2344             flushPages( newPageIOs );
2345 
2346             newBtreeHeaderOffset = newPageIOs[0].getOffset();
2347         }
2348 
2349         nbUpdateBtreeHeader.incrementAndGet();
2350 
2351         if ( LOG_CHECK.isDebugEnabled() )
2352         {
2353             MavibotInspector.check( this );
2354         }
2355 
2356         return newBtreeHeaderOffset;
2357     }
2358 
2359 
2360     /**
2361      * Write the pages on disk, either at the end of the file, or at
2362      * the position they were taken from.
2363      *
2364      * @param pageIos The list of pages to write
2365      * @throws IOException If the write failed
2366      */
2367     private void flushPages( PageIO... pageIos ) throws IOException
2368     {
2369         if ( LOG.isDebugEnabled() )
2370         {
2371             for ( PageIO pageIo : pageIos )
2372             {
2373                 dump( pageIo );
2374             }
2375         }
2376 
2377         for ( PageIO pageIo : pageIos )
2378         {
2379             pageIo.getData().rewind();
2380 
2381             if ( fileChannel.size() < ( pageIo.getOffset() + pageSize ) )
2382             {
2383                 LOG.debug( "Adding a page at the end of the file" );
2384                 // This is a page we have to add to the file
2385                 fileChannel.write( pageIo.getData(), fileChannel.size() );
2386                 //fileChannel.force( false );
2387             }
2388             else
2389             {
2390                 LOG.debug( "Writing a page at position {}", pageIo.getOffset() );
2391                 fileChannel.write( pageIo.getData(), pageIo.getOffset() );
2392                 //fileChannel.force( false );
2393             }
2394 
2395             nbUpdatePageIOs.incrementAndGet();
2396 
2397             pageIo.getData().rewind();
2398         }
2399     }
2400 
2401 
2402     /**
2403      * Compute the page in which we will store data given an offset, when
2404      * we have a list of pages.
2405      *
2406      * @param offset The position in the data
2407      * @return The page number in which the offset will start
2408      */
2409     private int computePageNb( long offset )
2410     {
2411         long pageNb = 0;
2412 
2413         offset -= pageSize - LINK_SIZE - PAGE_SIZE;
2414 
2415         if ( offset < 0 )
2416         {
2417             return ( int ) pageNb;
2418         }
2419 
2420         pageNb = 1 + offset / ( pageSize - LINK_SIZE );
2421 
2422         return ( int ) pageNb;
2423     }
2424 
2425 
2426     /**
2427      * Stores a byte[] into one ore more pageIO (depending if the long is stored
2428      * across a boundary or not)
2429      *
2430      * @param position The position in a virtual byte[] if all the pages were contiguous
2431      * @param bytes The byte[] to serialize
2432      * @param pageIos The pageIOs we have to store the data in
2433      * @return The new offset
2434      */
2435     private long store( long position, byte[] bytes, PageIO... pageIos )
2436     {
2437         if ( bytes != null )
2438         {
2439             // Write the bytes length
2440             position = store( position, bytes.length, pageIos );
2441 
2442             // Compute the page in which we will store the data given the
2443             // current position
2444             int pageNb = computePageNb( position );
2445 
2446             // Get back the buffer in this page
2447             ByteBuffer pageData = pageIos[pageNb].getData();
2448 
2449             // Compute the position in the current page
2450             int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
2451 
2452             // Compute the remaining size in the page
2453             int remaining = pageData.capacity() - pagePos;
2454             int nbStored = bytes.length;
2455 
2456             // And now, write the bytes until we have none
2457             while ( nbStored > 0 )
2458             {
2459                 if ( remaining > nbStored )
2460                 {
2461                     pageData.mark();
2462                     pageData.position( pagePos );
2463                     pageData.put( bytes, bytes.length - nbStored, nbStored );
2464                     pageData.reset();
2465                     nbStored = 0;
2466                 }
2467                 else
2468                 {
2469                     pageData.mark();
2470                     pageData.position( pagePos );
2471                     pageData.put( bytes, bytes.length - nbStored, remaining );
2472                     pageData.reset();
2473                     pageNb++;
2474                     pageData = pageIos[pageNb].getData();
2475                     pagePos = LINK_SIZE;
2476                     nbStored -= remaining;
2477                     remaining = pageData.capacity() - pagePos;
2478                 }
2479             }
2480 
2481             // We are done
2482             position += bytes.length;
2483         }
2484         else
2485         {
2486             // No bytes : write 0 and return
2487             position = store( position, 0, pageIos );
2488         }
2489 
2490         return position;
2491     }
2492 
2493 
2494     /**
2495      * Stores a byte[] into one ore more pageIO (depending if the long is stored
2496      * across a boundary or not). We don't add the byte[] size, it's already present
2497      * in the received byte[].
2498      *
2499      * @param position The position in a virtual byte[] if all the pages were contiguous
2500      * @param bytes The byte[] to serialize
2501      * @param pageIos The pageIOs we have to store the data in
2502      * @return The new offset
2503      */
2504     private long storeRaw( long position, byte[] bytes, PageIO... pageIos )
2505     {
2506         if ( bytes != null )
2507         {
2508             // Compute the page in which we will store the data given the
2509             // current position
2510             int pageNb = computePageNb( position );
2511 
2512             // Get back the buffer in this page
2513             ByteBuffer pageData = pageIos[pageNb].getData();
2514 
2515             // Compute the position in the current page
2516             int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
2517 
2518             // Compute the remaining size in the page
2519             int remaining = pageData.capacity() - pagePos;
2520             int nbStored = bytes.length;
2521 
2522             // And now, write the bytes until we have none
2523             while ( nbStored > 0 )
2524             {
2525                 if ( remaining > nbStored )
2526                 {
2527                     pageData.mark();
2528                     pageData.position( pagePos );
2529                     pageData.put( bytes, bytes.length - nbStored, nbStored );
2530                     pageData.reset();
2531                     nbStored = 0;
2532                 }
2533                 else
2534                 {
2535                     pageData.mark();
2536                     pageData.position( pagePos );
2537                     pageData.put( bytes, bytes.length - nbStored, remaining );
2538                     pageData.reset();
2539                     pageNb++;
2540 
2541                     if ( pageNb == pageIos.length )
2542                     {
2543                         // We can stop here : we have reach the end of the page
2544                         break;
2545                     }
2546 
2547                     pageData = pageIos[pageNb].getData();
2548                     pagePos = LINK_SIZE;
2549                     nbStored -= remaining;
2550                     remaining = pageData.capacity() - pagePos;
2551                 }
2552             }
2553 
2554             // We are done
2555             position += bytes.length;
2556         }
2557         else
2558         {
2559             // No bytes : write 0 and return
2560             position = store( position, 0, pageIos );
2561         }
2562 
2563         return position;
2564     }
2565 
2566 
2567     /**
2568      * Stores an Integer into one ore more pageIO (depending if the int is stored
2569      * across a boundary or not)
2570      *
2571      * @param position The position in a virtual byte[] if all the pages were contiguous
2572      * @param value The int to serialize
2573      * @param pageIos The pageIOs we have to store the data in
2574      * @return The new offset
2575      */
2576     private long store( long position, int value, PageIO... pageIos )
2577     {
2578         // Compute the page in which we will store the data given the
2579         // current position
2580         int pageNb = computePageNb( position );
2581 
2582         // Compute the position in the current page
2583         int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
2584 
2585         // Get back the buffer in this page
2586         ByteBuffer pageData = pageIos[pageNb].getData();
2587 
2588         // Compute the remaining size in the page
2589         int remaining = pageData.capacity() - pagePos;
2590 
2591         if ( remaining < INT_SIZE )
2592         {
2593             // We have to copy the serialized length on two pages
2594 
2595             switch ( remaining )
2596             {
2597                 case 3:
2598                     pageData.put( pagePos + 2, ( byte ) ( value >>> 8 ) );
2599                     // Fallthrough !!!
2600 
2601                 case 2:
2602                     pageData.put( pagePos + 1, ( byte ) ( value >>> 16 ) );
2603                     // Fallthrough !!!
2604 
2605                 case 1:
2606                     pageData.put( pagePos, ( byte ) ( value >>> 24 ) );
2607                     break;
2608             }
2609 
2610             // Now deal with the next page
2611             pageData = pageIos[pageNb + 1].getData();
2612             pagePos = LINK_SIZE;
2613 
2614             switch ( remaining )
2615             {
2616                 case 1:
2617                     pageData.put( pagePos, ( byte ) ( value >>> 16 ) );
2618                     // fallthrough !!!
2619 
2620                 case 2:
2621                     pageData.put( pagePos + 2 - remaining, ( byte ) ( value >>> 8 ) );
2622                     // fallthrough !!!
2623 
2624                 case 3:
2625                     pageData.put( pagePos + 3 - remaining, ( byte ) ( value ) );
2626                     break;
2627             }
2628         }
2629         else
2630         {
2631             // Store the value in the page at the selected position
2632             pageData.putInt( pagePos, value );
2633         }
2634 
2635         // Increment the position to reflect the addition of an Int (4 bytes)
2636         position += INT_SIZE;
2637 
2638         return position;
2639     }
2640 
2641 
2642     /**
2643      * Stores a Long into one ore more pageIO (depending if the long is stored
2644      * across a boundary or not)
2645      *
2646      * @param position The position in a virtual byte[] if all the pages were contiguous
2647      * @param value The long to serialize
2648      * @param pageIos The pageIOs we have to store the data in
2649      * @return The new offset
2650      */
2651     private long store( long position, long value, PageIO... pageIos )
2652     {
2653         // Compute the page in which we will store the data given the
2654         // current position
2655         int pageNb = computePageNb( position );
2656 
2657         // Compute the position in the current page
2658         int pagePos = ( int ) ( position + ( pageNb + 1 ) * LONG_SIZE + INT_SIZE ) - pageNb * pageSize;
2659 
2660         // Get back the buffer in this page
2661         ByteBuffer pageData = pageIos[pageNb].getData();
2662 
2663         // Compute the remaining size in the page
2664         int remaining = pageData.capacity() - pagePos;
2665 
2666         if ( remaining < LONG_SIZE )
2667         {
2668             // We have to copy the serialized length on two pages
2669 
2670             switch ( remaining )
2671             {
2672                 case 7:
2673                     pageData.put( pagePos + 6, ( byte ) ( value >>> 8 ) );
2674                     // Fallthrough !!!
2675 
2676                 case 6:
2677                     pageData.put( pagePos + 5, ( byte ) ( value >>> 16 ) );
2678                     // Fallthrough !!!
2679 
2680                 case 5:
2681                     pageData.put( pagePos + 4, ( byte ) ( value >>> 24 ) );
2682                     // Fallthrough !!!
2683 
2684                 case 4:
2685                     pageData.put( pagePos + 3, ( byte ) ( value >>> 32 ) );
2686                     // Fallthrough !!!
2687 
2688                 case 3:
2689                     pageData.put( pagePos + 2, ( byte ) ( value >>> 40 ) );
2690                     // Fallthrough !!!
2691 
2692                 case 2:
2693                     pageData.put( pagePos + 1, ( byte ) ( value >>> 48 ) );
2694                     // Fallthrough !!!
2695 
2696                 case 1:
2697                     pageData.put( pagePos, ( byte ) ( value >>> 56 ) );
2698                     break;
2699             }
2700 
2701             // Now deal with the next page
2702             pageData = pageIos[pageNb + 1].getData();
2703             pagePos = LINK_SIZE;
2704 
2705             switch ( remaining )
2706             {
2707                 case 1:
2708                     pageData.put( pagePos, ( byte ) ( value >>> 48 ) );
2709                     // fallthrough !!!
2710 
2711                 case 2:
2712                     pageData.put( pagePos + 2 - remaining, ( byte ) ( value >>> 40 ) );
2713                     // fallthrough !!!
2714 
2715                 case 3:
2716                     pageData.put( pagePos + 3 - remaining, ( byte ) ( value >>> 32 ) );
2717                     // fallthrough !!!
2718 
2719                 case 4:
2720                     pageData.put( pagePos + 4 - remaining, ( byte ) ( value >>> 24 ) );
2721                     // fallthrough !!!
2722 
2723                 case 5:
2724                     pageData.put( pagePos + 5 - remaining, ( byte ) ( value >>> 16 ) );
2725                     // fallthrough !!!
2726 
2727                 case 6:
2728                     pageData.put( pagePos + 6 - remaining, ( byte ) ( value >>> 8 ) );
2729                     // fallthrough !!!
2730 
2731                 case 7:
2732                     pageData.put( pagePos + 7 - remaining, ( byte ) ( value ) );
2733                     break;
2734             }
2735         }
2736         else
2737         {
2738             // Store the value in the page at the selected position
2739             pageData.putLong( pagePos, value );
2740         }
2741 
2742         // Increment the position to reflect the addition of an Long (8 bytes)
2743         position += LONG_SIZE;
2744 
2745         return position;
2746     }
2747 
2748 
2749     /**
2750      * Write the page in a serialized form.
2751      *
2752      * @param btree The persistedBtree we will create a new PageHolder for
2753      * @param newPage The page to write on disk
2754      * @param newRevision The page's revision
2755      * @return A PageHolder containing the copied page
2756      * @throws IOException If the page can't be written on disk
2757      */
2758     /* No qualifier*/<K, V> PageHolder<K, V> writePage( BTree<K, V> btree, Page<K, V> newPage,
2759         long newRevision ) throws IOException
2760     {
2761         // We first need to save the new page on disk
2762         PageIO[] pageIos = serializePage( btree, newRevision, newPage );
2763 
2764         if ( LOG_PAGES.isDebugEnabled() )
2765         {
2766             LOG_PAGES.debug( "Write data for '{}' btree", btree.getName()  );
2767 
2768             logPageIos( pageIos );
2769         }
2770 
2771         // Write the page on disk
2772         flushPages( pageIos );
2773 
2774         // Build the resulting reference
2775         long offset = pageIos[0].getOffset();
2776         long lastOffset = pageIos[pageIos.length - 1].getOffset();
2777         PersistedPageHolder<K, V> pageHolder = new PersistedPageHolder<K, V>( btree, newPage, offset,
2778             lastOffset );
2779 
2780         return pageHolder;
2781     }
2782 
2783 
2784     /* No qualifier */ static void logPageIos( PageIO[] pageIos )
2785     {
2786         int pageNb = 0;
2787 
2788         for ( PageIO pageIo : pageIos )
2789         {
2790             StringBuilder sb = new StringBuilder();
2791             sb.append( "PageIO[" ).append( pageNb ).append( "]:0x" );
2792             sb.append( Long.toHexString( pageIo.getOffset() ) ).append( "/");
2793             sb.append( pageIo.getSize() );
2794             pageNb++;
2795 
2796             ByteBuffer data = pageIo.getData();
2797 
2798             int position = data.position();
2799             int dataLength = (int)pageIo.getSize() + 12;
2800             
2801             if ( dataLength > data.limit() )
2802             {
2803                 dataLength = data.limit();
2804             }
2805             
2806             byte[] bytes = new byte[dataLength];
2807 
2808             data.get( bytes );
2809             data.position( position );
2810             int pos = 0;
2811 
2812             for ( byte b : bytes )
2813             {
2814                 int mod = pos%16;
2815 
2816                 switch ( mod )
2817                 {
2818                     case 0:
2819                         sb.append( "\n    " );
2820                         // No break
2821                     case 4:
2822                     case 8:
2823                     case 12:
2824                         sb.append( " " );
2825                     case 1:
2826                     case 2:
2827                     case 3:
2828                     case 5:
2829                     case 6:
2830                     case 7:
2831                     case 9:
2832                     case 10:
2833                     case 11:
2834                     case 13:
2835                     case 14:
2836                     case 15:
2837                         sb.append( Strings.dumpByte( b ) ).append( " " );
2838                 }
2839                 pos++;
2840             }
2841 
2842             LOG_PAGES.debug( sb.toString() );
2843         }
2844     }
2845 
2846 
2847     /**
2848      * Compute the number of pages needed to store some specific size of data.
2849      *
2850      * @param dataSize The size of the data we want to store in pages
2851      * @return The number of pages needed
2852      */
2853     private int computeNbPages( int dataSize )
2854     {
2855         if ( dataSize <= 0 )
2856         {
2857             return 0;
2858         }
2859 
2860         // Compute the number of pages needed.
2861         // Considering that each page can contain PageSize bytes,
2862         // but that the first 8 bytes are used for links and we
2863         // use 4 bytes to store the data size, the number of needed
2864         // pages is :
2865         // NbPages = ( (dataSize - (PageSize - 8 - 4 )) / (PageSize - 8) ) + 1
2866         // NbPages += ( if (dataSize - (PageSize - 8 - 4 )) % (PageSize - 8) > 0 : 1 : 0 )
2867         int availableSize = ( pageSize - LONG_SIZE );
2868         int nbNeededPages = 1;
2869 
2870         // Compute the number of pages that will be full but the first page
2871         if ( dataSize > availableSize - INT_SIZE )
2872         {
2873             int remainingSize = dataSize - ( availableSize - INT_SIZE );
2874             nbNeededPages += remainingSize / availableSize;
2875             int remain = remainingSize % availableSize;
2876 
2877             if ( remain > 0 )
2878             {
2879                 nbNeededPages++;
2880             }
2881         }
2882 
2883         return nbNeededPages;
2884     }
2885 
2886 
2887     /**
2888      * Get as many pages as needed to store the data of the given size. The returned
2889      * PageIOs are all linked together.
2890      *
2891      * @param dataSize The data size
2892      * @return An array of pages, enough to store the full data
2893      */
2894     private PageIO[] getFreePageIOs( int dataSize ) throws IOException
2895     {
2896         if ( dataSize == 0 )
2897         {
2898             return new PageIO[]
2899                 {};
2900         }
2901 
2902         int nbNeededPages = computeNbPages( dataSize );
2903 
2904         PageIO[] pageIOs = new PageIO[nbNeededPages];
2905 
2906         // The first page : set the size
2907         pageIOs[0] = fetchNewPage();
2908         pageIOs[0].setSize( dataSize );
2909 
2910         for ( int i = 1; i < nbNeededPages; i++ )
2911         {
2912             pageIOs[i] = fetchNewPage();
2913 
2914             // Create the link
2915             pageIOs[i - 1].setNextPage( pageIOs[i].getOffset() );
2916         }
2917 
2918         return pageIOs;
2919     }
2920 
2921 
2922     /**
2923      * Return a new Page. We take one of the existing free pages, or we create
2924      * a new page at the end of the file.
2925      *
2926      * @return The fetched PageIO
2927      */
2928     private PageIO fetchNewPage() throws IOException
2929     {
2930         //dumpFreePages( firstFreePage );
2931 
2932         if ( firstFreePage == NO_PAGE )
2933         {
2934             nbCreatedPages.incrementAndGet();
2935 
2936             // We don't have any free page. Reclaim some new page at the end
2937             // of the file
2938             PageIO newPage = new PageIO( endOfFileOffset );
2939 
2940             endOfFileOffset += pageSize;
2941 
2942             ByteBuffer data = ByteBuffer.allocateDirect( pageSize );
2943 
2944             newPage.setData( data );
2945             newPage.setNextPage( NO_PAGE );
2946             newPage.setSize( 0 );
2947 
2948             LOG.debug( "Requiring a new page at offset {}", newPage.getOffset() );
2949 
2950             return newPage;
2951         }
2952         else
2953         {
2954             nbReusedPages.incrementAndGet();
2955 
2956             // We have some existing free page. Fetch it from disk
2957             PageIO pageIo = fetchPage( firstFreePage );
2958 
2959             // Update the firstFreePage pointer
2960             firstFreePage = pageIo.getNextPage();
2961 
2962             // overwrite the data of old page
2963             ByteBuffer data = ByteBuffer.allocateDirect( pageSize );
2964             pageIo.setData( data );
2965 
2966             pageIo.setNextPage( NO_PAGE );
2967             pageIo.setSize( 0 );
2968 
2969             LOG.debug( "Reused page at offset {}", pageIo.getOffset() );
2970 
2971             return pageIo;
2972         }
2973     }
2974 
2975 
2976     /**
2977      * fetch a page from disk, knowing its position in the file.
2978      *
2979      * @param offset The position in the file
2980      * @return The found page
2981      */
2982     /* no qualifier */ PageIO fetchPage( long offset ) throws IOException, EndOfFileExceededException
2983     {
2984         checkOffset( offset );
2985 
2986         if ( fileChannel.size() < offset + pageSize )
2987         {
2988             // Error : we are past the end of the file
2989             throw new EndOfFileExceededException( "We are fetching a page on " + offset +
2990                 " when the file's size is " + fileChannel.size() );
2991         }
2992         else
2993         {
2994             // Read the page
2995             fileChannel.position( offset );
2996 
2997             ByteBuffer data = ByteBuffer.allocate( pageSize );
2998             fileChannel.read( data );
2999             data.rewind();
3000 
3001             PageIO readPage = new PageIO( offset );
3002             readPage.setData( data );
3003 
3004             return readPage;
3005         }
3006     }
3007 
3008 
3009     /**
3010      * @return the pageSize
3011      */
3012     public int getPageSize()
3013     {
3014         return pageSize;
3015     }
3016 
3017 
3018     /**
3019      * Set the page size, ie the number of bytes a page can store.
3020      *
3021      * @param pageSize The number of bytes for a page
3022      */
3023     /* no qualifier */ void setPageSize( int pageSize )
3024     {
3025         if ( this.pageSize >= 13 )
3026         {
3027             this.pageSize = pageSize;
3028         }
3029         else
3030         {
3031             this.pageSize = DEFAULT_PAGE_SIZE;
3032         }
3033     }
3034 
3035 
3036     /**
3037      * Close the RecordManager and flush everything on disk
3038      */
3039     public void close() throws IOException
3040     {
3041         beginTransaction();
3042 
3043         // Close all the managed B-trees
3044         for ( BTree<Object, Object> tree : managedBtrees.values() )
3045         {
3046             tree.close();
3047         }
3048 
3049         // Close the management B-trees
3050         copiedPageBtree.close();
3051         btreeOfBtrees.close();
3052 
3053         managedBtrees.clear();
3054 
3055         // Write the data
3056         fileChannel.force( true );
3057 
3058         // And close the channel
3059         fileChannel.close();
3060 
3061         commit();
3062     }
3063 
3064 
3065     /** Hex chars */
3066     private static final byte[] HEX_CHAR = new byte[]
3067         { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
3068 
3069 
3070     public static String dump( byte octet )
3071     {
3072         return new String( new byte[]
3073             { HEX_CHAR[( octet & 0x00F0 ) >> 4], HEX_CHAR[octet & 0x000F] } );
3074     }
3075 
3076 
3077     /**
3078      * Dump a pageIO
3079      */
3080     private void dump( PageIO pageIo )
3081     {
3082         ByteBuffer buffer = pageIo.getData();
3083         buffer.mark();
3084         byte[] longBuffer = new byte[LONG_SIZE];
3085         byte[] intBuffer = new byte[INT_SIZE];
3086 
3087         // get the next page offset
3088         buffer.get( longBuffer );
3089         long nextOffset = LongSerializer.deserialize( longBuffer );
3090 
3091         // Get the data size
3092         buffer.get( intBuffer );
3093         int size = IntSerializer.deserialize( intBuffer );
3094 
3095         buffer.reset();
3096 
3097         System.out.println( "PageIO[" + Long.toHexString( pageIo.getOffset() ) + "], size = " + size + ", NEXT PageIO:"
3098             + Long.toHexString( nextOffset ) );
3099         System.out.println( " 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F " );
3100         System.out.println( "+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+" );
3101 
3102         for ( int i = 0; i < buffer.limit(); i += 16 )
3103         {
3104             System.out.print( "|" );
3105 
3106             for ( int j = 0; j < 16; j++ )
3107             {
3108                 System.out.print( dump( buffer.get() ) );
3109 
3110                 if ( j == 15 )
3111                 {
3112                     System.out.println( "|" );
3113                 }
3114                 else
3115                 {
3116                     System.out.print( " " );
3117                 }
3118             }
3119         }
3120 
3121         System.out.println( "+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+" );
3122 
3123         buffer.reset();
3124     }
3125 
3126 
3127     /**
3128      * Dump the RecordManager file
3129      * @throws IOException
3130      */
3131     public void dump()
3132     {
3133         System.out.println( "/---------------------------- Dump ----------------------------\\" );
3134 
3135         try
3136         {
3137             RandomAccessFile randomFile = new RandomAccessFile( file, "r" );
3138             FileChannel fileChannel = randomFile.getChannel();
3139 
3140             ByteBuffer recordManagerHeader = ByteBuffer.allocate( RECORD_MANAGER_HEADER_SIZE );
3141 
3142             // load the RecordManager header
3143             fileChannel.read( recordManagerHeader );
3144 
3145             recordManagerHeader.rewind();
3146 
3147             // The page size
3148             long fileSize = fileChannel.size();
3149             int pageSize = recordManagerHeader.getInt();
3150             long nbPages = fileSize / pageSize;
3151 
3152             // The number of managed B-trees
3153             int nbBtree = recordManagerHeader.getInt();
3154 
3155             // The first free page
3156             long firstFreePage = recordManagerHeader.getLong();
3157 
3158             // The current B-tree of B-trees
3159             long currentBtreeOfBtreesPage = recordManagerHeader.getLong();
3160 
3161             // The previous B-tree of B-trees
3162             long previousBtreeOfBtreesPage = recordManagerHeader.getLong();
3163 
3164             // The current CopiedPages B-tree
3165             long currentCopiedPagesBtreePage = recordManagerHeader.getLong();
3166 
3167             // The previous CopiedPages B-tree
3168             long previousCopiedPagesBtreePage = recordManagerHeader.getLong();
3169 
3170             System.out.println( "  RecordManager" );
3171             System.out.println( "  -------------" );
3172             System.out.println( "  Size = 0x" + Long.toHexString( fileSize ) );
3173             System.out.println( "  NbPages = " + nbPages );
3174             System.out.println( "    Header " );
3175             System.out.println( "      page size : " + pageSize );
3176             System.out.println( "      nbTree : " + nbBtree );
3177             System.out.println( "      firstFreePage : 0x" + Long.toHexString( firstFreePage ) );
3178             System.out.println( "      current BOB : 0x" + Long.toHexString( currentBtreeOfBtreesPage ) );
3179             System.out.println( "      previous BOB : 0x" + Long.toHexString( previousBtreeOfBtreesPage ) );
3180             System.out.println( "      current CopiedPages : 0x" + Long.toHexString( currentCopiedPagesBtreePage ) );
3181             System.out.println( "      previous CopiedPages : 0x" + Long.toHexString( previousCopiedPagesBtreePage ) );
3182 
3183             // Dump the Free pages list
3184             dumpFreePages( firstFreePage );
3185 
3186             // Dump the B-tree of B-trees
3187             dumpBtreeHeader( currentBtreeOfBtreesPage );
3188 
3189             // Dump the previous B-tree of B-trees if any
3190             if ( previousBtreeOfBtreesPage != NO_PAGE )
3191             {
3192                 dumpBtreeHeader( previousBtreeOfBtreesPage );
3193             }
3194 
3195             // Dump the CopiedPages B-tree
3196             dumpBtreeHeader( currentCopiedPagesBtreePage );
3197 
3198 
3199             // Dump the previous B-tree of B-trees if any
3200             if ( previousCopiedPagesBtreePage != NO_PAGE )
3201             {
3202                 dumpBtreeHeader( previousCopiedPagesBtreePage );
3203             }
3204 
3205             // Dump all the user's B-tree
3206             randomFile.close();
3207             System.out.println( "\\---------------------------- Dump ----------------------------/" );
3208         }
3209         catch ( IOException ioe )
3210         {
3211             System.out.println( "Exception while dumping the file : " + ioe.getMessage() );
3212         }
3213     }
3214 
3215 
3216     /**
3217      * Dump the free pages
3218      */
3219     private void dumpFreePages( long freePageOffset ) throws EndOfFileExceededException, IOException
3220     {
3221         System.out.println( "\n  FreePages : " );
3222         int pageNb = 1;
3223 
3224         while ( freePageOffset != NO_PAGE )
3225         {
3226             PageIO pageIo = fetchPage( freePageOffset );
3227 
3228             System.out.println( "    freePage[" + pageNb + "] : 0x" + Long.toHexString( pageIo.getOffset() ) );
3229 
3230             freePageOffset = pageIo.getNextPage();
3231             pageNb++;
3232         }
3233     }
3234 
3235 
3236     /**
3237      * Dump a B-tree Header
3238      */
3239     private long dumpBtreeHeader( long btreeOffset ) throws EndOfFileExceededException, IOException
3240     {
3241         // First read the B-tree header
3242         PageIO[] pageIos = readPageIOs( btreeOffset, Long.MAX_VALUE );
3243 
3244         long dataPos = 0L;
3245 
3246         // The B-tree current revision
3247         long revision = readLong( pageIos, dataPos );
3248         dataPos += LONG_SIZE;
3249 
3250         // The nb elems in the tree
3251         long nbElems = readLong( pageIos, dataPos );
3252         dataPos += LONG_SIZE;
3253 
3254         // The B-tree rootPage offset
3255         long rootPageOffset = readLong( pageIos, dataPos );
3256         dataPos += LONG_SIZE;
3257 
3258         // The B-tree page size
3259         int btreePageSize = readInt( pageIos, dataPos );
3260         dataPos += INT_SIZE;
3261 
3262         // The tree name
3263         ByteBuffer btreeNameBytes = readBytes( pageIos, dataPos );
3264         dataPos += INT_SIZE + btreeNameBytes.limit();
3265         String btreeName = Strings.utf8ToString( btreeNameBytes );
3266 
3267         // The keySerializer FQCN
3268         ByteBuffer keySerializerBytes = readBytes( pageIos, dataPos );
3269         dataPos += INT_SIZE + keySerializerBytes.limit();
3270 
3271         String keySerializerFqcn = "";
3272 
3273         if ( keySerializerBytes != null )
3274         {
3275             keySerializerFqcn = Strings.utf8ToString( keySerializerBytes );
3276         }
3277 
3278         // The valueSerialier FQCN
3279         ByteBuffer valueSerializerBytes = readBytes( pageIos, dataPos );
3280 
3281         String valueSerializerFqcn = "";
3282         dataPos += INT_SIZE + valueSerializerBytes.limit();
3283 
3284         if ( valueSerializerBytes != null )
3285         {
3286             valueSerializerFqcn = Strings.utf8ToString( valueSerializerBytes );
3287         }
3288 
3289         // The B-tree allowDuplicates flag
3290         int allowDuplicates = readInt( pageIos, dataPos );
3291         boolean dupsAllowed = allowDuplicates != 0;
3292 
3293         dataPos += INT_SIZE;
3294 
3295 //        System.out.println( "\n  B-Tree " + btreeName );
3296 //        System.out.println( "  ------------------------- " );
3297 
3298 //        System.out.println( "    nbPageIOs[" + pageIos.length + "] = " + pageIoList );
3299         if ( LOG.isDebugEnabled() )
3300         {
3301             StringBuilder sb = new StringBuilder();
3302             boolean isFirst = true;
3303 
3304             for ( PageIO pageIo : pageIos )
3305             {
3306                 if ( isFirst )
3307                 {
3308                     isFirst = false;
3309                 }
3310                 else
3311                 {
3312                     sb.append( ", " );
3313                 }
3314 
3315                 sb.append( "0x" ).append( Long.toHexString( pageIo.getOffset() ) );
3316             }
3317 
3318             String pageIoList = sb.toString();
3319 
3320             LOG.debug( "    PageIOs[{}] = {}", pageIos.length, pageIoList );
3321 
3322 //        System.out.println( "    dataSize = "+ pageIos[0].getSize() );
3323             LOG.debug( "    dataSize = {}", pageIos[0].getSize() );
3324 
3325             LOG.debug( "    B-tree '{}'", btreeName );
3326             LOG.debug( "    revision : {}", revision );
3327             LOG.debug( "    nbElems : {}", nbElems );
3328             LOG.debug( "    rootPageOffset : 0x{}", Long.toHexString( rootPageOffset ) );
3329             LOG.debug( "    B-tree page size : {}", btreePageSize );
3330             LOG.debug( "    keySerializer : '{}'", keySerializerFqcn );
3331             LOG.debug( "    valueSerializer : '{}'", valueSerializerFqcn );
3332             LOG.debug( "    dups allowed : {}", dupsAllowed );
3333 //
3334 //        System.out.println( "    B-tree '" + btreeName + "'" );
3335 //        System.out.println( "    revision : " + revision );
3336 //        System.out.println( "    nbElems : " + nbElems );
3337 //        System.out.println( "    rootPageOffset : 0x" + Long.toHexString( rootPageOffset ) );
3338 //        System.out.println( "    B-tree page size : " + btreePageSize );
3339 //        System.out.println( "    keySerializer : " + keySerializerFqcn );
3340 //        System.out.println( "    valueSerializer : " + valueSerializerFqcn );
3341 //        System.out.println( "    dups allowed : " + dupsAllowed );
3342         }
3343 
3344         return rootPageOffset;
3345     }
3346 
3347 
3348     /**
3349      * Get the number of managed trees. We don't count the CopiedPage B-tree and the B-tree of B-trees
3350      *
3351      * @return The number of managed B-trees
3352      */
3353     public int getNbManagedTrees()
3354     {
3355         return nbBtree;
3356     }
3357 
3358 
3359     /**
3360      * Get the managed B-trees. We don't return the CopiedPage B-tree nor the B-tree of B-trees.
3361      *
3362      * @return The managed B-trees
3363      */
3364     public Set<String> getManagedTrees()
3365     {
3366         Set<String> btrees = new HashSet<String>( managedBtrees.keySet() );
3367 
3368         return btrees;
3369     }
3370 
3371 
3372     /**
3373      * Stores the copied pages into the CopiedPages B-tree
3374      *
3375      * @param name The B-tree name
3376      * @param revision The revision
3377      * @param copiedPages The pages that have been copied while creating this revision
3378      * @throws IOException If we weren't able to store the data on disk
3379      */
3380     /* No Qualifier */ void storeCopiedPages( String name, long revision, long[] copiedPages ) throws IOException
3381     {
3382         RevisionName revisionName = new RevisionName( revision, name );
3383 
3384         copiedPageBtree.insert( revisionName, copiedPages );
3385     }
3386 
3387 
3388     /**
3389      * Store a reference to an old rootPage into the Revision B-tree
3390      *
3391      * @param btree The B-tree we want to keep an old RootPage for
3392      * @param rootPage The old rootPage
3393      * @throws IOException If we have an issue while writing on disk
3394      */
3395     /* No qualifier */<K, V> void storeRootPage( BTree<K, V> btree, Page<K, V> rootPage ) throws IOException
3396     {
3397         if ( !isKeepRevisions() )
3398         {
3399             return;
3400         }
3401 
3402         if ( btree == copiedPageBtree )
3403         {
3404             return;
3405         }
3406 
3407         NameRevision nameRevision = new NameRevision( btree.getName(), rootPage.getRevision() );
3408 
3409         ( ( AbstractBTree<NameRevision, Long> ) btreeOfBtrees ).insert( nameRevision,
3410             ( ( AbstractPage<K, V> ) rootPage ).getOffset(), 0 );
3411 
3412         if ( LOG_CHECK.isDebugEnabled() )
3413         {
3414             MavibotInspector.check( this );
3415         }
3416     }
3417 
3418 
3419     /**
3420      * Fetch the rootPage of a given B-tree for a given revision.
3421      *
3422      * @param btree The B-tree we are interested in
3423      * @param revision The revision we want to get back
3424      * @return The rootPage for this B-tree and this revision, if any
3425      * @throws KeyNotFoundException If we can't find the rootPage for this revision and this B-tree
3426      * @throws IOException If we had an ise while accessing the data on disk
3427      */
3428     /* No qualifier */<K, V> Page<K, V> getRootPage( BTree<K, V> btree, long revision ) throws KeyNotFoundException,
3429         IOException
3430     {
3431         if ( btree.getRevision() == revision )
3432         {
3433             // We are asking for the current revision
3434             return btree.getRootPage();
3435         }
3436 
3437         // Get the B-tree header offset
3438         NameRevision nameRevision = new NameRevision( btree.getName(), revision );
3439         long btreeHeaderOffset = btreeOfBtrees.get( nameRevision );
3440 
3441         // get the B-tree rootPage
3442         Page<K, V> btreeRoot = readRootPage( btree, btreeHeaderOffset );
3443 
3444         return btreeRoot;
3445     }
3446 
3447 
3448     /**
3449      * Read a root page from the B-tree header offset
3450      */
3451     private <K, V> Page<K, V> readRootPage( BTree<K, V> btree, long btreeHeaderOffset ) throws EndOfFileExceededException, IOException
3452     {
3453         // Read the B-tree header pages on disk
3454         PageIO[] btreeHeaderPageIos = readPageIOs( btreeHeaderOffset, Long.MAX_VALUE );
3455         long dataPos = LONG_SIZE + LONG_SIZE;
3456 
3457         // The B-tree rootPage offset
3458         long rootPageOffset = readLong( btreeHeaderPageIos, dataPos );
3459 
3460         // Read the rootPage pages on disk
3461         PageIO[] rootPageIos = readPageIOs( rootPageOffset, Long.MAX_VALUE );
3462 
3463         // Now, convert it to a Page
3464         Page<K, V> btreeRoot = readPage( btree, rootPageIos );
3465 
3466         return btreeRoot;
3467     }
3468 
3469 
3470     /**
3471      * Get one managed trees, knowing its name.
3472      *
3473      * @param name The B-tree name we are looking for
3474      * @return The managed B-trees
3475      */
3476     public <K, V> BTree<K, V> getManagedTree( String name )
3477     {
3478         return ( BTree<K, V> ) managedBtrees.get( name );
3479     }
3480 
3481 
3482     /**
3483      * Move a list of pages to the free page list. A logical page is associated with one
3484      * or more physical PageIOs, which are on the disk. We have to move all those PagIO instances
3485      * to the free list, and do the same in memory (we try to keep a reference to a set of
3486      * free pages.
3487      *
3488      * @param btree The B-tree which were owning the pages
3489      * @param revision The current revision
3490      * @param pages The pages to free
3491      * @throws IOException If we had a problem while updating the file
3492      * @throws EndOfFileExceededException If we tried to write after the end of the file
3493      */
3494     /* Package protected */<K, V> void freePages( BTree<K, V> btree, long revision, List<Page<K, V>> pages )
3495         throws EndOfFileExceededException, IOException
3496     {
3497         if ( ( pages == null ) || pages.isEmpty() )
3498         {
3499             return;
3500         }
3501 
3502         if ( !keepRevisions )
3503         {
3504             // if the B-tree doesn't keep revisions, we can safely move
3505             // the pages to the freed page list.
3506             if ( LOG.isDebugEnabled() )
3507             {
3508                 LOG.debug( "Freeing the following pages :" );
3509 
3510                 for ( Page<K, V> page : pages )
3511                 {
3512                     LOG.debug(  "    {}", page );
3513                 }
3514             }
3515 
3516             for ( Page<K, V> page : pages )
3517             {
3518                 long pageOffset = ((AbstractPage<K, V>)page).getOffset();
3519 
3520                 PageIO[] pageIos = readPageIOs( pageOffset, Long.MAX_VALUE );
3521 
3522                 for ( PageIO pageIo : pageIos )
3523                 {
3524                     freedPages.add( pageIo );
3525                 }
3526             }
3527         }
3528         else
3529         {
3530             // We are keeping revisions of standard B-trees, so we move the pages to the CopiedPages B-tree
3531             // but only for non managed B-trees
3532             if ( LOG.isDebugEnabled() )
3533             {
3534                 LOG.debug( "Moving the following pages to the CopiedBtree :" );
3535 
3536                 for ( Page<K, V> page : pages )
3537                 {
3538                     LOG.debug(  "    {}", page );
3539                 }
3540             }
3541 
3542             long[] pageOffsets = new long[pages.size()];
3543             int pos = 0;
3544 
3545             for ( Page<K, V> page : pages )
3546             {
3547                 pageOffsets[pos++] = ((AbstractPage<K, V>)page).offset;
3548             }
3549 
3550             if ( ( btree.getType() != BTreeTypeEnum.BTREE_OF_BTREES ) && ( btree.getType() != BTreeTypeEnum.COPIED_PAGES_BTREE ) )
3551             {
3552                 // Deal with standard B-trees
3553                 RevisionName revisionName = new RevisionName( revision, btree.getName() );
3554 
3555                 copiedPageBtree.insert( revisionName, pageOffsets );
3556 
3557                 // Update the RecordManager Copiedpage Offset
3558                 currentCopiedPagesBtreeOffset = ((PersistedBTree<RevisionName, long[]>)copiedPageBtree).getBtreeOffset();
3559             }
3560             else
3561             {
3562                 // Managed B-trees : we simply free the copied pages
3563                 for ( long pageOffset : pageOffsets )
3564                 {
3565                     PageIO[] pageIos = readPageIOs( pageOffset, Long.MAX_VALUE );
3566 
3567                     for ( PageIO pageIo : pageIos )
3568                     {
3569                         freedPages.add( pageIo );
3570                     }
3571                 }
3572             }
3573         }
3574     }
3575 
3576 
3577     /**
3578      * Add a PageIO to the list of free PageIOs
3579      *
3580      * @param pageIo The page to free
3581      * @throws IOException If we weren't capable of updating the file
3582      */
3583     private void free( PageIO pageIo ) throws IOException
3584     {
3585         // We add the Page's PageIOs before the
3586         // existing free pages.
3587         // Link it to the first free page
3588         pageIo.setNextPage( firstFreePage );
3589 
3590         LOG.debug( "Flushing the first free page" );
3591 
3592         // And flush it to disk
3593         flushPages( pageIo );
3594 
3595         // We can update the firstFreePage offset
3596         firstFreePage = pageIo.getOffset();
3597     }
3598 
3599 
3600     /**
3601      * @return the keepRevisions flag
3602      */
3603     public boolean isKeepRevisions()
3604     {
3605         return keepRevisions;
3606     }
3607 
3608 
3609     /**
3610      * @param keepRevisions the keepRevisions flag to set
3611      */
3612     public void setKeepRevisions( boolean keepRevisions )
3613     {
3614         this.keepRevisions = keepRevisions;
3615     }
3616 
3617 
3618     /**
3619      * Creates a B-tree and automatically adds it to the list of managed btrees
3620      *
3621      * @param name the name of the B-tree
3622      * @param keySerializer key serializer
3623      * @param valueSerializer value serializer
3624      * @param allowDuplicates flag for allowing duplicate keys
3625      * @return a managed B-tree
3626      * @throws IOException If we weren't able to update the file on disk
3627      * @throws BTreeAlreadyManagedException If the B-tree is already managed
3628      */
3629     @SuppressWarnings("all")
3630     public <K, V> BTree<K, V> addBTree( String name, ElementSerializer<K> keySerializer,
3631         ElementSerializer<V> valueSerializer, boolean allowDuplicates )
3632             throws IOException, BTreeAlreadyManagedException
3633     {
3634         PersistedBTreeConfiguration config = new PersistedBTreeConfiguration();
3635 
3636         config.setName( name );
3637         config.setKeySerializer( keySerializer );
3638         config.setValueSerializer( valueSerializer );
3639         config.setAllowDuplicates( allowDuplicates );
3640 
3641         BTree btree = new PersistedBTree( config );
3642         manage( btree );
3643 
3644         if ( LOG_CHECK.isDebugEnabled() )
3645         {
3646             MavibotInspector.check( this );
3647         }
3648 
3649         return btree;
3650     }
3651 
3652     
3653     /**
3654      * Add a newly closd transaction into the closed transaction queue
3655      */
3656     /* no qualifier */ <K, V> void releaseTransaction( ReadTransaction<K, V> readTransaction )
3657     {
3658         RevisionName revisionName = new RevisionName( 
3659             readTransaction.getRevision(), 
3660             readTransaction.getBtreeHeader().getBtree().getName() );
3661         //closedTransactionsQueue.add( revisionName );
3662     }
3663     
3664     
3665     /**
3666      * Get the current BTreeHeader for a given Btree. It might not exist
3667      */
3668     public BTreeHeader getBTreeHeader( String name )
3669     {
3670         // Get a lock
3671         btreeHeadersLock.readLock().lock();
3672         
3673         // get the current BTree Header for this BTree and revision
3674         BTreeHeader<?, ?> btreeHeader = currentBTreeHeaders.get( name );
3675         
3676         // And unlock 
3677         btreeHeadersLock.readLock().unlock();
3678 
3679         return btreeHeader;
3680     }
3681     
3682     
3683     /**
3684      * Get the new BTreeHeader for a given Btree. It might not exist
3685      */
3686     public BTreeHeader getNewBTreeHeader( String name )
3687     {
3688         // get the current BTree Header for this BTree and revision
3689         BTreeHeader<?, ?> btreeHeader = newBTreeHeaders.get( name );
3690 
3691         return btreeHeader;
3692     }
3693     
3694     
3695     /**
3696      * {@inheritDoc}
3697      */
3698     public void updateNewBTreeHeaders( BTreeHeader btreeHeader )
3699     {
3700         newBTreeHeaders.put( btreeHeader.getBtree().getName(), btreeHeader );
3701     }
3702     
3703     
3704     /**
3705      * Swap the current BtreeHeader map with the new one. This method will only
3706      * be called in a single trhead, when the current transaction will be committed.
3707      */
3708     private void swapCurrentBtreeHeaders()
3709     {
3710         // Copy the reference to the current BtreeHeader Map
3711         Map<String, BTreeHeader<?, ?>> tmp = currentBTreeHeaders;
3712         
3713         // Get a write lock
3714         btreeHeadersLock.writeLock().lock();
3715 
3716         // Swap the new BTreeHeader Map
3717         currentBTreeHeaders = newBTreeHeaders;
3718         
3719         // And unlock 
3720         btreeHeadersLock.writeLock().unlock();
3721 
3722         // Last, not least, clear the Map and reinject the latest revision in it
3723         tmp.clear();
3724         tmp.putAll( currentBTreeHeaders );
3725 
3726         // And update the new BTreeHeader map
3727         newBTreeHeaders = tmp;
3728     }
3729     
3730     
3731     /**
3732      * revert the new BTreeHeaders Map to the current BTreeHeader Map. This method
3733      * is called when we have to rollback a transaction.
3734      */
3735     private void revertBtreeHeaders()
3736     {
3737         // Clean up teh new BTreeHeaders Map
3738         newBTreeHeaders.clear();
3739         
3740         // Reinject the latest revision in it
3741         newBTreeHeaders.putAll( currentBTreeHeaders );
3742     }
3743 
3744     
3745     /**
3746      * Loads a B-tree holding the values of a duplicate key
3747      * This tree is also called as dups tree or sub tree
3748      *
3749      * @param offset the offset of the B-tree header
3750      * @return the deserialized B-tree
3751      */
3752     /* No qualifier */<K, V> BTree<V, V> loadDupsBtree( long btreeHeaderOffset, BTree<K, V> parentBtree )
3753     {
3754         try
3755         {
3756             PageIO[] pageIos = readPageIOs( btreeHeaderOffset, Long.MAX_VALUE );
3757 
3758             BTree<V, V> subBtree = BTreeFactory.<V, V> createPersistedBTree( BTreeTypeEnum.PERSISTED_SUB );
3759             loadBtree( pageIos, subBtree, parentBtree );
3760             
3761 
3762             return subBtree;
3763         }
3764         catch ( Exception e )
3765         {
3766             // should not happen
3767             throw new BTreeCreationException( e );
3768         }
3769     }
3770 
3771 
3772     /**
3773      * @see Object#toString()
3774      */
3775     public String toString()
3776     {
3777         StringBuilder sb = new StringBuilder();
3778 
3779         sb.append( "RM free pages : [" );
3780 
3781         if ( firstFreePage != NO_PAGE )
3782         {
3783             long current = firstFreePage;
3784             boolean isFirst = true;
3785 
3786             while ( current != NO_PAGE )
3787             {
3788                 if ( isFirst )
3789                 {
3790                     isFirst = false;
3791                 }
3792                 else
3793                 {
3794                     sb.append( ", " );
3795                 }
3796 
3797                 PageIO pageIo;
3798 
3799                 try
3800                 {
3801                     pageIo = fetchPage( current );
3802                     sb.append( pageIo.getOffset() );
3803                     current = pageIo.getNextPage();
3804                 }
3805                 catch ( EndOfFileExceededException e )
3806                 {
3807                     e.printStackTrace();
3808                 }
3809                 catch ( IOException e )
3810                 {
3811                     e.printStackTrace();
3812                 }
3813 
3814             }
3815         }
3816 
3817         sb.append( "]" );
3818 
3819         return sb.toString();
3820     }
3821 }