1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.regionserver;
21
22 import java.io.EOFException;
23 import java.io.IOException;
24 import java.io.InterruptedIOException;
25 import java.io.UnsupportedEncodingException;
26 import java.lang.reflect.Constructor;
27 import java.text.ParseException;
28 import java.util.AbstractList;
29 import java.util.ArrayList;
30 import java.util.Arrays;
31 import java.util.Collection;
32 import java.util.Collections;
33 import java.util.HashMap;
34 import java.util.List;
35 import java.util.Map;
36 import java.util.NavigableMap;
37 import java.util.NavigableSet;
38 import java.util.Random;
39 import java.util.Set;
40 import java.util.TreeMap;
41 import java.util.TreeSet;
42 import java.util.concurrent.ConcurrentSkipListMap;
43 import java.util.concurrent.atomic.AtomicBoolean;
44 import java.util.concurrent.atomic.AtomicLong;
45 import java.util.concurrent.locks.ReentrantReadWriteLock;
46
47 import org.apache.commons.logging.Log;
48 import org.apache.commons.logging.LogFactory;
49 import org.apache.hadoop.conf.Configuration;
50 import org.apache.hadoop.fs.FSDataOutputStream;
51 import org.apache.hadoop.fs.FileStatus;
52 import org.apache.hadoop.fs.FileSystem;
53 import org.apache.hadoop.fs.Path;
54 import org.apache.hadoop.hbase.DoNotRetryIOException;
55 import org.apache.hadoop.hbase.DroppedSnapshotException;
56 import org.apache.hadoop.hbase.HBaseConfiguration;
57 import org.apache.hadoop.hbase.HColumnDescriptor;
58 import org.apache.hadoop.hbase.HConstants;
59 import org.apache.hadoop.hbase.HRegionInfo;
60 import org.apache.hadoop.hbase.HTableDescriptor;
61 import org.apache.hadoop.hbase.KeyValue;
62 import org.apache.hadoop.hbase.NotServingRegionException;
63 import org.apache.hadoop.hbase.UnknownScannerException;
64 import org.apache.hadoop.hbase.HConstants.OperationStatusCode;
65 import org.apache.hadoop.hbase.client.Delete;
66 import org.apache.hadoop.hbase.client.Get;
67 import org.apache.hadoop.hbase.client.Increment;
68 import org.apache.hadoop.hbase.client.Put;
69 import org.apache.hadoop.hbase.client.Result;
70 import org.apache.hadoop.hbase.client.Row;
71 import org.apache.hadoop.hbase.client.RowLock;
72 import org.apache.hadoop.hbase.client.Scan;
73 import org.apache.hadoop.hbase.filter.Filter;
74 import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
75 import org.apache.hadoop.hbase.io.HeapSize;
76 import org.apache.hadoop.hbase.io.TimeRange;
77 import org.apache.hadoop.hbase.io.hfile.BlockCache;
78 import org.apache.hadoop.hbase.regionserver.wal.HLog;
79 import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
80 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
81 import org.apache.hadoop.hbase.util.Bytes;
82 import org.apache.hadoop.hbase.util.CancelableProgressable;
83 import org.apache.hadoop.hbase.util.ClassSize;
84 import org.apache.hadoop.hbase.util.CompressionTest;
85 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
86 import org.apache.hadoop.hbase.util.FSUtils;
87 import org.apache.hadoop.hbase.util.Pair;
88 import org.apache.hadoop.hbase.util.Writables;
89 import org.apache.hadoop.io.Writable;
90 import org.apache.hadoop.util.Progressable;
91 import org.apache.hadoop.util.StringUtils;
92
93 import com.google.common.collect.Lists;
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131 public class HRegion implements HeapSize {
/** Logger shared by all region instances. */
public static final Log LOG = LogFactory.getLog(HRegion.class);
/** Name of the sub-directory under the region directory that initialize() deletes. */
static final String MERGEDIR = "merges";

/** Set to true by doClose() once every store has been closed; reset by initialize(). */
final AtomicBoolean closed = new AtomicBoolean(false);

/**
 * Set to true by doClose() just before it takes the region write lock;
 * flushcache() and compactStores() bail out early when it is set.
 */
final AtomicBoolean closing = new AtomicBoolean(false);

// Row-lock bookkeeping. NOTE(review): the code that reads and writes these
// four fields is outside the visible part of the file -- confirm usage there.
private final Set<byte[]> lockedRows =
  new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
private final Map<Integer, byte []> lockIds =
  new HashMap<Integer, byte []>();
private int lockIdGenerator = 1;
static private Random rand = new Random();

/** Column family name to Store; populated by initialize(CancelableProgressable). */
protected final Map<byte [], Store> stores =
  new ConcurrentSkipListMap<byte [], Store>(Bytes.BYTES_RAWCOMPARATOR);

/**
 * Running memstore size of this region: increased by the put/delete paths and
 * decreased by internalFlushcache() after a successful flush.
 */
final AtomicLong memstoreSize = new AtomicLong(0);

/** Table directory handed to the constructor; parent of {@link #regiondir}. */
final Path tableDir;

/** Write-ahead log that edits are appended to. */
final HLog log;
final FileSystem fs;
final Configuration conf;
final HRegionInfo regionInfo;
/** tableDir/encodedRegionName, computed in the constructor. */
final Path regiondir;
/** Taken from regionInfo.getComparator() in the constructor. */
KeyValue.KVComparator comparator;

/** One-shot flag: read and then cleared by compactStores(). */
private volatile boolean forceMajorCompaction = false;
/** (elapsed seconds, compacted size) of the last completed compaction, or null. */
private Pair<Long,Long> lastCompactInfo = null;

/** Monitor held for the whole of close(boolean) so that closes are serialized. */
private final Object closeLock = new Object();
187
188
189
190
191
192 static class WriteState {
193
194 volatile boolean flushing = false;
195
196 volatile boolean flushRequested = false;
197
198 volatile boolean compacting = false;
199
200 volatile boolean writesEnabled = true;
201
202 volatile boolean readOnly = false;
203
204
205
206
207
208
209 synchronized void setReadOnly(final boolean onOff) {
210 this.writesEnabled = !onOff;
211 this.readOnly = onOff;
212 }
213
214 boolean isReadOnly() {
215 return this.readOnly;
216 }
217
218 boolean isFlushRequested() {
219 return this.flushRequested;
220 }
221 }
222
/** Flush/compaction/read-only flags; also the monitor close() waits on. */
final WriteState writestate = new WriteState();

/** Memstore size that triggers a flush request; resolved in the constructor. */
final long memstoreFlushSize;
/** Start time of the most recent internalFlushcache() call (also set by initialize()). */
private volatile long lastFlushTime;
/** (elapsed seconds, flushed size) per flush; drained by getRecentFlushInfo(). */
private List<Pair<Long,Long>> recentFlushes = new ArrayList<Pair<Long,Long>>();
final FlushRequester flushRequester;
/** memstoreFlushSize times "hbase.hregion.memstore.block.multiplier". */
private final long blockingMemStoreSize;
final long threadWakeFrequency;

/**
 * Region-level lock: close takes the write lock, while flushes and compactions
 * hold the read lock.
 */
final ReentrantReadWriteLock lock =
  new ReentrantReadWriteLock();

/**
 * Updates (put/delete) hold the read lock; internalFlushcache() takes the
 * write lock while it snapshots the memstores.
 */
private final ReentrantReadWriteLock updatesLock =
  new ReentrantReadWriteLock();
// NOTE(review): splitRequest is not used in the visible part of the file.
private boolean splitRequest;
/** Cleared by compactStores(boolean) when a compaction yields a split row. */
private byte[] splitPoint = null;

private final ReadWriteConsistencyControl rwcc =
  new ReadWriteConsistencyControl();

/** Name of the file inside the region directory that holds the serialized HRegionInfo. */
public final static String REGIONINFO_FILE = ".regioninfo";
248
249
250
251
/**
 * No-argument constructor that leaves every collaborator null and every
 * size/frequency at zero. The resulting instance has no filesystem, log or
 * region info attached.
 */
public HRegion(){
  // Collaborators.
  this.conf = null;
  this.fs = null;
  this.log = null;
  this.flushRequester = null;
  // Identity and location.
  this.regionInfo = null;
  this.tableDir = null;
  this.regiondir = null;
  // Thresholds.
  this.memstoreFlushSize = 0L;
  this.blockingMemStoreSize = 0L;
  this.threadWakeFrequency = 0L;
}
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288 public HRegion(Path tableDir, HLog log, FileSystem fs, Configuration conf,
289 HRegionInfo regionInfo, FlushRequester flushRequester) {
290 this.tableDir = tableDir;
291 this.comparator = regionInfo.getComparator();
292 this.log = log;
293 this.fs = fs;
294 this.conf = conf;
295 this.regionInfo = regionInfo;
296 this.flushRequester = flushRequester;
297 this.threadWakeFrequency = conf.getLong(HConstants.THREAD_WAKE_FREQUENCY,
298 10 * 1000);
299 String encodedNameStr = this.regionInfo.getEncodedName();
300 this.regiondir = getRegionDir(this.tableDir, encodedNameStr);
301 long flushSize = regionInfo.getTableDesc().getMemStoreFlushSize();
302 if (flushSize == HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE) {
303 flushSize = conf.getLong("hbase.hregion.memstore.flush.size",
304 HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE);
305 }
306 this.memstoreFlushSize = flushSize;
307 this.blockingMemStoreSize = this.memstoreFlushSize *
308 conf.getLong("hbase.hregion.memstore.block.multiplier", 2);
309 if (LOG.isDebugEnabled()) {
310
311 LOG.debug("Instantiated " + this);
312 }
313 }
314
315
316
317
318
319
320 public long initialize() throws IOException {
321 return initialize(null);
322 }
323
324
325
326
327
328
329
330
331 public long initialize(final CancelableProgressable reporter)
332 throws IOException {
333
334 this.closing.set(false);
335 this.closed.set(false);
336
337
338 checkRegioninfoOnFilesystem();
339
340
341 cleanupTmpDir();
342
343
344 long maxSeqId = -1;
345 for (HColumnDescriptor c : this.regionInfo.getTableDesc().getFamilies()) {
346 Store store = instantiateHStore(this.tableDir, c);
347 this.stores.put(c.getName(), store);
348 long storeSeqId = store.getMaxSequenceId();
349 if (storeSeqId > maxSeqId) {
350 maxSeqId = storeSeqId;
351 }
352 }
353
354 maxSeqId = replayRecoveredEditsIfAny(this.regiondir, maxSeqId, reporter);
355
356
357
358
359 SplitTransaction.cleanupAnySplitDetritus(this);
360 FSUtils.deleteDirectory(this.fs, new Path(regiondir, MERGEDIR));
361
362 this.writestate.setReadOnly(this.regionInfo.getTableDesc().isReadOnly());
363
364 this.writestate.compacting = false;
365 this.lastFlushTime = EnvironmentEdgeManager.currentTimeMillis();
366
367
368 long nextSeqid = maxSeqId + 1;
369 LOG.info("Onlined " + this.toString() + "; next sequenceid=" + nextSeqid);
370 return nextSeqid;
371 }
372
373
374
375
376
377
378
379 static void moveInitialFilesIntoPlace(final FileSystem fs,
380 final Path initialFiles, final Path regiondir)
381 throws IOException {
382 if (initialFiles != null && fs.exists(initialFiles)) {
383 if (!fs.rename(initialFiles, regiondir)) {
384 LOG.warn("Unable to rename " + initialFiles + " to " + regiondir);
385 }
386 }
387 }
388
389
390
391
392 public boolean hasReferences() {
393 for (Store store : this.stores.values()) {
394 for (StoreFile sf : store.getStorefiles()) {
395
396 if (sf.isReference()) return true;
397 }
398 }
399 return false;
400 }
401
402
403
404
405
406
407 private void checkRegioninfoOnFilesystem() throws IOException {
408 Path regioninfoPath = new Path(this.regiondir, REGIONINFO_FILE);
409 if (this.fs.exists(regioninfoPath) &&
410 this.fs.getFileStatus(regioninfoPath).getLen() > 0) {
411 return;
412 }
413
414
415
416
417 Path tmpPath = new Path(getTmpDir(), REGIONINFO_FILE);
418 FSDataOutputStream out = this.fs.create(tmpPath, true);
419 try {
420 this.regionInfo.write(out);
421 out.write('\n');
422 out.write('\n');
423 out.write(Bytes.toBytes(this.regionInfo.toString()));
424 } finally {
425 out.close();
426 }
427 if (!fs.rename(tmpPath, regioninfoPath)) {
428 throw new IOException("Unable to rename " + tmpPath + " to " +
429 regioninfoPath);
430 }
431 }
432
433
434 public HRegionInfo getRegionInfo() {
435 return this.regionInfo;
436 }
437
438
439 public boolean isClosed() {
440 return this.closed.get();
441 }
442
443
444
445
446 public boolean isClosing() {
447 return this.closing.get();
448 }
449
450 boolean areWritesEnabled() {
451 synchronized(this.writestate) {
452 return this.writestate.writesEnabled;
453 }
454 }
455
456 public ReadWriteConsistencyControl getRWCC() {
457 return rwcc;
458 }
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473 public List<StoreFile> close() throws IOException {
474 return close(false);
475 }
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491 public List<StoreFile> close(final boolean abort) throws IOException {
492
493
494 synchronized (closeLock) {
495 return doClose(abort);
496 }
497 }
498
499 private List<StoreFile> doClose(final boolean abort)
500 throws IOException {
501 if (isClosed()) {
502 LOG.warn("Region " + this + " already closed");
503 return null;
504 }
505 boolean wasFlushing = false;
506 synchronized (writestate) {
507
508
509 writestate.writesEnabled = false;
510 wasFlushing = writestate.flushing;
511 LOG.debug("Closing " + this + ": disabling compactions & flushes");
512 while (writestate.compacting || writestate.flushing) {
513 LOG.debug("waiting for" +
514 (writestate.compacting ? " compaction" : "") +
515 (writestate.flushing ?
516 (writestate.compacting ? "," : "") + " cache flush" :
517 "") + " to complete for region " + this);
518 try {
519 writestate.wait();
520 } catch (InterruptedException iex) {
521
522 }
523 }
524 }
525
526
527
528 if (!abort && !wasFlushing && worthPreFlushing()) {
529 LOG.info("Running close preflush of " + this.getRegionNameAsString());
530 internalFlushcache();
531 }
532 this.closing.set(true);
533 lock.writeLock().lock();
534 try {
535 if (this.isClosed()) {
536
537 return null;
538 }
539 LOG.debug("Updates disabled for region " + this);
540
541 if (!abort) {
542 internalFlushcache();
543 }
544
545 List<StoreFile> result = new ArrayList<StoreFile>();
546 for (Store store : stores.values()) {
547 result.addAll(store.close());
548 }
549 this.closed.set(true);
550 LOG.info("Closed " + this);
551 return result;
552 } finally {
553 lock.writeLock().unlock();
554 }
555 }
556
557
558
559
560 private boolean worthPreFlushing() {
561 return this.memstoreSize.get() >
562 this.conf.getLong("hbase.hregion.preclose.flush.size", 1024 * 1024 * 5);
563 }
564
565
566
567
568
569
570 public byte [] getStartKey() {
571 return this.regionInfo.getStartKey();
572 }
573
574
575 public byte [] getEndKey() {
576 return this.regionInfo.getEndKey();
577 }
578
579
580 public long getRegionId() {
581 return this.regionInfo.getRegionId();
582 }
583
584
585 public byte [] getRegionName() {
586 return this.regionInfo.getRegionName();
587 }
588
589
590 public String getRegionNameAsString() {
591 return this.regionInfo.getRegionNameAsString();
592 }
593
594
595 public HTableDescriptor getTableDesc() {
596 return this.regionInfo.getTableDesc();
597 }
598
599
600 public HLog getLog() {
601 return this.log;
602 }
603
604
605 public Configuration getConf() {
606 return this.conf;
607 }
608
609
610 public Path getRegionDir() {
611 return this.regiondir;
612 }
613
614
615
616
617
618
619
620
621 public static Path getRegionDir(final Path tabledir, final String name) {
622 return new Path(tabledir, name);
623 }
624
625
626 public FileSystem getFilesystem() {
627 return this.fs;
628 }
629
630
631 public Pair<Long,Long> getLastCompactInfo() {
632 return this.lastCompactInfo;
633 }
634
635
636 public long getLastFlushTime() {
637 return this.lastFlushTime;
638 }
639
640
641 public List<Pair<Long,Long>> getRecentFlushInfo() {
642 this.lock.readLock().lock();
643 List<Pair<Long,Long>> ret = this.recentFlushes;
644 this.recentFlushes = new ArrayList<Pair<Long,Long>>();
645 this.lock.readLock().unlock();
646 return ret;
647 }
648
649
650
651
652
653
654
655
656
657 public long getLargestHStoreSize() {
658 long size = 0;
659 for (Store h: stores.values()) {
660 long storeSize = h.getSize();
661 if (storeSize > size) {
662 size = storeSize;
663 }
664 }
665 return size;
666 }
667
668
669
670
671
672 void doRegionCompactionPrep() throws IOException {
673 }
674
675
676
677
678 private void cleanupTmpDir() throws IOException {
679 FSUtils.deleteDirectory(this.fs, getTmpDir());
680 }
681
682
683
684
685
686 Path getTmpDir() {
687 return new Path(getRegionDir(), ".tmp");
688 }
689
690 void setForceMajorCompaction(final boolean b) {
691 this.forceMajorCompaction = b;
692 }
693
694 boolean getForceMajorCompaction() {
695 return this.forceMajorCompaction;
696 }
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712 public byte [] compactStores() throws IOException {
713 boolean majorCompaction = this.forceMajorCompaction;
714 this.forceMajorCompaction = false;
715 return compactStores(majorCompaction);
716 }
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733 byte [] compactStores(final boolean majorCompaction)
734 throws IOException {
735 if (this.closing.get()) {
736 LOG.debug("Skipping compaction on " + this + " because closing");
737 return null;
738 }
739 lock.readLock().lock();
740 this.lastCompactInfo = null;
741 try {
742 if (this.closed.get()) {
743 LOG.debug("Skipping compaction on " + this + " because closed");
744 return null;
745 }
746 byte [] splitRow = null;
747 if (this.closed.get()) {
748 return splitRow;
749 }
750 try {
751 synchronized (writestate) {
752 if (!writestate.compacting && writestate.writesEnabled) {
753 writestate.compacting = true;
754 } else {
755 LOG.info("NOT compacting region " + this +
756 ": compacting=" + writestate.compacting + ", writesEnabled=" +
757 writestate.writesEnabled);
758 return splitRow;
759 }
760 }
761 LOG.info("Starting" + (majorCompaction? " major " : " ") +
762 "compaction on region " + this);
763 long startTime = EnvironmentEdgeManager.currentTimeMillis();
764 doRegionCompactionPrep();
765 long lastCompactSize = 0;
766 long maxSize = -1;
767 boolean completed = false;
768 try {
769 for (Store store: stores.values()) {
770 final Store.StoreSize ss = store.compact(majorCompaction);
771 lastCompactSize += store.getLastCompactSize();
772 if (ss != null && ss.getSize() > maxSize) {
773 maxSize = ss.getSize();
774 splitRow = ss.getSplitRow();
775 }
776 }
777 completed = true;
778 } catch (InterruptedIOException iioe) {
779 LOG.info("compaction interrupted by user: ", iioe);
780 } finally {
781 long now = EnvironmentEdgeManager.currentTimeMillis();
782 LOG.info(((completed) ? "completed" : "aborted")
783 + " compaction on region " + this
784 + " after " + StringUtils.formatTimeDiff(now, startTime));
785 if (completed) {
786 this.lastCompactInfo =
787 new Pair<Long,Long>((now - startTime) / 1000, lastCompactSize);
788 }
789 }
790 } finally {
791 synchronized (writestate) {
792 writestate.compacting = false;
793 writestate.notifyAll();
794 }
795 }
796 if (splitRow != null) {
797 assert splitPoint == null || Bytes.equals(splitRow, splitPoint);
798 this.splitPoint = null;
799 }
800 return splitRow;
801 } finally {
802 lock.readLock().unlock();
803 }
804 }
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826 public boolean flushcache() throws IOException {
827
828 if (this.closing.get()) {
829 LOG.debug("Skipping flush on " + this + " because closing");
830 return false;
831 }
832 lock.readLock().lock();
833 try {
834 if (this.closed.get()) {
835 LOG.debug("Skipping flush on " + this + " because closed");
836 return false;
837 }
838 try {
839 synchronized (writestate) {
840 if (!writestate.flushing && writestate.writesEnabled) {
841 this.writestate.flushing = true;
842 } else {
843 if (LOG.isDebugEnabled()) {
844 LOG.debug("NOT flushing memstore for region " + this +
845 ", flushing=" +
846 writestate.flushing + ", writesEnabled=" +
847 writestate.writesEnabled);
848 }
849 return false;
850 }
851 }
852 return internalFlushcache();
853 } finally {
854 synchronized (writestate) {
855 writestate.flushing = false;
856 this.writestate.flushRequested = false;
857 writestate.notifyAll();
858 }
859 }
860 } finally {
861 lock.readLock().unlock();
862 }
863 }
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899 protected boolean internalFlushcache() throws IOException {
900 return internalFlushcache(this.log, -1);
901 }
902
903
904
905
906
907
908
909
910
911 protected boolean internalFlushcache(final HLog wal, final long myseqid)
912 throws IOException {
913 final long startTime = EnvironmentEdgeManager.currentTimeMillis();
914
915
916 this.lastFlushTime = startTime;
917
918 if (this.memstoreSize.get() <= 0) {
919 return false;
920 }
921 if (LOG.isDebugEnabled()) {
922 LOG.debug("Started memstore flush for " + this +
923 ", current region memstore size " +
924 StringUtils.humanReadableInt(this.memstoreSize.get()) +
925 ((wal != null)? "": "; wal is null, using passed sequenceid=" + myseqid));
926 }
927
928
929
930
931
932
933
934
935 long sequenceId = -1L;
936 long completeSequenceId = -1L;
937
938
939
940
941 this.updatesLock.writeLock().lock();
942 final long currentMemStoreSize = this.memstoreSize.get();
943 List<StoreFlusher> storeFlushers = new ArrayList<StoreFlusher>(stores.size());
944 try {
945 sequenceId = (wal == null)? myseqid: wal.startCacheFlush();
946 completeSequenceId = this.getCompleteCacheFlushSequenceId(sequenceId);
947
948 for (Store s : stores.values()) {
949 storeFlushers.add(s.getStoreFlusher(completeSequenceId));
950 }
951
952
953 for (StoreFlusher flusher : storeFlushers) {
954 flusher.prepare();
955 }
956 } finally {
957 this.updatesLock.writeLock().unlock();
958 }
959
960 LOG.debug("Finished snapshotting, commencing flushing stores");
961
962
963
964
965
966 boolean compactionRequested = false;
967 try {
968
969
970
971
972 for (StoreFlusher flusher : storeFlushers) {
973 flusher.flushCache();
974 }
975
976
977 for (StoreFlusher flusher : storeFlushers) {
978 boolean needsCompaction = flusher.commit();
979 if (needsCompaction) {
980 compactionRequested = true;
981 }
982 }
983 storeFlushers.clear();
984
985
986 this.memstoreSize.addAndGet(-currentMemStoreSize);
987 } catch (Throwable t) {
988
989
990
991
992
993
994 if (wal != null) wal.abortCacheFlush();
995 DroppedSnapshotException dse = new DroppedSnapshotException("region: " +
996 Bytes.toStringBinary(getRegionName()));
997 dse.initCause(t);
998 throw dse;
999 }
1000
1001
1002
1003
1004
1005
1006
1007
1008 if (wal != null) {
1009 wal.completeCacheFlush(this.regionInfo.getEncodedNameAsBytes(),
1010 regionInfo.getTableDesc().getName(), completeSequenceId,
1011 this.getRegionInfo().isMetaRegion());
1012 }
1013
1014
1015
1016 synchronized (this) {
1017 notifyAll();
1018 }
1019
1020 long time = EnvironmentEdgeManager.currentTimeMillis() - startTime;
1021 if (LOG.isDebugEnabled()) {
1022 LOG.info("Finished memstore flush of ~" +
1023 StringUtils.humanReadableInt(currentMemStoreSize) + " for region " +
1024 this + " in " + time + "ms, sequenceid=" + sequenceId +
1025 ", compaction requested=" + compactionRequested +
1026 ((wal == null)? "; wal=null": ""));
1027 }
1028 this.recentFlushes.add(new Pair<Long,Long>(time/1000,currentMemStoreSize));
1029
1030 return compactionRequested;
1031 }
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041 protected long getCompleteCacheFlushSequenceId(long currentSequenceId) {
1042 return currentSequenceId;
1043 }
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057 Result getClosestRowBefore(final byte [] row)
1058 throws IOException{
1059 return getClosestRowBefore(row, HConstants.CATALOG_FAMILY);
1060 }
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072 public Result getClosestRowBefore(final byte [] row, final byte [] family)
1073 throws IOException {
1074
1075
1076 KeyValue key = null;
1077 checkRow(row);
1078 startRegionOperation();
1079 try {
1080 Store store = getStore(family);
1081 KeyValue kv = new KeyValue(row, HConstants.LATEST_TIMESTAMP);
1082
1083 key = store.getRowKeyAtOrBefore(kv);
1084 if (key == null) {
1085 return null;
1086 }
1087 Get get = new Get(key.getRow());
1088 get.addFamily(family);
1089 return get(get, null);
1090 } finally {
1091 closeRegionOperation();
1092 }
1093 }
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105 public InternalScanner getScanner(Scan scan)
1106 throws IOException {
1107 return getScanner(scan, null);
1108 }
1109
1110 protected InternalScanner getScanner(Scan scan, List<KeyValueScanner> additionalScanners) throws IOException {
1111 startRegionOperation();
1112 try {
1113
1114 if(scan.hasFamilies()) {
1115 for(byte [] family : scan.getFamilyMap().keySet()) {
1116 checkFamily(family);
1117 }
1118 } else {
1119 for(byte[] family: regionInfo.getTableDesc().getFamiliesKeys()){
1120 scan.addFamily(family);
1121 }
1122 }
1123 return instantiateInternalScanner(scan, additionalScanners);
1124
1125 } finally {
1126 closeRegionOperation();
1127 }
1128 }
1129
1130 protected InternalScanner instantiateInternalScanner(Scan scan, List<KeyValueScanner> additionalScanners) throws IOException {
1131 return new RegionScanner(scan, additionalScanners);
1132 }
1133
1134
1135
1136
1137 private void prepareDelete(Delete delete) throws IOException {
1138
1139 if(delete.getFamilyMap().isEmpty()){
1140 for(byte [] family : regionInfo.getTableDesc().getFamiliesKeys()){
1141
1142 delete.deleteFamily(family, delete.getTimeStamp());
1143 }
1144 } else {
1145 for(byte [] family : delete.getFamilyMap().keySet()) {
1146 if(family == null) {
1147 throw new NoSuchColumnFamilyException("Empty family is invalid");
1148 }
1149 checkFamily(family);
1150 }
1151 }
1152 }
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163 public void delete(Delete delete, Integer lockid, boolean writeToWAL)
1164 throws IOException {
1165 checkReadOnly();
1166 checkResources();
1167 Integer lid = null;
1168 startRegionOperation();
1169 try {
1170 byte [] row = delete.getRow();
1171
1172 lid = getLock(lockid, row, true);
1173
1174
1175 prepareDelete(delete);
1176 delete(delete.getFamilyMap(), writeToWAL);
1177
1178 } finally {
1179 if(lockid == null) releaseRowLock(lid);
1180 closeRegionOperation();
1181 }
1182 }
1183
1184
1185
1186
1187
1188
1189
/**
 * Applies already-prepared delete KeyValues to the log and the memstore,
 * holding the updates read lock. No row lock is taken here.
 *
 * @param familyMap column family to the delete KeyValues for that family;
 *   the timestamps inside the KeyValues are rewritten in place
 * @param writeToWAL whether the edit is appended to the log before the
 *   memstore is updated
 * @throws IOException if a lookup, the log append or the memstore update fails
 */
public void delete(Map<byte[], List<KeyValue>> familyMap, boolean writeToWAL)
throws IOException {
  long now = EnvironmentEdgeManager.currentTimeMillis();
  byte [] byteNow = Bytes.toBytes(now);
  boolean flush = false;

  updatesLock.readLock().lock();

  try {

    for (Map.Entry<byte[], List<KeyValue>> e : familyMap.entrySet()) {

      byte[] family = e.getKey();
      List<KeyValue> kvs = e.getValue();
      // Per qualifier, how many latest-timestamp deletes have been seen so
      // far in this family.
      Map<byte[], Integer> kvCount = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);

      for (KeyValue kv: kvs) {
        // A delete carrying the "latest" timestamp is pinned to the
        // timestamp of an existing cell: the Nth such delete for a
        // qualifier takes the timestamp of the Nth version returned.
        if (kv.isLatestTimestamp() && kv.isDeleteType()) {
          byte[] qual = kv.getQualifier();
          if (qual == null) qual = HConstants.EMPTY_BYTE_ARRAY;

          Integer count = kvCount.get(qual);
          if (count == null) {
            kvCount.put(qual, 1);
          } else {
            kvCount.put(qual, count + 1);
          }
          count = kvCount.get(qual);

          Get get = new Get(kv.getRow());
          get.setMaxVersions(count);
          get.addColumn(family, qual);

          List<KeyValue> result = get(get);

          if (result.size() < count) {
            // Fewer versions exist than deletes requested: stamp with now.
            kv.updateLatestStamp(byteNow);
            continue;
          }
          if (result.size() > count) {
            throw new RuntimeException("Unexpected size: " + result.size());
          }
          // Copy the timestamp bytes of the count-th version into the delete.
          KeyValue getkv = result.get(count - 1);
          Bytes.putBytes(kv.getBuffer(), kv.getTimestampOffset(),
            getkv.getBuffer(), getkv.getTimestampOffset(), Bytes.SIZEOF_LONG);
        } else {
          kv.updateLatestStamp(byteNow);
        }
      }
    }

    if (writeToWAL) {
      // Append to the log before touching the memstore.
      WALEdit walEdit = new WALEdit();
      addFamilyMapToWALEdit(familyMap, walEdit);
      this.log.append(regionInfo, regionInfo.getTableDesc().getName(),
        walEdit, now);
    }

    // Apply to the memstore and see whether that crosses the flush size.
    long addedSize = applyFamilyMapToMemstore(familyMap);
    flush = isFlushSize(memstoreSize.addAndGet(addedSize));
  } finally {
    this.updatesLock.readLock().unlock();
  }

  if (flush) {
    // Requested only after the updates lock has been released.
    requestFlush();
  }
}
1271
1272
1273
1274
1275
1276 public void put(Put put) throws IOException {
1277 this.put(put, null, put.getWriteToWAL());
1278 }
1279
1280
1281
1282
1283
1284
1285 public void put(Put put, boolean writeToWAL) throws IOException {
1286 this.put(put, null, writeToWAL);
1287 }
1288
1289
1290
1291
1292
1293
1294 public void put(Put put, Integer lockid) throws IOException {
1295 this.put(put, lockid, put.getWriteToWAL());
1296 }
1297
1298
1299
1300
1301
1302
1303
1304 public void put(Put put, Integer lockid, boolean writeToWAL)
1305 throws IOException {
1306 checkReadOnly();
1307
1308
1309
1310
1311
1312 checkResources();
1313 startRegionOperation();
1314 try {
1315
1316
1317
1318
1319
1320 byte [] row = put.getRow();
1321
1322 Integer lid = getLock(lockid, row, true);
1323
1324 try {
1325
1326 put(put.getFamilyMap(), writeToWAL);
1327 } finally {
1328 if(lockid == null) releaseRowLock(lid);
1329 }
1330 } finally {
1331 closeRegionOperation();
1332 }
1333 }
1334
1335
1336
1337
1338
1339
1340 private static class BatchOperationInProgress<T> {
1341 T[] operations;
1342 OperationStatusCode[] retCodes;
1343 int nextIndexToProcess = 0;
1344
1345 public BatchOperationInProgress(T[] operations) {
1346 this.operations = operations;
1347 retCodes = new OperationStatusCode[operations.length];
1348 Arrays.fill(retCodes, OperationStatusCode.NOT_RUN);
1349 }
1350
1351 public boolean isDone() {
1352 return nextIndexToProcess == operations.length;
1353 }
1354 }
1355
1356
1357
1358
1359
1360 public OperationStatusCode[] put(Put[] puts) throws IOException {
1361 @SuppressWarnings("unchecked")
1362 Pair<Put, Integer> putsAndLocks[] = new Pair[puts.length];
1363
1364 for (int i = 0; i < puts.length; i++) {
1365 putsAndLocks[i] = new Pair<Put, Integer>(puts[i], null);
1366 }
1367 return put(putsAndLocks);
1368 }
1369
1370
1371
1372
1373
1374
1375 public OperationStatusCode[] put(Pair<Put, Integer>[] putsAndLocks) throws IOException {
1376 BatchOperationInProgress<Pair<Put, Integer>> batchOp =
1377 new BatchOperationInProgress<Pair<Put,Integer>>(putsAndLocks);
1378
1379 while (!batchOp.isDone()) {
1380 checkReadOnly();
1381 checkResources();
1382
1383 long newSize;
1384 startRegionOperation();
1385 try {
1386 long addedSize = doMiniBatchPut(batchOp);
1387 newSize = memstoreSize.addAndGet(addedSize);
1388 } finally {
1389 closeRegionOperation();
1390 }
1391 if (isFlushSize(newSize)) {
1392 requestFlush();
1393 }
1394 }
1395 return batchOp.retCodes;
1396 }
1397
/**
 * Processes as many of the remaining puts of the batch as can be row-locked
 * without blocking (only the first lock is waited for), appends them to the
 * log in a single edit, applies them to the memstore, and advances
 * {@code batchOp.nextIndexToProcess} past everything it looked at.
 *
 * @param batchOp batch progress; its status codes and next index are updated
 * @return the size added to the memstore, or 0 when no put was ready
 * @throws IOException if locking, the log append or the memstore update fails
 */
private long doMiniBatchPut(BatchOperationInProgress<Pair<Put, Integer>> batchOp) throws IOException {
  long now = EnvironmentEdgeManager.currentTimeMillis();
  byte[] byteNow = Bytes.toBytes(now);
  // Whether the updates read lock is held, so finally knows to release it.
  boolean locked = false;

  // Row locks obtained here (as opposed to supplied by the caller).
  List<Integer> acquiredLocks = Lists.newArrayListWithCapacity(batchOp.operations.length);

  // This mini-batch covers the index range [firstIndex, lastIndexExclusive).
  int firstIndex = batchOp.nextIndexToProcess;
  int lastIndexExclusive = firstIndex;
  boolean success = false;
  try {
    // Step 1: gather row locks for as many puts as possible.
    int numReadyToWrite = 0;
    while (lastIndexExclusive < batchOp.operations.length) {
      Pair<Put, Integer> nextPair = batchOp.operations[lastIndexExclusive];
      Put put = nextPair.getFirst();
      Integer providedLockId = nextPair.getSecond();

      // A put naming an unknown family is marked BAD_FAMILY and skipped.
      try {
        checkFamilies(put.getFamilyMap().keySet());
      } catch (NoSuchColumnFamilyException nscf) {
        LOG.warn("No such column family in batch put", nscf);
        batchOp.retCodes[lastIndexExclusive] = OperationStatusCode.BAD_FAMILY;
        lastIndexExclusive++;
        continue;
      }

      // Block only for the first lock of the mini-batch; once something is
      // ready to write, stop at the first lock that is not available.
      boolean shouldBlock = numReadyToWrite == 0;
      Integer acquiredLockId = getLock(providedLockId, put.getRow(), shouldBlock);
      if (acquiredLockId == null) {
        // Lock not available; this put is left for the next mini-batch.
        assert !shouldBlock : "Should never fail to get lock when blocking";
        break;
      }
      if (providedLockId == null) {
        acquiredLocks.add(acquiredLockId);
      }
      lastIndexExclusive++;
      numReadyToWrite++;
    }

    // Nothing lockable in this range (e.g. only BAD_FAMILY puts).
    if (numReadyToWrite <= 0) return 0L;

    // Step 2: stamp the KeyValues of every put in the range with "now".
    for (int i = firstIndex; i < lastIndexExclusive; i++) {
      updateKVTimestamps(
        batchOp.operations[i].getFirst().getFamilyMap().values(),
        byteNow);
    }

    this.updatesLock.readLock().lock();
    locked = true;

    // Step 3: build one log edit from the puts that are still NOT_RUN and
    // want to be logged.
    WALEdit walEdit = new WALEdit();
    for (int i = firstIndex; i < lastIndexExclusive; i++) {
      // Skip puts already given a status above.
      if (batchOp.retCodes[i] != OperationStatusCode.NOT_RUN) continue;

      Put p = batchOp.operations[i].getFirst();
      if (!p.getWriteToWAL()) continue;
      addFamilyMapToWALEdit(p.getFamilyMap(), walEdit);
    }

    // Step 4: append the edit to the log.
    // NOTE(review): the append is made even when no put contributed to
    // walEdit -- confirm that appending an empty edit is intended.
    this.log.append(regionInfo, regionInfo.getTableDesc().getName(),
      walEdit, now);

    // Step 5: apply to the memstore and mark each applied put SUCCESS.
    long addedSize = 0;
    for (int i = firstIndex; i < lastIndexExclusive; i++) {
      if (batchOp.retCodes[i] != OperationStatusCode.NOT_RUN) continue;

      Put p = batchOp.operations[i].getFirst();
      addedSize += applyFamilyMapToMemstore(p.getFamilyMap());
      batchOp.retCodes[i] = OperationStatusCode.SUCCESS;
    }
    success = true;
    return addedSize;
  } finally {
    if (locked)
      this.updatesLock.readLock().unlock();

    for (Integer toRelease : acquiredLocks) {
      releaseRowLock(toRelease);
    }
    // On any failure (or the early return above), whatever in the range is
    // still NOT_RUN is marked FAILURE.
    if (!success) {
      for (int i = firstIndex; i < lastIndexExclusive; i++) {
        if (batchOp.retCodes[i] == OperationStatusCode.NOT_RUN) {
          batchOp.retCodes[i] = OperationStatusCode.FAILURE;
        }
      }
    }
    // The next mini-batch resumes right after the range handled here.
    batchOp.nextIndexToProcess = lastIndexExclusive;
  }
}
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526 public boolean checkAndMutate(byte [] row, byte [] family, byte [] qualifier,
1527 byte [] expectedValue, Writable w, Integer lockId, boolean writeToWAL)
1528 throws IOException{
1529 checkReadOnly();
1530
1531
1532 checkResources();
1533 boolean isPut = w instanceof Put;
1534 if (!isPut && !(w instanceof Delete))
1535 throw new DoNotRetryIOException("Action must be Put or Delete");
1536 Row r = (Row)w;
1537 if (Bytes.compareTo(row, r.getRow()) != 0) {
1538 throw new DoNotRetryIOException("Action's getRow must match the passed row");
1539 }
1540
1541 startRegionOperation();
1542 try {
1543 RowLock lock = isPut ? ((Put)w).getRowLock() : ((Delete)w).getRowLock();
1544 Get get = new Get(row, lock);
1545 checkFamily(family);
1546 get.addColumn(family, qualifier);
1547
1548
1549 Integer lid = getLock(lockId, get.getRow(), true);
1550 List<KeyValue> result = new ArrayList<KeyValue>();
1551 try {
1552 result = get(get);
1553
1554 boolean matches = false;
1555 if (result.size() == 0 &&
1556 (expectedValue == null || expectedValue.length == 0)) {
1557 matches = true;
1558 } else if (result.size() == 1) {
1559
1560 byte [] actualValue = result.get(0).getValue();
1561 matches = Bytes.equals(expectedValue, actualValue);
1562 }
1563
1564 if (matches) {
1565
1566 if (isPut) {
1567 put(((Put)w).getFamilyMap(), writeToWAL);
1568 } else {
1569 Delete d = (Delete)w;
1570 prepareDelete(d);
1571 delete(d.getFamilyMap(), writeToWAL);
1572 }
1573 return true;
1574 }
1575 return false;
1576 } finally {
1577 if(lockId == null) releaseRowLock(lid);
1578 }
1579 } finally {
1580 closeRegionOperation();
1581 }
1582 }
1583
1584
1585
1586
1587
1588
1589 private void updateKVTimestamps(
1590 final Iterable<List<KeyValue>> keyLists, final byte[] now) {
1591 for (List<KeyValue> keys: keyLists) {
1592 if (keys == null) continue;
1593 for (KeyValue key : keys) {
1594 key.updateLatestStamp(now);
1595 }
1596 }
1597 }
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608 private void checkResources() {
1609
1610
1611 if (this.getRegionInfo().isMetaRegion()) return;
1612
1613 boolean blocked = false;
1614 while (this.memstoreSize.get() > this.blockingMemStoreSize) {
1615 requestFlush();
1616 if (!blocked) {
1617 LOG.info("Blocking updates for '" + Thread.currentThread().getName() +
1618 "' on region " + Bytes.toStringBinary(getRegionName()) +
1619 ": memstore size " +
1620 StringUtils.humanReadableInt(this.memstoreSize.get()) +
1621 " is >= than blocking " +
1622 StringUtils.humanReadableInt(this.blockingMemStoreSize) + " size");
1623 }
1624 blocked = true;
1625 synchronized(this) {
1626 try {
1627 wait(threadWakeFrequency);
1628 } catch (InterruptedException e) {
1629
1630 }
1631 }
1632 }
1633 if (blocked) {
1634 LOG.info("Unblocking updates for region " + this + " '"
1635 + Thread.currentThread().getName() + "'");
1636 }
1637 }
1638
1639
1640
1641
1642 protected void checkReadOnly() throws IOException {
1643 if (this.writestate.isReadOnly()) {
1644 throw new IOException("region is read only");
1645 }
1646 }
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656 private void put(final byte [] family, final List<KeyValue> edits)
1657 throws IOException {
1658 Map<byte[], List<KeyValue>> familyMap = new HashMap<byte[], List<KeyValue>>();
1659 familyMap.put(family, edits);
1660 this.put(familyMap, true);
1661 }
1662
1663
1664
1665
1666
1667
1668
1669
1670 private void put(final Map<byte [], List<KeyValue>> familyMap,
1671 boolean writeToWAL) throws IOException {
1672 long now = EnvironmentEdgeManager.currentTimeMillis();
1673 byte[] byteNow = Bytes.toBytes(now);
1674 boolean flush = false;
1675 this.updatesLock.readLock().lock();
1676 try {
1677 checkFamilies(familyMap.keySet());
1678 updateKVTimestamps(familyMap.values(), byteNow);
1679
1680
1681
1682
1683
1684 if (writeToWAL) {
1685 WALEdit walEdit = new WALEdit();
1686 addFamilyMapToWALEdit(familyMap, walEdit);
1687 this.log.append(regionInfo, regionInfo.getTableDesc().getName(),
1688 walEdit, now);
1689 }
1690
1691 long addedSize = applyFamilyMapToMemstore(familyMap);
1692 flush = isFlushSize(memstoreSize.addAndGet(addedSize));
1693 } finally {
1694 this.updatesLock.readLock().unlock();
1695 }
1696 if (flush) {
1697
1698 requestFlush();
1699 }
1700 }
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711 private long applyFamilyMapToMemstore(Map<byte[], List<KeyValue>> familyMap) {
1712 ReadWriteConsistencyControl.WriteEntry w = null;
1713 long size = 0;
1714 try {
1715 w = rwcc.beginMemstoreInsert();
1716
1717 for (Map.Entry<byte[], List<KeyValue>> e : familyMap.entrySet()) {
1718 byte[] family = e.getKey();
1719 List<KeyValue> edits = e.getValue();
1720
1721 Store store = getStore(family);
1722 for (KeyValue kv: edits) {
1723 kv.setMemstoreTS(w.getWriteNumber());
1724 size += store.add(kv);
1725 }
1726 }
1727 } finally {
1728 rwcc.completeMemstoreInsert(w);
1729 }
1730 return size;
1731 }
1732
1733
1734
1735
1736
1737 private void checkFamilies(Collection<byte[]> families)
1738 throws NoSuchColumnFamilyException {
1739 for (byte[] family : families) {
1740 checkFamily(family);
1741 }
1742 }
1743
1744
1745
1746
1747
1748
1749
1750 private void addFamilyMapToWALEdit(Map<byte[], List<KeyValue>> familyMap,
1751 WALEdit walEdit) {
1752 for (List<KeyValue> edits : familyMap.values()) {
1753 for (KeyValue kv : edits) {
1754 walEdit.add(kv);
1755 }
1756 }
1757 }
1758
1759 private void requestFlush() {
1760 if (this.flushRequester == null) {
1761 return;
1762 }
1763 synchronized (writestate) {
1764 if (this.writestate.isFlushRequested()) {
1765 return;
1766 }
1767 writestate.flushRequested = true;
1768 }
1769
1770 this.flushRequester.requestFlush(this);
1771 if (LOG.isDebugEnabled()) {
1772 LOG.debug("Flush requested on " + this);
1773 }
1774 }
1775
1776
1777
1778
1779
1780 private boolean isFlushSize(final long size) {
1781 return size > this.memstoreFlushSize;
1782 }
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820 protected long replayRecoveredEditsIfAny(final Path regiondir,
1821 final long minSeqId, final CancelableProgressable reporter)
1822 throws UnsupportedEncodingException, IOException {
1823 long seqid = minSeqId;
1824 NavigableSet<Path> files = HLog.getSplitEditFilesSorted(this.fs, regiondir);
1825 if (files == null || files.isEmpty()) return seqid;
1826 for (Path edits: files) {
1827 if (edits == null || !this.fs.exists(edits)) {
1828 LOG.warn("Null or non-existent edits file: " + edits);
1829 continue;
1830 }
1831 if (isZeroLengthThenDelete(this.fs, edits)) continue;
1832 try {
1833 seqid = replayRecoveredEdits(edits, seqid, reporter);
1834 } catch (IOException e) {
1835 boolean skipErrors = conf.getBoolean("hbase.skip.errors", false);
1836 if (skipErrors) {
1837 Path p = HLog.moveAsideBadEditsFile(fs, edits);
1838 LOG.error("hbase.skip.errors=true so continuing. Renamed " + edits +
1839 " as " + p, e);
1840 } else {
1841 throw e;
1842 }
1843 }
1844 }
1845 if (seqid > minSeqId) {
1846
1847 internalFlushcache(null, seqid);
1848 }
1849
1850 for (Path file: files) {
1851 if (!this.fs.delete(file, false)) {
1852 LOG.error("Failed delete of " + file);
1853 } else {
1854 LOG.debug("Deleted recovered.edits file=" + file);
1855 }
1856 }
1857 return seqid;
1858 }
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869 private long replayRecoveredEdits(final Path edits,
1870 final long minSeqId, final CancelableProgressable reporter)
1871 throws IOException {
1872 LOG.info("Replaying edits from " + edits + "; minSequenceid=" + minSeqId);
1873 HLog.Reader reader = HLog.getReader(this.fs, edits, conf);
1874 try {
1875 long currentEditSeqId = minSeqId;
1876 long firstSeqIdInLog = -1;
1877 long skippedEdits = 0;
1878 long editsCount = 0;
1879 long intervalEdits = 0;
1880 HLog.Entry entry;
1881 Store store = null;
1882
1883 try {
1884
1885 int interval = this.conf.getInt("hbase.hstore.report.interval.edits",
1886 2000);
1887
1888 int period = this.conf.getInt("hbase.hstore.report.period",
1889 this.conf.getInt("hbase.master.assignment.timeoutmonitor.timeout",
1890 30000) / 2);
1891 long lastReport = EnvironmentEdgeManager.currentTimeMillis();
1892
1893 while ((entry = reader.next()) != null) {
1894 HLogKey key = entry.getKey();
1895 WALEdit val = entry.getEdit();
1896
1897 if (reporter != null) {
1898 intervalEdits += val.size();
1899 if (intervalEdits >= interval) {
1900
1901 intervalEdits = 0;
1902 long cur = EnvironmentEdgeManager.currentTimeMillis();
1903 if (lastReport + period <= cur) {
1904
1905 if(!reporter.progress()) {
1906 String msg = "Progressable reporter failed, stopping replay";
1907 LOG.warn(msg);
1908 throw new IOException(msg);
1909 }
1910 lastReport = cur;
1911 }
1912 }
1913 }
1914
1915 if (firstSeqIdInLog == -1) {
1916 firstSeqIdInLog = key.getLogSeqNum();
1917 }
1918
1919 if (key.getLogSeqNum() <= currentEditSeqId) {
1920 skippedEdits++;
1921 continue;
1922 }
1923 currentEditSeqId = key.getLogSeqNum();
1924 boolean flush = false;
1925 for (KeyValue kv: val.getKeyValues()) {
1926
1927
1928 if (kv.matchingFamily(HLog.METAFAMILY) ||
1929 !Bytes.equals(key.getEncodedRegionName(), this.regionInfo.getEncodedNameAsBytes())) {
1930 skippedEdits++;
1931 continue;
1932 }
1933
1934 if (store == null || !kv.matchingFamily(store.getFamily().getName())) {
1935 store = this.stores.get(kv.getFamily());
1936 }
1937 if (store == null) {
1938
1939
1940 LOG.warn("No family for " + kv);
1941 skippedEdits++;
1942 continue;
1943 }
1944
1945
1946
1947 flush = restoreEdit(store, kv);
1948 editsCount++;
1949 }
1950 if (flush) internalFlushcache(null, currentEditSeqId);
1951 }
1952 } catch (EOFException eof) {
1953 Path p = HLog.moveAsideBadEditsFile(fs, edits);
1954 LOG.warn("Encountered EOF. Most likely due to Master failure during " +
1955 "log spliting, so we have this data in another edit. " +
1956 "Continuing, but renaming " + edits + " as " + p, eof);
1957 } catch (IOException ioe) {
1958
1959
1960 if (ioe.getCause() instanceof ParseException) {
1961 Path p = HLog.moveAsideBadEditsFile(fs, edits);
1962 LOG.warn("File corruption encountered! " +
1963 "Continuing, but renaming " + edits + " as " + p, ioe);
1964 } else {
1965
1966
1967 throw ioe;
1968 }
1969 }
1970 if (LOG.isDebugEnabled()) {
1971 LOG.debug("Applied " + editsCount + ", skipped " + skippedEdits +
1972 ", firstSequenceidInLog=" + firstSeqIdInLog +
1973 ", maxSequenceidInLog=" + currentEditSeqId);
1974 }
1975 return currentEditSeqId;
1976 } finally {
1977 reader.close();
1978 }
1979 }
1980
1981
1982
1983
1984
1985
1986
1987 protected boolean restoreEdit(final Store s, final KeyValue kv) {
1988 return isFlushSize(this.memstoreSize.addAndGet(s.add(kv)));
1989 }
1990
1991
1992
1993
1994
1995
1996
1997 private static boolean isZeroLengthThenDelete(final FileSystem fs, final Path p)
1998 throws IOException {
1999 FileStatus stat = fs.getFileStatus(p);
2000 if (stat.getLen() > 0) return false;
2001 LOG.warn("File " + p + " is zero-length, deleting.");
2002 fs.delete(p, false);
2003 return true;
2004 }
2005
2006 protected Store instantiateHStore(Path tableDir, HColumnDescriptor c)
2007 throws IOException {
2008 return new Store(tableDir, this, c, this.fs, this.conf);
2009 }
2010
2011
2012
2013
2014
2015
2016
2017
2018 public Store getStore(final byte [] column) {
2019 return this.stores.get(column);
2020 }
2021
2022
2023
2024
2025
2026
2027 private void checkRow(final byte [] row) throws IOException {
2028 if(!rowIsInRange(regionInfo, row)) {
2029 throw new WrongRegionException("Requested row out of range for " +
2030 "HRegion " + this + ", startKey='" +
2031 Bytes.toStringBinary(regionInfo.getStartKey()) + "', getEndKey()='" +
2032 Bytes.toStringBinary(regionInfo.getEndKey()) + "', row='" +
2033 Bytes.toStringBinary(row) + "'");
2034 }
2035 }
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060 public Integer obtainRowLock(final byte [] row) throws IOException {
2061 startRegionOperation();
2062 try {
2063 return internalObtainRowLock(row, true);
2064 } finally {
2065 closeRegionOperation();
2066 }
2067 }
2068
2069
2070
2071
2072
2073
2074
2075 public Integer tryObtainRowLock(final byte[] row) throws IOException {
2076 startRegionOperation();
2077 try {
2078 return internalObtainRowLock(row, false);
2079 } finally {
2080 closeRegionOperation();
2081 }
2082 }
2083
2084
2085
2086
2087
2088
2089
2090 private Integer internalObtainRowLock(final byte[] row, boolean waitForLock)
2091 throws IOException {
2092 checkRow(row);
2093 startRegionOperation();
2094 try {
2095 synchronized (lockedRows) {
2096 while (lockedRows.contains(row)) {
2097 if (!waitForLock) {
2098 return null;
2099 }
2100 try {
2101 lockedRows.wait();
2102 } catch (InterruptedException ie) {
2103
2104 }
2105 }
2106
2107
2108
2109
2110
2111
2112 byte [] prev = null;
2113 Integer lockId = null;
2114 do {
2115 lockId = new Integer(lockIdGenerator++);
2116 prev = lockIds.put(lockId, row);
2117 if (prev != null) {
2118 lockIds.put(lockId, prev);
2119 lockIdGenerator = rand.nextInt();
2120 }
2121 } while (prev != null);
2122
2123 lockedRows.add(row);
2124 lockedRows.notifyAll();
2125 return lockId;
2126 }
2127 } finally {
2128 closeRegionOperation();
2129 }
2130 }
2131
2132
2133
2134
2135
2136
2137 byte [] getRowFromLock(final Integer lockid) {
2138 synchronized (lockedRows) {
2139 return lockIds.get(lockid);
2140 }
2141 }
2142
2143
2144
2145
2146
2147 void releaseRowLock(final Integer lockid) {
2148 synchronized (lockedRows) {
2149 byte[] row = lockIds.remove(lockid);
2150 lockedRows.remove(row);
2151 lockedRows.notifyAll();
2152 }
2153 }
2154
2155
2156
2157
2158
2159
2160 boolean isRowLocked(final Integer lockid) {
2161 synchronized (lockedRows) {
2162 if (lockIds.get(lockid) != null) {
2163 return true;
2164 }
2165 return false;
2166 }
2167 }
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178 private Integer getLock(Integer lockid, byte [] row, boolean waitForLock)
2179 throws IOException {
2180 Integer lid = null;
2181 if (lockid == null) {
2182 lid = internalObtainRowLock(row, waitForLock);
2183 } else {
2184 if (!isRowLocked(lockid)) {
2185 throw new IOException("Invalid row lock");
2186 }
2187 lid = lockid;
2188 }
2189 return lid;
2190 }
2191
2192 public void bulkLoadHFile(String hfilePath, byte[] familyName)
2193 throws IOException {
2194 startRegionOperation();
2195 try {
2196 Store store = getStore(familyName);
2197 if (store == null) {
2198 throw new DoNotRetryIOException(
2199 "No such column family " + Bytes.toStringBinary(familyName));
2200 }
2201 store.bulkLoadHFile(hfilePath);
2202 } finally {
2203 closeRegionOperation();
2204 }
2205
2206 }
2207
2208
2209 @Override
2210 public boolean equals(Object o) {
2211 if (!(o instanceof HRegion)) {
2212 return false;
2213 }
2214 return this.hashCode() == ((HRegion)o).hashCode();
2215 }
2216
2217 @Override
2218 public int hashCode() {
2219 return Bytes.hashCode(this.regionInfo.getRegionName());
2220 }
2221
2222 @Override
2223 public String toString() {
2224 return this.regionInfo.getRegionNameAsString();
2225 }
2226
2227
2228 public Path getTableDir() {
2229 return this.tableDir;
2230 }
2231
2232
2233
2234
2235
2236
2237 class RegionScanner implements InternalScanner {
2238
2239 KeyValueHeap storeHeap = null;
2240 private final byte [] stopRow;
2241 private Filter filter;
2242 private List<KeyValue> results = new ArrayList<KeyValue>();
2243 private int batch;
2244 private int isScan;
2245 private boolean filterClosed = false;
2246 private long readPt;
2247
2248 public HRegionInfo getRegionName() {
2249 return regionInfo;
2250 }
2251 RegionScanner(Scan scan, List<KeyValueScanner> additionalScanners) throws IOException {
2252
2253 this.filter = scan.getFilter();
2254 this.batch = scan.getBatch();
2255 if (Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW)) {
2256 this.stopRow = null;
2257 } else {
2258 this.stopRow = scan.getStopRow();
2259 }
2260
2261
2262 this.isScan = scan.isGetScan() ? -1 : 0;
2263
2264 this.readPt = ReadWriteConsistencyControl.resetThreadReadPoint(rwcc);
2265
2266 List<KeyValueScanner> scanners = new ArrayList<KeyValueScanner>();
2267 if (additionalScanners != null) {
2268 scanners.addAll(additionalScanners);
2269 }
2270
2271 for (Map.Entry<byte[], NavigableSet<byte[]>> entry :
2272 scan.getFamilyMap().entrySet()) {
2273 Store store = stores.get(entry.getKey());
2274 scanners.add(store.getScanner(scan, entry.getValue()));
2275 }
2276 this.storeHeap = new KeyValueHeap(scanners, comparator);
2277 }
2278
2279 RegionScanner(Scan scan) throws IOException {
2280 this(scan, null);
2281 }
2282
2283
2284
2285
2286 protected void resetFilters() {
2287 if (filter != null) {
2288 filter.reset();
2289 }
2290 }
2291
2292 public synchronized boolean next(List<KeyValue> outResults, int limit)
2293 throws IOException {
2294 if (this.filterClosed) {
2295 throw new UnknownScannerException("Scanner was closed (timed out?) " +
2296 "after we renewed it. Could be caused by a very slow scanner " +
2297 "or a lengthy garbage collection");
2298 }
2299 startRegionOperation();
2300 try {
2301
2302
2303 ReadWriteConsistencyControl.setThreadReadPoint(this.readPt);
2304
2305 results.clear();
2306 boolean returnResult = nextInternal(limit);
2307
2308 outResults.addAll(results);
2309 resetFilters();
2310 if (isFilterDone()) {
2311 return false;
2312 }
2313 return returnResult;
2314 } finally {
2315 closeRegionOperation();
2316 }
2317 }
2318
2319 public synchronized boolean next(List<KeyValue> outResults)
2320 throws IOException {
2321
2322 return next(outResults, batch);
2323 }
2324
2325
2326
2327
2328 synchronized boolean isFilterDone() {
2329 return this.filter != null && this.filter.filterAllRemaining();
2330 }
2331
2332 private boolean nextInternal(int limit) throws IOException {
2333 while (true) {
2334 byte [] currentRow = peekRow();
2335 if (isStopRow(currentRow)) {
2336 if (filter != null && filter.hasFilterRow()) {
2337 filter.filterRow(results);
2338 }
2339 if (filter != null && filter.filterRow()) {
2340 results.clear();
2341 }
2342
2343 return false;
2344 } else if (filterRowKey(currentRow)) {
2345 nextRow(currentRow);
2346 } else {
2347 byte [] nextRow;
2348 do {
2349 this.storeHeap.next(results, limit - results.size());
2350 if (limit > 0 && results.size() == limit) {
2351 if (this.filter != null && filter.hasFilterRow()) throw new IncompatibleFilterException(
2352 "Filter with filterRow(List<KeyValue>) incompatible with scan with limit!");
2353 return true;
2354 }
2355 } while (Bytes.equals(currentRow, nextRow = peekRow()));
2356
2357 final boolean stopRow = isStopRow(nextRow);
2358
2359
2360
2361
2362 if (filter != null && filter.hasFilterRow()) {
2363 filter.filterRow(results);
2364 }
2365
2366 if (results.isEmpty() || filterRow()) {
2367
2368
2369
2370
2371
2372 nextRow(currentRow);
2373
2374
2375
2376
2377 if (!stopRow) continue;
2378 }
2379 return !stopRow;
2380 }
2381 }
2382 }
2383
2384 private boolean filterRow() {
2385 return filter != null
2386 && filter.filterRow();
2387 }
2388 private boolean filterRowKey(byte[] row) {
2389 return filter != null
2390 && filter.filterRowKey(row, 0, row.length);
2391 }
2392
2393 protected void nextRow(byte [] currentRow) throws IOException {
2394 while (Bytes.equals(currentRow, peekRow())) {
2395 this.storeHeap.next(MOCKED_LIST);
2396 }
2397 results.clear();
2398 resetFilters();
2399 }
2400
2401 private byte[] peekRow() {
2402 KeyValue kv = this.storeHeap.peek();
2403 return kv == null ? null : kv.getRow();
2404 }
2405
2406 private boolean isStopRow(byte [] currentRow) {
2407 return currentRow == null ||
2408 (stopRow != null &&
2409 comparator.compareRows(stopRow, 0, stopRow.length,
2410 currentRow, 0, currentRow.length) <= isScan);
2411 }
2412
2413 public synchronized void close() {
2414 if (storeHeap != null) {
2415 storeHeap.close();
2416 storeHeap = null;
2417 }
2418 this.filterClosed = true;
2419 }
2420 }
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444 public static HRegion newHRegion(Path tableDir, HLog log, FileSystem fs, Configuration conf,
2445 HRegionInfo regionInfo, FlushRequester flushListener) {
2446 try {
2447 @SuppressWarnings("unchecked")
2448 Class<? extends HRegion> regionClass =
2449 (Class<? extends HRegion>) conf.getClass(HConstants.REGION_IMPL, HRegion.class);
2450
2451 Constructor<? extends HRegion> c =
2452 regionClass.getConstructor(Path.class, HLog.class, FileSystem.class,
2453 Configuration.class, HRegionInfo.class, FlushRequester.class);
2454
2455 return c.newInstance(tableDir, log, fs, conf, regionInfo, flushListener);
2456 } catch (Throwable e) {
2457
2458 throw new IllegalStateException("Could not instantiate a region instance.", e);
2459 }
2460 }
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475 public static HRegion createHRegion(final HRegionInfo info, final Path rootDir,
2476 final Configuration conf)
2477 throws IOException {
2478 Path tableDir =
2479 HTableDescriptor.getTableDir(rootDir, info.getTableDesc().getName());
2480 Path regionDir = HRegion.getRegionDir(tableDir, info.getEncodedName());
2481 FileSystem fs = FileSystem.get(conf);
2482 fs.mkdirs(regionDir);
2483 HRegion region = HRegion.newHRegion(tableDir,
2484 new HLog(fs, new Path(regionDir, HConstants.HREGION_LOGDIR_NAME),
2485 new Path(regionDir, HConstants.HREGION_OLDLOGDIR_NAME), conf),
2486 fs, conf, info, null);
2487 region.initialize();
2488 return region;
2489 }
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503 public static HRegion openHRegion(final HRegionInfo info, final HLog wal,
2504 final Configuration conf)
2505 throws IOException {
2506 return openHRegion(info, wal, conf, null, null);
2507 }
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523 public static HRegion openHRegion(final HRegionInfo info, final HLog wal,
2524 final Configuration conf, final FlushRequester flusher,
2525 final CancelableProgressable reporter)
2526 throws IOException {
2527 if (LOG.isDebugEnabled()) {
2528 LOG.debug("Opening region: " + info);
2529 }
2530 if (info == null) {
2531 throw new NullPointerException("Passed region info is null");
2532 }
2533 Path dir = HTableDescriptor.getTableDir(FSUtils.getRootDir(conf),
2534 info.getTableDesc().getName());
2535 HRegion r = HRegion.newHRegion(dir, wal, FileSystem.get(conf), conf, info,
2536 flusher);
2537 return r.openHRegion(reporter);
2538 }
2539
2540
2541
2542
2543
2544
2545
2546
2547 protected HRegion openHRegion(final CancelableProgressable reporter)
2548 throws IOException {
2549 checkCompressionCodecs();
2550
2551 long seqid = initialize(reporter);
2552 if (this.log != null) {
2553 this.log.setSequenceNumber(seqid);
2554 }
2555 return this;
2556 }
2557
2558 private void checkCompressionCodecs() throws IOException {
2559 for (HColumnDescriptor fam: regionInfo.getTableDesc().getColumnFamilies()) {
2560 CompressionTest.testCompression(fam.getCompression());
2561 CompressionTest.testCompression(fam.getCompactionCompression());
2562 }
2563 }
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575 public static void addRegionToMETA(HRegion meta, HRegion r)
2576 throws IOException {
2577 meta.checkResources();
2578
2579 byte[] row = r.getRegionName();
2580 Integer lid = meta.obtainRowLock(row);
2581 try {
2582 final List<KeyValue> edits = new ArrayList<KeyValue>(1);
2583 edits.add(new KeyValue(row, HConstants.CATALOG_FAMILY,
2584 HConstants.REGIONINFO_QUALIFIER,
2585 EnvironmentEdgeManager.currentTimeMillis(),
2586 Writables.getBytes(r.getRegionInfo())));
2587 meta.put(HConstants.CATALOG_FAMILY, edits);
2588 } finally {
2589 meta.releaseRowLock(lid);
2590 }
2591 }
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601 public static void deleteRegion(FileSystem fs, Path rootdir, HRegionInfo info)
2602 throws IOException {
2603 deleteRegion(fs, HRegion.getRegionDir(rootdir, info));
2604 }
2605
2606 private static void deleteRegion(FileSystem fs, Path regiondir)
2607 throws IOException {
2608 if (LOG.isDebugEnabled()) {
2609 LOG.debug("DELETING region " + regiondir.toString());
2610 }
2611 if (!fs.delete(regiondir, true)) {
2612 LOG.warn("Failed delete of " + regiondir);
2613 }
2614 }
2615
2616
2617
2618
2619
2620
2621
2622
2623 public static Path getRegionDir(final Path rootdir, final HRegionInfo info) {
2624 return new Path(
2625 HTableDescriptor.getTableDir(rootdir, info.getTableDesc().getName()),
2626 info.getEncodedName());
2627 }
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637 public static boolean rowIsInRange(HRegionInfo info, final byte [] row) {
2638 return ((info.getStartKey().length == 0) ||
2639 (Bytes.compareTo(info.getStartKey(), row) <= 0)) &&
2640 ((info.getEndKey().length == 0) ||
2641 (Bytes.compareTo(info.getEndKey(), row) > 0));
2642 }
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653 public static void makeColumnFamilyDirs(FileSystem fs, Path tabledir,
2654 final HRegionInfo hri, byte [] colFamily)
2655 throws IOException {
2656 Path dir = Store.getStoreHomedir(tabledir, hri.getEncodedName(), colFamily);
2657 if (!fs.mkdirs(dir)) {
2658 LOG.warn("Failed to create " + dir);
2659 }
2660 }
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670 public static HRegion mergeAdjacent(final HRegion srcA, final HRegion srcB)
2671 throws IOException {
2672 HRegion a = srcA;
2673 HRegion b = srcB;
2674
2675
2676
2677 if (srcA.getStartKey() == null) {
2678 if (srcB.getStartKey() == null) {
2679 throw new IOException("Cannot merge two regions with null start key");
2680 }
2681
2682 } else if ((srcB.getStartKey() == null) ||
2683 (Bytes.compareTo(srcA.getStartKey(), srcB.getStartKey()) > 0)) {
2684 a = srcB;
2685 b = srcA;
2686 }
2687
2688 if (!(Bytes.compareTo(a.getEndKey(), b.getStartKey()) == 0)) {
2689 throw new IOException("Cannot merge non-adjacent regions");
2690 }
2691 return merge(a, b);
2692 }
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702 public static HRegion merge(HRegion a, HRegion b) throws IOException {
2703 if (!a.getRegionInfo().getTableDesc().getNameAsString().equals(
2704 b.getRegionInfo().getTableDesc().getNameAsString())) {
2705 throw new IOException("Regions do not belong to the same table");
2706 }
2707
2708 FileSystem fs = a.getFilesystem();
2709
2710
2711
2712 a.flushcache();
2713 b.flushcache();
2714
2715
2716
2717 a.compactStores(true);
2718 if (LOG.isDebugEnabled()) {
2719 LOG.debug("Files for region: " + a);
2720 listPaths(fs, a.getRegionDir());
2721 }
2722 b.compactStores(true);
2723 if (LOG.isDebugEnabled()) {
2724 LOG.debug("Files for region: " + b);
2725 listPaths(fs, b.getRegionDir());
2726 }
2727
2728 Configuration conf = a.getConf();
2729 HTableDescriptor tabledesc = a.getTableDesc();
2730 HLog log = a.getLog();
2731 Path tableDir = a.getTableDir();
2732
2733
2734 final byte[] startKey =
2735 (a.comparator.matchingRows(a.getStartKey(), 0, a.getStartKey().length,
2736 HConstants.EMPTY_BYTE_ARRAY, 0, HConstants.EMPTY_BYTE_ARRAY.length)
2737 || b.comparator.matchingRows(b.getStartKey(), 0,
2738 b.getStartKey().length, HConstants.EMPTY_BYTE_ARRAY, 0,
2739 HConstants.EMPTY_BYTE_ARRAY.length))
2740 ? HConstants.EMPTY_BYTE_ARRAY
2741 : (a.comparator.compareRows(a.getStartKey(), 0, a.getStartKey().length,
2742 b.getStartKey(), 0, b.getStartKey().length) <= 0
2743 ? a.getStartKey()
2744 : b.getStartKey());
2745 final byte[] endKey =
2746 (a.comparator.matchingRows(a.getEndKey(), 0, a.getEndKey().length,
2747 HConstants.EMPTY_BYTE_ARRAY, 0, HConstants.EMPTY_BYTE_ARRAY.length)
2748 || a.comparator.matchingRows(b.getEndKey(), 0, b.getEndKey().length,
2749 HConstants.EMPTY_BYTE_ARRAY, 0,
2750 HConstants.EMPTY_BYTE_ARRAY.length))
2751 ? HConstants.EMPTY_BYTE_ARRAY
2752 : (a.comparator.compareRows(a.getEndKey(), 0, a.getEndKey().length,
2753 b.getEndKey(), 0, b.getEndKey().length) <= 0
2754 ? b.getEndKey()
2755 : a.getEndKey());
2756
2757 HRegionInfo newRegionInfo = new HRegionInfo(tabledesc, startKey, endKey);
2758 LOG.info("Creating new region " + newRegionInfo.toString());
2759 String encodedName = newRegionInfo.getEncodedName();
2760 Path newRegionDir = HRegion.getRegionDir(a.getTableDir(), encodedName);
2761 if(fs.exists(newRegionDir)) {
2762 throw new IOException("Cannot merge; target file collision at " +
2763 newRegionDir);
2764 }
2765 fs.mkdirs(newRegionDir);
2766
2767 LOG.info("starting merge of regions: " + a + " and " + b +
2768 " into new region " + newRegionInfo.toString() +
2769 " with start key <" + Bytes.toString(startKey) + "> and end key <" +
2770 Bytes.toString(endKey) + ">");
2771
2772
2773 Map<byte [], List<StoreFile>> byFamily =
2774 new TreeMap<byte [], List<StoreFile>>(Bytes.BYTES_COMPARATOR);
2775 byFamily = filesByFamily(byFamily, a.close());
2776 byFamily = filesByFamily(byFamily, b.close());
2777 for (Map.Entry<byte [], List<StoreFile>> es : byFamily.entrySet()) {
2778 byte [] colFamily = es.getKey();
2779 makeColumnFamilyDirs(fs, tableDir, newRegionInfo, colFamily);
2780
2781
2782 List<StoreFile> srcFiles = es.getValue();
2783 if (srcFiles.size() == 2) {
2784 long seqA = srcFiles.get(0).getMaxSequenceId();
2785 long seqB = srcFiles.get(1).getMaxSequenceId();
2786 if (seqA == seqB) {
2787
2788
2789
2790 throw new IOException("Files have same sequenceid: " + seqA);
2791 }
2792 }
2793 for (StoreFile hsf: srcFiles) {
2794 StoreFile.rename(fs, hsf.getPath(),
2795 StoreFile.getUniqueFile(fs, Store.getStoreHomedir(tableDir,
2796 newRegionInfo.getEncodedName(), colFamily)));
2797 }
2798 }
2799 if (LOG.isDebugEnabled()) {
2800 LOG.debug("Files for new region");
2801 listPaths(fs, newRegionDir);
2802 }
2803 HRegion dstRegion = HRegion.newHRegion(tableDir, log, fs, conf, newRegionInfo, null);
2804 dstRegion.initialize();
2805 dstRegion.compactStores();
2806 if (LOG.isDebugEnabled()) {
2807 LOG.debug("Files for new region");
2808 listPaths(fs, dstRegion.getRegionDir());
2809 }
2810 deleteRegion(fs, a.getRegionDir());
2811 deleteRegion(fs, b.getRegionDir());
2812
2813 LOG.info("merge completed. New region is " + dstRegion);
2814
2815 return dstRegion;
2816 }
2817
2818
2819
2820
2821
2822
2823
2824
2825 private static Map<byte [], List<StoreFile>> filesByFamily(
2826 Map<byte [], List<StoreFile>> byFamily, List<StoreFile> storeFiles) {
2827 for (StoreFile src: storeFiles) {
2828 byte [] family = src.getFamily();
2829 List<StoreFile> v = byFamily.get(family);
2830 if (v == null) {
2831 v = new ArrayList<StoreFile>();
2832 byFamily.put(family, v);
2833 }
2834 v.add(src);
2835 }
2836 return byFamily;
2837 }
2838
2839
2840
2841
2842
2843 boolean isMajorCompaction() throws IOException {
2844 for (Store store: this.stores.values()) {
2845 if (store.isMajorCompaction()) {
2846 return true;
2847 }
2848 }
2849 return false;
2850 }
2851
2852
2853
2854
2855
2856
2857
2858
2859 private static void listPaths(FileSystem fs, Path dir) throws IOException {
2860 if (LOG.isDebugEnabled()) {
2861 FileStatus[] stats = fs.listStatus(dir);
2862 if (stats == null || stats.length == 0) {
2863 return;
2864 }
2865 for (int i = 0; i < stats.length; i++) {
2866 String path = stats[i].getPath().toString();
2867 if (stats[i].isDir()) {
2868 LOG.debug("d " + path);
2869 listPaths(fs, stats[i].getPath());
2870 } else {
2871 LOG.debug("f " + path + " size=" + stats[i].getLen());
2872 }
2873 }
2874 }
2875 }
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887 public Result get(final Get get, final Integer lockid) throws IOException {
2888
2889 if (get.hasFamilies()) {
2890 for (byte [] family: get.familySet()) {
2891 checkFamily(family);
2892 }
2893 } else {
2894 for (byte[] family: regionInfo.getTableDesc().getFamiliesKeys()) {
2895 get.addFamily(family);
2896 }
2897 }
2898 List<KeyValue> result = get(get);
2899
2900 return new Result(result);
2901 }
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917 private List<KeyValue> getLastIncrement(final Get get) throws IOException {
2918 InternalScan iscan = new InternalScan(get);
2919
2920 List<KeyValue> results = new ArrayList<KeyValue>();
2921
2922
2923 iscan.checkOnlyMemStore();
2924 InternalScanner scanner = null;
2925 try {
2926 scanner = getScanner(iscan);
2927 scanner.next(results);
2928 } finally {
2929 if (scanner != null)
2930 scanner.close();
2931 }
2932
2933
2934 int expected = 0;
2935 Map<byte[], NavigableSet<byte[]>> familyMap = get.getFamilyMap();
2936 for (NavigableSet<byte[]> qfs : familyMap.values()) {
2937 expected += qfs.size();
2938 }
2939
2940
2941 if (results.size() == expected) {
2942 return results;
2943 }
2944
2945
2946 if (results != null && !results.isEmpty()) {
2947
2948 for (KeyValue kv : results) {
2949 byte [] family = kv.getFamily();
2950 NavigableSet<byte[]> qfs = familyMap.get(family);
2951 qfs.remove(kv.getQualifier());
2952 if (qfs.isEmpty()) familyMap.remove(family);
2953 expected--;
2954 }
2955
2956 Get newGet = new Get(get.getRow());
2957 for (Map.Entry<byte[], NavigableSet<byte[]>> f : familyMap.entrySet()) {
2958 byte [] family = f.getKey();
2959 for (byte [] qualifier : f.getValue()) {
2960 newGet.addColumn(family, qualifier);
2961 }
2962 }
2963 newGet.setTimeRange(get.getTimeRange().getMin(),
2964 get.getTimeRange().getMax());
2965 iscan = new InternalScan(newGet);
2966 }
2967
2968
2969 List<KeyValue> fileResults = new ArrayList<KeyValue>();
2970 iscan.checkOnlyStoreFiles();
2971 scanner = null;
2972 try {
2973 scanner = getScanner(iscan);
2974 scanner.next(fileResults);
2975 } finally {
2976 if (scanner != null)
2977 scanner.close();
2978 }
2979
2980
2981 results.addAll(fileResults);
2982 Collections.sort(results, KeyValue.COMPARATOR);
2983 return results;
2984 }
2985
2986
2987
2988
2989 private List<KeyValue> get(final Get get) throws IOException {
2990 Scan scan = new Scan(get);
2991
2992 List<KeyValue> results = new ArrayList<KeyValue>();
2993
2994 InternalScanner scanner = null;
2995 try {
2996 scanner = getScanner(scan);
2997 scanner.next(results);
2998 } finally {
2999 if (scanner != null)
3000 scanner.close();
3001 }
3002 return results;
3003 }
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016 public Result increment(Increment increment, Integer lockid,
3017 boolean writeToWAL)
3018 throws IOException {
3019
3020 byte [] row = increment.getRow();
3021 checkRow(row);
3022 TimeRange tr = increment.getTimeRange();
3023 boolean flush = false;
3024 WALEdit walEdits = null;
3025 List<KeyValue> allKVs = new ArrayList<KeyValue>(increment.numColumns());
3026 List<KeyValue> kvs = new ArrayList<KeyValue>(increment.numColumns());
3027 long now = EnvironmentEdgeManager.currentTimeMillis();
3028 long size = 0;
3029
3030
3031 startRegionOperation();
3032 try {
3033 Integer lid = getLock(lockid, row, true);
3034 this.updatesLock.readLock().lock();
3035 try {
3036
3037 for (Map.Entry<byte [], NavigableMap<byte [], Long>> family :
3038 increment.getFamilyMap().entrySet()) {
3039
3040 Store store = stores.get(family.getKey());
3041
3042
3043 Get get = new Get(row);
3044 for (Map.Entry<byte [], Long> column : family.getValue().entrySet()) {
3045 get.addColumn(family.getKey(), column.getKey());
3046 }
3047 get.setTimeRange(tr.getMin(), tr.getMax());
3048 List<KeyValue> results = getLastIncrement(get);
3049
3050
3051
3052 int idx = 0;
3053 for (Map.Entry<byte [], Long> column : family.getValue().entrySet()) {
3054 long amount = column.getValue();
3055 if (idx < results.size() &&
3056 results.get(idx).matchingQualifier(column.getKey())) {
3057 amount += Bytes.toLong(results.get(idx).getValue());
3058 idx++;
3059 }
3060
3061
3062 KeyValue newKV = new KeyValue(row, family.getKey(), column.getKey(),
3063 now, Bytes.toBytes(amount));
3064 kvs.add(newKV);
3065
3066
3067 if (writeToWAL) {
3068 if (walEdits == null) {
3069 walEdits = new WALEdit();
3070 }
3071 walEdits.add(newKV);
3072 }
3073 }
3074
3075
3076 size += store.upsert(kvs);
3077 allKVs.addAll(kvs);
3078 kvs.clear();
3079 }
3080
3081
3082 if (writeToWAL) {
3083 this.log.append(regionInfo, regionInfo.getTableDesc().getName(),
3084 walEdits, now);
3085 }
3086
3087 size = this.memstoreSize.addAndGet(size);
3088 flush = isFlushSize(size);
3089 } finally {
3090 this.updatesLock.readLock().unlock();
3091 releaseRowLock(lid);
3092 }
3093 } finally {
3094 closeRegionOperation();
3095 }
3096
3097 if (flush) {
3098
3099 requestFlush();
3100 }
3101
3102 return new Result(allKVs);
3103 }
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115 public long incrementColumnValue(byte [] row, byte [] family,
3116 byte [] qualifier, long amount, boolean writeToWAL)
3117 throws IOException {
3118 checkRow(row);
3119 boolean flush = false;
3120
3121 long result = amount;
3122 startRegionOperation();
3123 try {
3124 Integer lid = obtainRowLock(row);
3125 this.updatesLock.readLock().lock();
3126 try {
3127 Store store = stores.get(family);
3128
3129
3130 Get get = new Get(row);
3131 get.addColumn(family, qualifier);
3132
3133 List<KeyValue> results = getLastIncrement(get);
3134
3135 if (!results.isEmpty()) {
3136 KeyValue kv = results.get(0);
3137 byte [] buffer = kv.getBuffer();
3138 int valueOffset = kv.getValueOffset();
3139 result += Bytes.toLong(buffer, valueOffset, Bytes.SIZEOF_LONG);
3140 }
3141
3142
3143 KeyValue newKv = new KeyValue(row, family,
3144 qualifier, EnvironmentEdgeManager.currentTimeMillis(),
3145 Bytes.toBytes(result));
3146
3147
3148 if (writeToWAL) {
3149 long now = EnvironmentEdgeManager.currentTimeMillis();
3150 WALEdit walEdit = new WALEdit();
3151 walEdit.add(newKv);
3152 this.log.append(regionInfo, regionInfo.getTableDesc().getName(),
3153 walEdit, now);
3154 }
3155
3156
3157
3158
3159 long size = store.updateColumnValue(row, family, qualifier, result);
3160
3161 size = this.memstoreSize.addAndGet(size);
3162 flush = isFlushSize(size);
3163 } finally {
3164 this.updatesLock.readLock().unlock();
3165 releaseRowLock(lid);
3166 }
3167 } finally {
3168 closeRegionOperation();
3169 }
3170
3171 if (flush) {
3172
3173 requestFlush();
3174 }
3175
3176 return result;
3177 }
3178
3179
3180
3181
3182
3183
3184 private void checkFamily(final byte [] family)
3185 throws NoSuchColumnFamilyException {
3186 if(!regionInfo.getTableDesc().hasFamily(family)) {
3187 throw new NoSuchColumnFamilyException("Column family " +
3188 Bytes.toString(family) + " does not exist in region " + this
3189 + " in table " + regionInfo.getTableDesc());
3190 }
3191 }
3192
3193 public static final long FIXED_OVERHEAD = ClassSize.align(
3194 (4 * Bytes.SIZEOF_LONG) + Bytes.SIZEOF_BOOLEAN + ClassSize.ARRAY +
3195 (22 * ClassSize.REFERENCE) + ClassSize.OBJECT + Bytes.SIZEOF_INT);
3196
3197 public static final long DEEP_OVERHEAD = ClassSize.align(FIXED_OVERHEAD +
3198 (ClassSize.OBJECT * 2) + (2 * ClassSize.ATOMIC_BOOLEAN) +
3199 ClassSize.ATOMIC_LONG + ClassSize.ATOMIC_INTEGER +
3200
3201
3202 ClassSize.TREEMAP +
3203
3204
3205 ClassSize.TREEMAP +
3206
3207 ClassSize.CONCURRENT_SKIPLISTMAP + ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY +
3208 ClassSize.align(ClassSize.OBJECT +
3209 (5 * Bytes.SIZEOF_BOOLEAN)) +
3210 (3 * ClassSize.REENTRANT_LOCK));
3211
3212 public long heapSize() {
3213 long heapSize = DEEP_OVERHEAD;
3214 for(Store store : this.stores.values()) {
3215 heapSize += store.heapSize();
3216 }
3217 return heapSize;
3218 }
3219
3220
3221
3222
3223
3224 private static void printUsageAndExit(final String message) {
3225 if (message != null && message.length() > 0) System.out.println(message);
3226 System.out.println("Usage: HRegion CATLALOG_TABLE_DIR [major_compact]");
3227 System.out.println("Options:");
3228 System.out.println(" major_compact Pass this option to major compact " +
3229 "passed region.");
3230 System.out.println("Default outputs scan of passed region.");
3231 System.exit(1);
3232 }
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244 private static void processTable(final FileSystem fs, final Path p,
3245 final HLog log, final Configuration c,
3246 final boolean majorCompact)
3247 throws IOException {
3248 HRegion region = null;
3249 String rootStr = Bytes.toString(HConstants.ROOT_TABLE_NAME);
3250 String metaStr = Bytes.toString(HConstants.META_TABLE_NAME);
3251
3252 if (p.getName().startsWith(rootStr)) {
3253 region = HRegion.newHRegion(p, log, fs, c, HRegionInfo.ROOT_REGIONINFO, null);
3254 } else if (p.getName().startsWith(metaStr)) {
3255 region = HRegion.newHRegion(p, log, fs, c, HRegionInfo.FIRST_META_REGIONINFO,
3256 null);
3257 } else {
3258 throw new IOException("Not a known catalog table: " + p.toString());
3259 }
3260 try {
3261 region.initialize();
3262 if (majorCompact) {
3263 region.compactStores(true);
3264 } else {
3265
3266 Scan scan = new Scan();
3267
3268 InternalScanner scanner = region.getScanner(scan);
3269 try {
3270 List<KeyValue> kvs = new ArrayList<KeyValue>();
3271 boolean done = false;
3272 do {
3273 kvs.clear();
3274 done = scanner.next(kvs);
3275 if (kvs.size() > 0) LOG.info(kvs);
3276 } while (done);
3277 } finally {
3278 scanner.close();
3279 }
3280
3281 }
3282 } finally {
3283 region.close();
3284 }
3285 }
3286
3287 boolean shouldForceSplit() {
3288 return this.splitRequest;
3289 }
3290
3291 byte[] getSplitPoint() {
3292 return this.splitPoint;
3293 }
3294
3295 void forceSplit(byte[] sp) {
3296
3297
3298 this.splitRequest = true;
3299 if (sp != null) {
3300 this.splitPoint = sp;
3301 }
3302 }
3303
3304
3305
3306
3307 protected void prepareToSplit() {
3308
3309 }
3310
3311
3312
3313
3314 public int getCompactPriority() {
3315 int count = Integer.MAX_VALUE;
3316 for(Store store : stores.values()) {
3317 count = Math.min(count, store.getCompactPriority());
3318 }
3319 return count;
3320 }
3321
3322
3323
3324
3325
3326
3327 public boolean hasTooManyStoreFiles() {
3328 for(Store store : stores.values()) {
3329 if(store.hasTooManyStoreFiles()) {
3330 return true;
3331 }
3332 }
3333 return false;
3334 }
3335
3336
3337
3338
3339
3340
3341
3342
3343 private void startRegionOperation() throws NotServingRegionException {
3344 if (this.closing.get()) {
3345 throw new NotServingRegionException(regionInfo.getRegionNameAsString() +
3346 " is closing");
3347 }
3348 lock.readLock().lock();
3349 if (this.closed.get()) {
3350 lock.readLock().unlock();
3351 throw new NotServingRegionException(regionInfo.getRegionNameAsString() +
3352 " is closed");
3353 }
3354 }
3355
3356
3357
3358
3359
3360 private void closeRegionOperation(){
3361 lock.readLock().unlock();
3362 }
3363
3364
3365
3366
3367 private static final List<KeyValue> MOCKED_LIST = new AbstractList<KeyValue>() {
3368
3369 @Override
3370 public void add(int index, KeyValue element) {
3371
3372 }
3373
3374 @Override
3375 public boolean addAll(int index, Collection<? extends KeyValue> c) {
3376 return false;
3377 }
3378
3379 @Override
3380 public KeyValue get(int index) {
3381 throw new UnsupportedOperationException();
3382 }
3383
3384 @Override
3385 public int size() {
3386 return 0;
3387 }
3388 };
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401 public static void main(String[] args) throws IOException {
3402 if (args.length < 1) {
3403 printUsageAndExit(null);
3404 }
3405 boolean majorCompact = false;
3406 if (args.length > 1) {
3407 if (!args[1].toLowerCase().startsWith("major")) {
3408 printUsageAndExit("ERROR: Unrecognized option <" + args[1] + ">");
3409 }
3410 majorCompact = true;
3411 }
3412 final Path tableDir = new Path(args[0]);
3413 final Configuration c = HBaseConfiguration.create();
3414 final FileSystem fs = FileSystem.get(c);
3415 final Path logdir = new Path(c.get("hbase.tmp.dir"),
3416 "hlog" + tableDir.getName()
3417 + EnvironmentEdgeManager.currentTimeMillis());
3418 final Path oldLogDir = new Path(c.get("hbase.tmp.dir"),
3419 HConstants.HREGION_OLDLOGDIR_NAME);
3420 final HLog log = new HLog(fs, logdir, oldLogDir, c);
3421 try {
3422 processTable(fs, tableDir, log, c, majorCompact);
3423 } finally {
3424 log.close();
3425 BlockCache bc = StoreFile.getBlockCache(c);
3426 if (bc != null) bc.shutdown();
3427 }
3428 }
3429 }