/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */
20 package org.apache.hadoop.hbase.regionserver;
21
22 import java.io.EOFException;
23 import java.io.IOException;
24 import java.io.InterruptedIOException;
25 import java.io.UnsupportedEncodingException;
26 import java.lang.reflect.Constructor;
27 import java.text.ParseException;
28 import java.util.AbstractList;
29 import java.util.ArrayList;
30 import java.util.Arrays;
31 import java.util.Collection;
32 import java.util.Collections;
33 import java.util.HashMap;
34 import java.util.List;
35 import java.util.Map;
36 import java.util.NavigableMap;
37 import java.util.NavigableSet;
38 import java.util.Random;
39 import java.util.Set;
40 import java.util.TreeMap;
41 import java.util.TreeSet;
42 import java.util.concurrent.ConcurrentSkipListMap;
43 import java.util.concurrent.atomic.AtomicBoolean;
44 import java.util.concurrent.atomic.AtomicLong;
45 import java.util.concurrent.locks.ReentrantReadWriteLock;
46
47 import org.apache.commons.logging.Log;
48 import org.apache.commons.logging.LogFactory;
49 import org.apache.hadoop.conf.Configuration;
50 import org.apache.hadoop.fs.FSDataOutputStream;
51 import org.apache.hadoop.fs.FileStatus;
52 import org.apache.hadoop.fs.FileSystem;
53 import org.apache.hadoop.fs.Path;
54 import org.apache.hadoop.hbase.DoNotRetryIOException;
55 import org.apache.hadoop.hbase.DroppedSnapshotException;
56 import org.apache.hadoop.hbase.HBaseConfiguration;
57 import org.apache.hadoop.hbase.HColumnDescriptor;
58 import org.apache.hadoop.hbase.HConstants;
59 import org.apache.hadoop.hbase.HRegionInfo;
60 import org.apache.hadoop.hbase.HTableDescriptor;
61 import org.apache.hadoop.hbase.KeyValue;
62 import org.apache.hadoop.hbase.NotServingRegionException;
63 import org.apache.hadoop.hbase.UnknownScannerException;
64 import org.apache.hadoop.hbase.HConstants.OperationStatusCode;
65 import org.apache.hadoop.hbase.client.Delete;
66 import org.apache.hadoop.hbase.client.Get;
67 import org.apache.hadoop.hbase.client.Increment;
68 import org.apache.hadoop.hbase.client.Put;
69 import org.apache.hadoop.hbase.client.Result;
70 import org.apache.hadoop.hbase.client.Row;
71 import org.apache.hadoop.hbase.client.RowLock;
72 import org.apache.hadoop.hbase.client.Scan;
73 import org.apache.hadoop.hbase.filter.Filter;
74 import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
75 import org.apache.hadoop.hbase.io.HeapSize;
76 import org.apache.hadoop.hbase.io.TimeRange;
77 import org.apache.hadoop.hbase.io.hfile.BlockCache;
78 import org.apache.hadoop.hbase.regionserver.wal.HLog;
79 import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
80 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
81 import org.apache.hadoop.hbase.util.Bytes;
82 import org.apache.hadoop.hbase.util.CancelableProgressable;
83 import org.apache.hadoop.hbase.util.ClassSize;
84 import org.apache.hadoop.hbase.util.CompressionTest;
85 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
86 import org.apache.hadoop.hbase.util.FSUtils;
87 import org.apache.hadoop.hbase.util.Pair;
88 import org.apache.hadoop.hbase.util.Writables;
89 import org.apache.hadoop.io.Writable;
90 import org.apache.hadoop.util.Progressable;
91 import org.apache.hadoop.util.StringUtils;
92
93 import com.google.common.collect.Lists;
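
/**
 * HRegion stores data for a contiguous range of rows (one region) of a table.
 * It keeps one {@link Store} per column family, tracks the aggregate memstore
 * size, appends edits to the shared {@link HLog} write-ahead log, and
 * coordinates flushes, compactions, splits and closes through
 * {@link WriteState}, row locks and the region read/write lock.
 *
 * <p>A minimal usage sketch, not taken from the original documentation
 * (assumes an existing HRegionInfo, HLog and Configuration; the row and
 * family literals are made up, and error handling is omitted):
 *
 * <pre>
 *   HRegion region = HRegion.openHRegion(info, wal, conf);
 *   Put p = new Put(Bytes.toBytes("row-0001"));
 *   p.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("value"));
 *   region.put(p);
 *   region.flushcache();
 *   region.close();
 * </pre>
 */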
131 public class HRegion implements HeapSize {
132 public static final Log LOG = LogFactory.getLog(HRegion.class);
133 static final String MERGEDIR = "merges";
134
135 final AtomicBoolean closed = new AtomicBoolean(false);
136
137
138
139
140
141 final AtomicBoolean closing = new AtomicBoolean(false);
142
143
144
145
146
147 private final Set<byte[]> lockedRows =
148 new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
149 private final Map<Integer, byte []> lockIds =
150 new HashMap<Integer, byte []>();
151 private int lockIdGenerator = 1;
152 static private Random rand = new Random();
153
154 protected final Map<byte [], Store> stores =
155 new ConcurrentSkipListMap<byte [], Store>(Bytes.BYTES_RAWCOMPARATOR);
156
157
158
159
160
161
162
163 final AtomicLong memstoreSize = new AtomicLong(0);
164
165
166
167
168
169 final Path tableDir;
170
171 final HLog log;
172 final FileSystem fs;
173 final Configuration conf;
174 final HRegionInfo regionInfo;
175 final Path regiondir;
176 KeyValue.KVComparator comparator;
177
178
179
180
181
182 private volatile boolean forceMajorCompaction = false;
183 private Pair<Long,Long> lastCompactInfo = null;
184
185
186 private final Object closeLock = new Object();
187
188
189
190
191
192 static class WriteState {
193
194 volatile boolean flushing = false;
195
196 volatile boolean flushRequested = false;
197
198 volatile boolean compacting = false;
199
200 volatile boolean writesEnabled = true;
201
202 volatile boolean readOnly = false;
203
204
205
206
207
208
209 synchronized void setReadOnly(final boolean onOff) {
210 this.writesEnabled = !onOff;
211 this.readOnly = onOff;
212 }
213
214 boolean isReadOnly() {
215 return this.readOnly;
216 }
217
218 boolean isFlushRequested() {
219 return this.flushRequested;
220 }
221 }
222
223 final WriteState writestate = new WriteState();
224
225 final long memstoreFlushSize;
226 private volatile long lastFlushTime;
227 private List<Pair<Long,Long>> recentFlushes = new ArrayList<Pair<Long,Long>>();
228 final FlushRequester flushRequester;
229 private final long blockingMemStoreSize;
230 final long threadWakeFrequency;
231
232 final ReentrantReadWriteLock lock =
233 new ReentrantReadWriteLock();
234
235
236 private final ReentrantReadWriteLock updatesLock =
237 new ReentrantReadWriteLock();
238 private boolean splitRequest;
239
240 private final ReadWriteConsistencyControl rwcc =
241 new ReadWriteConsistencyControl();
242
243
244
245
246 public final static String REGIONINFO_FILE = ".regioninfo";
247
248
249
250
251 public HRegion(){
252 this.tableDir = null;
253 this.blockingMemStoreSize = 0L;
254 this.conf = null;
255 this.flushRequester = null;
256 this.fs = null;
257 this.memstoreFlushSize = 0L;
258 this.log = null;
259 this.regiondir = null;
260 this.regionInfo = null;
261 this.threadWakeFrequency = 0L;
262 }
263
  /**
   * HRegion constructor.  This only sets up fields; the region is not usable
   * until {@link #initialize()} (or
   * {@link #initialize(CancelableProgressable)}) has been called.
   *
   * @param tableDir qualified path of the directory in which the table lives
   * @param log the write-ahead log shared by the regions of this server
   * @param fs the filesystem
   * @param conf global configuration settings
   * @param regionInfo descriptor of the region to instantiate
   * @param flushRequester callback used to request memstore flushes; may be
   *   null when flushes are driven directly by the caller
   */
287 public HRegion(Path tableDir, HLog log, FileSystem fs, Configuration conf,
288 HRegionInfo regionInfo, FlushRequester flushRequester) {
289 this.tableDir = tableDir;
290 this.comparator = regionInfo.getComparator();
291 this.log = log;
292 this.fs = fs;
293 this.conf = conf;
294 this.regionInfo = regionInfo;
295 this.flushRequester = flushRequester;
296 this.threadWakeFrequency = conf.getLong(HConstants.THREAD_WAKE_FREQUENCY,
297 10 * 1000);
298 String encodedNameStr = this.regionInfo.getEncodedName();
299 this.regiondir = getRegionDir(this.tableDir, encodedNameStr);
300 long flushSize = regionInfo.getTableDesc().getMemStoreFlushSize();
301 if (flushSize == HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE) {
302 flushSize = conf.getLong("hbase.hregion.memstore.flush.size",
303 HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE);
304 }
305 this.memstoreFlushSize = flushSize;
306 this.blockingMemStoreSize = this.memstoreFlushSize *
307 conf.getLong("hbase.hregion.memstore.block.multiplier", 2);
308 if (LOG.isDebugEnabled()) {
309
310 LOG.debug("Instantiated " + this);
311 }
312 }
313
  /**
   * Initialize this region, without progress reporting.
   * @return what the next sequence (edit) id should be
   * @throws IOException e
   */
319 public long initialize() throws IOException {
320 return initialize(null);
321 }
322
  /**
   * Initialize this region: write the .regioninfo file if missing, clean up
   * temporary and split/merge leftovers, instantiate a {@link Store} per
   * column family, and replay any recovered edits.
   *
   * @param reporter polled while recovered edits are replayed; may be null
   * @return what the next sequence (edit) id should be
   * @throws IOException e
   */
330 public long initialize(final CancelableProgressable reporter)
331 throws IOException {
332
333 this.closing.set(false);
334 this.closed.set(false);
335
336
337 checkRegioninfoOnFilesystem();
338
339
340 cleanupTmpDir();
341
342
343 long maxSeqId = -1;
344 for (HColumnDescriptor c : this.regionInfo.getTableDesc().getFamilies()) {
345 Store store = instantiateHStore(this.tableDir, c);
346 this.stores.put(c.getName(), store);
347 long storeSeqId = store.getMaxSequenceId();
348 if (storeSeqId > maxSeqId) {
349 maxSeqId = storeSeqId;
350 }
351 }
352
353 maxSeqId = replayRecoveredEditsIfAny(this.regiondir, maxSeqId, reporter);
354
355
356
357
358 SplitTransaction.cleanupAnySplitDetritus(this);
359 FSUtils.deleteDirectory(this.fs, new Path(regiondir, MERGEDIR));
360
361 this.writestate.setReadOnly(this.regionInfo.getTableDesc().isReadOnly());
362
363 this.writestate.compacting = false;
364 this.lastFlushTime = EnvironmentEdgeManager.currentTimeMillis();
365
366
367 long nextSeqid = maxSeqId + 1;
368 LOG.info("Onlined " + this.toString() + "; next sequenceid=" + nextSeqid);
369 return nextSeqid;
370 }
371
372
373
374
375
376
377
378 static void moveInitialFilesIntoPlace(final FileSystem fs,
379 final Path initialFiles, final Path regiondir)
380 throws IOException {
381 if (initialFiles != null && fs.exists(initialFiles)) {
382 if (!fs.rename(initialFiles, regiondir)) {
383 LOG.warn("Unable to rename " + initialFiles + " to " + regiondir);
384 }
385 }
386 }
387
388
389
390
391 public boolean hasReferences() {
392 for (Store store : this.stores.values()) {
393 for (StoreFile sf : store.getStorefiles()) {
394
395 if (sf.isReference()) return true;
396 }
397 }
398 return false;
399 }
400
401
402
403
404
405
406 private void checkRegioninfoOnFilesystem() throws IOException {
407 Path regioninfoPath = new Path(this.regiondir, REGIONINFO_FILE);
408 if (this.fs.exists(regioninfoPath) &&
409 this.fs.getFileStatus(regioninfoPath).getLen() > 0) {
410 return;
411 }
412
413
414
415
416 Path tmpPath = new Path(getTmpDir(), REGIONINFO_FILE);
417 FSDataOutputStream out = this.fs.create(tmpPath, true);
418 try {
419 this.regionInfo.write(out);
420 out.write('\n');
421 out.write('\n');
422 out.write(Bytes.toBytes(this.regionInfo.toString()));
423 } finally {
424 out.close();
425 }
426 if (!fs.rename(tmpPath, regioninfoPath)) {
427 throw new IOException("Unable to rename " + tmpPath + " to " +
428 regioninfoPath);
429 }
430 }
431
432
433 public HRegionInfo getRegionInfo() {
434 return this.regionInfo;
435 }
436
437
438 public boolean isClosed() {
439 return this.closed.get();
440 }
441
442
443
444
445 public boolean isClosing() {
446 return this.closing.get();
447 }
448
449 boolean areWritesEnabled() {
450 synchronized(this.writestate) {
451 return this.writestate.writesEnabled;
452 }
453 }
454
455 public ReadWriteConsistencyControl getRWCC() {
456 return rwcc;
457 }
458
  /**
   * Close down this HRegion.  Flush the cache, shut down each Store and stop
   * servicing calls.  Equivalent to {@code close(false)}.
   *
   * <p>This method could take some time to execute, so don't call it from a
   * time-sensitive thread.
   *
   * @return the list of StoreFiles that made up the region's Stores, or null
   *   if the region was already closed
   * @throws IOException e
   */
472 public List<StoreFile> close() throws IOException {
473 return close(false);
474 }
475
  /**
   * Close down this HRegion.  When {@code abort} is true the memstore is not
   * flushed first; unflushed edits are expected to be recovered from the
   * write-ahead log instead.
   *
   * @param abort true if the server is aborting
   * @return the list of StoreFiles that made up the region's Stores, or null
   *   if the region was already closed
   * @throws IOException e
   */
490 public List<StoreFile> close(final boolean abort) throws IOException {
491
492
493 synchronized (closeLock) {
494 return doClose(abort);
495 }
496 }
497
498 private List<StoreFile> doClose(final boolean abort)
499 throws IOException {
500 if (isClosed()) {
501 LOG.warn("Region " + this + " already closed");
502 return null;
503 }
504 boolean wasFlushing = false;
505 synchronized (writestate) {
506
507
508 writestate.writesEnabled = false;
509 wasFlushing = writestate.flushing;
510 LOG.debug("Closing " + this + ": disabling compactions & flushes");
511 while (writestate.compacting || writestate.flushing) {
512 LOG.debug("waiting for" +
513 (writestate.compacting ? " compaction" : "") +
514 (writestate.flushing ?
515 (writestate.compacting ? "," : "") + " cache flush" :
516 "") + " to complete for region " + this);
517 try {
518 writestate.wait();
519 } catch (InterruptedException iex) {
520
521 }
522 }
523 }
524
525
526
527 if (!abort && !wasFlushing && worthPreFlushing()) {
528 LOG.info("Running close preflush of " + this.getRegionNameAsString());
529 internalFlushcache();
530 }
531 this.closing.set(true);
532 lock.writeLock().lock();
533 try {
534 if (this.isClosed()) {
535
536 return null;
537 }
538 LOG.debug("Updates disabled for region " + this);
539
540 if (!abort) {
541 internalFlushcache();
542 }
543
544 List<StoreFile> result = new ArrayList<StoreFile>();
545 for (Store store : stores.values()) {
546 result.addAll(store.close());
547 }
548 this.closed.set(true);
549 LOG.info("Closed " + this);
550 return result;
551 } finally {
552 lock.writeLock().unlock();
553 }
554 }
555
556
557
558
559 private boolean worthPreFlushing() {
560 return this.memstoreSize.get() >
561 this.conf.getLong("hbase.hregion.preclose.flush.size", 1024 * 1024 * 5);
562 }
563
564
565
566
567
568
569 public byte [] getStartKey() {
570 return this.regionInfo.getStartKey();
571 }
572
573
574 public byte [] getEndKey() {
575 return this.regionInfo.getEndKey();
576 }
577
578
579 public long getRegionId() {
580 return this.regionInfo.getRegionId();
581 }
582
583
584 public byte [] getRegionName() {
585 return this.regionInfo.getRegionName();
586 }
587
588
589 public String getRegionNameAsString() {
590 return this.regionInfo.getRegionNameAsString();
591 }
592
593
594 public HTableDescriptor getTableDesc() {
595 return this.regionInfo.getTableDesc();
596 }
597
598
599 public HLog getLog() {
600 return this.log;
601 }
602
603
604 public Configuration getConf() {
605 return this.conf;
606 }
607
608
609 public Path getRegionDir() {
610 return this.regiondir;
611 }
612
613
614
615
616
617
618
619
620 public static Path getRegionDir(final Path tabledir, final String name) {
621 return new Path(tabledir, name);
622 }
623
624
625 public FileSystem getFilesystem() {
626 return this.fs;
627 }
628
629
630 public Pair<Long,Long> getLastCompactInfo() {
631 return this.lastCompactInfo;
632 }
633
634
635 public long getLastFlushTime() {
636 return this.lastFlushTime;
637 }
638
639
640 public List<Pair<Long,Long>> getRecentFlushInfo() {
641 this.lock.readLock().lock();
642 List<Pair<Long,Long>> ret = this.recentFlushes;
643 this.recentFlushes = new ArrayList<Pair<Long,Long>>();
644 this.lock.readLock().unlock();
645 return ret;
646 }
647
648
649
650
651
652
653
654
655
656 public long getLargestHStoreSize() {
657 long size = 0;
658 for (Store h: stores.values()) {
659 long storeSize = h.getSize();
660 if (storeSize > size) {
661 size = storeSize;
662 }
663 }
664 return size;
665 }
666
667
668
669
670
671 void doRegionCompactionPrep() throws IOException {
672 }
673
674
675
676
677 private void cleanupTmpDir() throws IOException {
678 FSUtils.deleteDirectory(this.fs, getTmpDir());
679 }
680
681
682
683
684
685 Path getTmpDir() {
686 return new Path(getRegionDir(), ".tmp");
687 }
688
689 void setForceMajorCompaction(final boolean b) {
690 this.forceMajorCompaction = b;
691 }
692
693 boolean getForceMajorCompaction() {
694 return this.forceMajorCompaction;
695 }
696
  /**
   * Compact all Stores in this region, honouring (and then clearing) any
   * pending force-major-compaction request.
   *
   * <p>This operation could take a long time, so don't call it from a
   * time-sensitive thread.
   *
   * @return the suggested split row if the compaction found one, else null
   * @throws IOException e
   */
711 public byte [] compactStores() throws IOException {
712 boolean majorCompaction = this.forceMajorCompaction;
713 this.forceMajorCompaction = false;
714 return compactStores(majorCompaction);
715 }
716
  /**
   * Compact all Stores in this region.
   *
   * <p>The compaction is skipped (returning null) if the region is closing
   * or closed, if writes are disabled, or if another compaction is already
   * running.
   *
   * @param majorCompaction true to force a major compaction of every Store
   * @return the suggested split row if the compaction found one, else null
   * @throws IOException e
   */
732 byte [] compactStores(final boolean majorCompaction)
733 throws IOException {
734 if (this.closing.get()) {
735 LOG.debug("Skipping compaction on " + this + " because closing");
736 return null;
737 }
738 lock.readLock().lock();
739 this.lastCompactInfo = null;
740 try {
741 if (this.closed.get()) {
742 LOG.debug("Skipping compaction on " + this + " because closed");
743 return null;
744 }
745 byte [] splitRow = null;
746 if (this.closed.get()) {
747 return splitRow;
748 }
749 try {
750 synchronized (writestate) {
751 if (!writestate.compacting && writestate.writesEnabled) {
752 writestate.compacting = true;
753 } else {
754 LOG.info("NOT compacting region " + this +
755 ": compacting=" + writestate.compacting + ", writesEnabled=" +
756 writestate.writesEnabled);
757 return splitRow;
758 }
759 }
760 LOG.info("Starting" + (majorCompaction? " major " : " ") +
761 "compaction on region " + this);
762 long startTime = EnvironmentEdgeManager.currentTimeMillis();
763 doRegionCompactionPrep();
764 long lastCompactSize = 0;
765 long maxSize = -1;
766 boolean completed = false;
767 try {
768 for (Store store: stores.values()) {
769 final Store.StoreSize ss = store.compact(majorCompaction);
770 lastCompactSize += store.getLastCompactSize();
771 if (ss != null && ss.getSize() > maxSize) {
772 maxSize = ss.getSize();
773 splitRow = ss.getSplitRow();
774 }
775 }
776 completed = true;
777 } catch (InterruptedIOException iioe) {
778 LOG.info("compaction interrupted by user: ", iioe);
779 } finally {
780 long now = EnvironmentEdgeManager.currentTimeMillis();
781 LOG.info(((completed) ? "completed" : "aborted")
782 + " compaction on region " + this
783 + " after " + StringUtils.formatTimeDiff(now, startTime));
784 if (completed) {
785 this.lastCompactInfo =
786 new Pair<Long,Long>((now - startTime) / 1000, lastCompactSize);
787 }
788 }
789 } finally {
790 synchronized (writestate) {
791 writestate.compacting = false;
792 writestate.notifyAll();
793 }
794 }
795 return splitRow;
796 } finally {
797 lock.readLock().unlock();
798 }
799 }
800
  /**
   * Flush the memstore.
   *
   * <p>The flush is skipped (returning false) if the region is closing or
   * closed, if writes are disabled, or if another flush is already running.
   *
   * <p>This method may block for some time, so it should not be called from
   * a time-sensitive thread.
   *
   * @return true if the flush found that a compaction is needed
   * @throws IOException general io exceptions
   * @throws DroppedSnapshotException if the flush failed after the memstore
   *   was snapshotted, in which case the WAL must be replayed
   */
821 public boolean flushcache() throws IOException {
822
823 if (this.closing.get()) {
824 LOG.debug("Skipping flush on " + this + " because closing");
825 return false;
826 }
827 lock.readLock().lock();
828 try {
829 if (this.closed.get()) {
830 LOG.debug("Skipping flush on " + this + " because closed");
831 return false;
832 }
833 try {
834 synchronized (writestate) {
835 if (!writestate.flushing && writestate.writesEnabled) {
836 this.writestate.flushing = true;
837 } else {
838 if (LOG.isDebugEnabled()) {
839 LOG.debug("NOT flushing memstore for region " + this +
840 ", flushing=" +
841 writestate.flushing + ", writesEnabled=" +
842 writestate.writesEnabled);
843 }
844 return false;
845 }
846 }
847 return internalFlushcache();
848 } finally {
849 synchronized (writestate) {
850 writestate.flushing = false;
851 this.writestate.flushRequested = false;
852 writestate.notifyAll();
853 }
854 }
855 } finally {
856 lock.readLock().unlock();
857 }
858 }
859
  /**
   * Flush the memstore to store files, using this region's WAL and sequence
   * ids.  A no-op (returning false) when the memstore is empty.
   *
   * <p>The updates write lock is held only long enough to snapshot each
   * Store's memstore; new updates may proceed while the snapshots are
   * written out and committed.  On success the WAL is told the flush is
   * complete so it can trim obsolete log files; on failure the WAL flush is
   * aborted and a {@link DroppedSnapshotException} is thrown so the caller
   * can arrange for the log to be replayed.
   *
   * @return true if the flush found that a compaction is needed
   * @throws IOException general io exceptions
   * @throws DroppedSnapshotException if the snapshot could not be persisted
   */
894 protected boolean internalFlushcache() throws IOException {
895 return internalFlushcache(this.log, -1);
896 }
897
  /**
   * Flush variant used both for ordinary flushes ({@code wal != null}) and
   * for flushing edits replayed from recovered logs ({@code wal == null}
   * with an explicit sequence id).
   *
   * @param wal the log to notify of the flush, or null to use {@code myseqid}
   * @param myseqid sequence id to record when {@code wal} is null
   * @return true if the flush found that a compaction is needed
   * @throws IOException e
   */
906 protected boolean internalFlushcache(final HLog wal, final long myseqid)
907 throws IOException {
908 final long startTime = EnvironmentEdgeManager.currentTimeMillis();
909
910
911 this.lastFlushTime = startTime;
912
913 if (this.memstoreSize.get() <= 0) {
914 return false;
915 }
916 if (LOG.isDebugEnabled()) {
917 LOG.debug("Started memstore flush for " + this +
918 ", current region memstore size " +
919 StringUtils.humanReadableInt(this.memstoreSize.get()) +
920 ((wal != null)? "": "; wal is null, using passed sequenceid=" + myseqid));
921 }
922
923
924
925
926
927
928
929
930 long sequenceId = -1L;
931 long completeSequenceId = -1L;
932
933
934
935
936 this.updatesLock.writeLock().lock();
937 final long currentMemStoreSize = this.memstoreSize.get();
938 List<StoreFlusher> storeFlushers = new ArrayList<StoreFlusher>(stores.size());
939 try {
940 sequenceId = (wal == null)? myseqid: wal.startCacheFlush();
941 completeSequenceId = this.getCompleteCacheFlushSequenceId(sequenceId);
942
943 for (Store s : stores.values()) {
944 storeFlushers.add(s.getStoreFlusher(completeSequenceId));
945 }
946
947
948 for (StoreFlusher flusher : storeFlushers) {
949 flusher.prepare();
950 }
951 } finally {
952 this.updatesLock.writeLock().unlock();
953 }
954
955 LOG.debug("Finished snapshotting, commencing flushing stores");
956
957
958
959
960
961 boolean compactionRequested = false;
962 try {
963
964
965
966
967 for (StoreFlusher flusher : storeFlushers) {
968 flusher.flushCache();
969 }
970
971
972 for (StoreFlusher flusher : storeFlushers) {
973 boolean needsCompaction = flusher.commit();
974 if (needsCompaction) {
975 compactionRequested = true;
976 }
977 }
978 storeFlushers.clear();
979
980
981 this.memstoreSize.addAndGet(-currentMemStoreSize);
982 } catch (Throwable t) {
983
984
985
986
987
988
989 if (wal != null) wal.abortCacheFlush();
990 DroppedSnapshotException dse = new DroppedSnapshotException("region: " +
991 Bytes.toStringBinary(getRegionName()));
992 dse.initCause(t);
993 throw dse;
994 }
995
996
997
998
999
1000
1001
1002
1003 if (wal != null) {
1004 wal.completeCacheFlush(this.regionInfo.getEncodedNameAsBytes(),
1005 regionInfo.getTableDesc().getName(), completeSequenceId,
1006 this.getRegionInfo().isMetaRegion());
1007 }
1008
1009
1010
1011 synchronized (this) {
1012 notifyAll();
1013 }
1014
1015 long time = EnvironmentEdgeManager.currentTimeMillis() - startTime;
1016 if (LOG.isDebugEnabled()) {
1017 LOG.debug("Finished memstore flush of ~" +
1018 StringUtils.humanReadableInt(currentMemStoreSize) + " for region " +
1019 this + " in " + time + "ms, sequenceid=" + sequenceId +
1020 ", compaction requested=" + compactionRequested +
1021 ((wal == null)? "; wal=null": ""));
1022 }
1023 this.recentFlushes.add(new Pair<Long,Long>(time/1000,currentMemStoreSize));
1024
1025 return compactionRequested;
1026 }
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036 protected long getCompleteCacheFlushSequenceId(long currentSequenceId) {
1037 return currentSequenceId;
1038 }
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052 Result getClosestRowBefore(final byte [] row)
1053 throws IOException{
1054 return getClosestRowBefore(row, HConstants.CATALOG_FAMILY);
1055 }
1056
  /**
   * Return all the data for the row that matches {@code row} exactly, or,
   * if there is no such row, for the row that immediately precedes it,
   * restricted to the given column family.
   *
   * @param row the row key
   * @param family the column family to search
   * @return the Result for the matched row, or null if no row at or before
   *   {@code row} exists in this region
   * @throws IOException e
   */
1067 public Result getClosestRowBefore(final byte [] row, final byte [] family)
1068 throws IOException {
1069
1070
1071 KeyValue key = null;
1072 checkRow(row);
1073 startRegionOperation();
1074 try {
1075 Store store = getStore(family);
1076 KeyValue kv = new KeyValue(row, HConstants.LATEST_TIMESTAMP);
1077
1078 key = store.getRowKeyAtOrBefore(kv);
1079 if (key == null) {
1080 return null;
1081 }
1082 Get get = new Get(key.getRow());
1083 get.addFamily(family);
1084 return get(get, null);
1085 } finally {
1086 closeRegionOperation();
1087 }
1088 }
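
  // A minimal sketch (not from the original docs) of a closest-row-before
  // lookup; the row literal is made up and error handling is omitted:
  //
  //   Result r = region.getClosestRowBefore(Bytes.toBytes("row-0042"),
  //       HConstants.CATALOG_FAMILY);
  //   if (r != null) {
  //     byte[] info = r.getValue(HConstants.CATALOG_FAMILY,
  //         HConstants.REGIONINFO_QUALIFIER);
  //   }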
1089
  /**
   * Return an iterator that scans over this region, returning the columns
   * and rows specified by the {@link Scan}.  When the scan names no
   * families, every family of the table is included.
   *
   * <p>The returned scanner must be closed by the caller.
   *
   * @param scan the configured {@link Scan}
   * @return the scanner
   * @throws IOException read exceptions
   */
1100 public InternalScanner getScanner(Scan scan)
1101 throws IOException {
1102 return getScanner(scan, null);
1103 }
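
  // A minimal sketch of driving the scanner returned above (the family name
  // is made up; error handling omitted).  Each call to next() fills the list
  // with one row's KeyValues, subject to the Scan's batch setting, and
  // returns false once the region is exhausted:
  //
  //   Scan scan = new Scan();
  //   scan.addFamily(Bytes.toBytes("cf"));
  //   InternalScanner scanner = region.getScanner(scan);
  //   List<KeyValue> kvs = new ArrayList<KeyValue>();
  //   try {
  //     boolean more;
  //     do {
  //       kvs.clear();
  //       more = scanner.next(kvs);
  //       // ... process this row's KeyValues ...
  //     } while (more);
  //   } finally {
  //     scanner.close();
  //   }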
1104
1105 protected InternalScanner getScanner(Scan scan, List<KeyValueScanner> additionalScanners) throws IOException {
1106 startRegionOperation();
1107 try {
1108
1109 if(scan.hasFamilies()) {
1110 for(byte [] family : scan.getFamilyMap().keySet()) {
1111 checkFamily(family);
1112 }
1113 } else {
1114 for(byte[] family: regionInfo.getTableDesc().getFamiliesKeys()){
1115 scan.addFamily(family);
1116 }
1117 }
1118 return instantiateInternalScanner(scan, additionalScanners);
1119
1120 } finally {
1121 closeRegionOperation();
1122 }
1123 }
1124
1125 protected InternalScanner instantiateInternalScanner(Scan scan, List<KeyValueScanner> additionalScanners) throws IOException {
1126 return new RegionScanner(scan, additionalScanners);
1127 }
1128
1129
1130
1131
1132 private void prepareDelete(Delete delete) throws IOException {
1133
1134 if(delete.getFamilyMap().isEmpty()){
1135 for(byte [] family : regionInfo.getTableDesc().getFamiliesKeys()){
1136
1137 delete.deleteFamily(family, delete.getTimeStamp());
1138 }
1139 } else {
1140 for(byte [] family : delete.getFamilyMap().keySet()) {
1141 if(family == null) {
1142 throw new NoSuchColumnFamilyException("Empty family is invalid");
1143 }
1144 checkFamily(family);
1145 }
1146 }
1147 }
1148
  /**
   * Perform a delete on this region.
   *
   * @param delete the delete to apply
   * @param lockid an existing row lock to reuse, or null to acquire one for
   *   the duration of the call
   * @param writeToWAL true to append the delete to the write-ahead log
   * @throws IOException e
   */
1158 public void delete(Delete delete, Integer lockid, boolean writeToWAL)
1159 throws IOException {
1160 checkReadOnly();
1161 checkResources();
1162 Integer lid = null;
1163 startRegionOperation();
1164 try {
1165 byte [] row = delete.getRow();
1166
1167 lid = getLock(lockid, row, true);
1168
1169
1170 prepareDelete(delete);
1171 delete(delete.getFamilyMap(), writeToWAL);
1172
1173 } finally {
1174 if(lockid == null) releaseRowLock(lid);
1175 closeRegionOperation();
1176 }
1177 }
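
  // A minimal sketch of a delete through the method above (row, family and
  // qualifier literals are made up; error handling omitted):
  //
  //   Delete d = new Delete(Bytes.toBytes("row-0042"));
  //   d.deleteColumns(Bytes.toBytes("cf"), Bytes.toBytes("q"));
  //   region.delete(d, null, true);   // no pre-held lock, write to the WAL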
1178
1179
1180
1181
1182
1183
1184
1185 public void delete(Map<byte[], List<KeyValue>> familyMap, boolean writeToWAL)
1186 throws IOException {
1187 long now = EnvironmentEdgeManager.currentTimeMillis();
1188 byte [] byteNow = Bytes.toBytes(now);
1189 boolean flush = false;
1190
1191 updatesLock.readLock().lock();
1192
1193 try {
1194
1195 for (Map.Entry<byte[], List<KeyValue>> e : familyMap.entrySet()) {
1196
1197 byte[] family = e.getKey();
1198 List<KeyValue> kvs = e.getValue();
1199 Map<byte[], Integer> kvCount = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
1200
1201 for (KeyValue kv: kvs) {
1202
1203
1204 if (kv.isLatestTimestamp() && kv.isDeleteType()) {
1205 byte[] qual = kv.getQualifier();
1206 if (qual == null) qual = HConstants.EMPTY_BYTE_ARRAY;
1207
1208 Integer count = kvCount.get(qual);
1209 if (count == null) {
1210 kvCount.put(qual, 1);
1211 } else {
1212 kvCount.put(qual, count + 1);
1213 }
1214 count = kvCount.get(qual);
1215
1216 Get get = new Get(kv.getRow());
1217 get.setMaxVersions(count);
1218 get.addColumn(family, qual);
1219
1220 List<KeyValue> result = get(get);
1221
1222 if (result.size() < count) {
1223
1224 kv.updateLatestStamp(byteNow);
1225 continue;
1226 }
1227 if (result.size() > count) {
1228 throw new RuntimeException("Unexpected size: " + result.size());
1229 }
1230 KeyValue getkv = result.get(count - 1);
1231 Bytes.putBytes(kv.getBuffer(), kv.getTimestampOffset(),
1232 getkv.getBuffer(), getkv.getTimestampOffset(), Bytes.SIZEOF_LONG);
1233 } else {
1234 kv.updateLatestStamp(byteNow);
1235 }
1236 }
1237 }
1238
1239 if (writeToWAL) {
1240
1241
1242
1243
1244
1245
1246
1247
1248 WALEdit walEdit = new WALEdit();
1249 addFamilyMapToWALEdit(familyMap, walEdit);
1250 this.log.append(regionInfo, regionInfo.getTableDesc().getName(),
1251 walEdit, now);
1252 }
1253
1254
1255 long addedSize = applyFamilyMapToMemstore(familyMap);
1256 flush = isFlushSize(memstoreSize.addAndGet(addedSize));
1257 } finally {
1258 this.updatesLock.readLock().unlock();
1259 }
1260
1261 if (flush) {
1262
1263 requestFlush();
1264 }
1265 }
1266
1267
1268
1269
1270
1271 public void put(Put put) throws IOException {
1272 this.put(put, null, put.getWriteToWAL());
1273 }
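
  // A minimal sketch of a single put (family and qualifier literals are made
  // up; error handling omitted).  Cells left at LATEST_TIMESTAMP get the
  // server's current time via updateKVTimestamps() further down:
  //
  //   Put p = new Put(Bytes.toBytes("row-0042"));
  //   p.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("value"));
  //   region.put(p);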
1274
1275
1276
1277
1278
1279
1280 public void put(Put put, boolean writeToWAL) throws IOException {
1281 this.put(put, null, writeToWAL);
1282 }
1283
1284
1285
1286
1287
1288
1289 public void put(Put put, Integer lockid) throws IOException {
1290 this.put(put, lockid, put.getWriteToWAL());
1291 }
1292
1293
1294
1295
1296
1297
1298
1299 public void put(Put put, Integer lockid, boolean writeToWAL)
1300 throws IOException {
1301 checkReadOnly();
1302
1303
1304
1305
1306
1307 checkResources();
1308 startRegionOperation();
1309 try {
1310
1311
1312
1313
1314
1315 byte [] row = put.getRow();
1316
1317 Integer lid = getLock(lockid, row, true);
1318
1319 try {
1320
1321 put(put.getFamilyMap(), writeToWAL);
1322 } finally {
1323 if(lockid == null) releaseRowLock(lid);
1324 }
1325 } finally {
1326 closeRegionOperation();
1327 }
1328 }
1329
1330
1331
1332
1333
1334
1335 private static class BatchOperationInProgress<T> {
1336 T[] operations;
1337 OperationStatusCode[] retCodes;
1338 int nextIndexToProcess = 0;
1339
1340 public BatchOperationInProgress(T[] operations) {
1341 this.operations = operations;
1342 retCodes = new OperationStatusCode[operations.length];
1343 Arrays.fill(retCodes, OperationStatusCode.NOT_RUN);
1344 }
1345
1346 public boolean isDone() {
1347 return nextIndexToProcess == operations.length;
1348 }
1349 }
1350
1351
1352
1353
1354
1355 public OperationStatusCode[] put(Put[] puts) throws IOException {
1356 @SuppressWarnings("unchecked")
1357 Pair<Put, Integer> putsAndLocks[] = new Pair[puts.length];
1358
1359 for (int i = 0; i < puts.length; i++) {
1360 putsAndLocks[i] = new Pair<Put, Integer>(puts[i], null);
1361 }
1362 return put(putsAndLocks);
1363 }
1364
1365
1366
1367
1368
1369
1370 public OperationStatusCode[] put(Pair<Put, Integer>[] putsAndLocks) throws IOException {
1371 BatchOperationInProgress<Pair<Put, Integer>> batchOp =
1372 new BatchOperationInProgress<Pair<Put,Integer>>(putsAndLocks);
1373
1374 while (!batchOp.isDone()) {
1375 checkReadOnly();
1376 checkResources();
1377
1378 long newSize;
1379 startRegionOperation();
1380 try {
1381 long addedSize = doMiniBatchPut(batchOp);
1382 newSize = memstoreSize.addAndGet(addedSize);
1383 } finally {
1384 closeRegionOperation();
1385 }
1386 if (isFlushSize(newSize)) {
1387 requestFlush();
1388 }
1389 }
1390 return batchOp.retCodes;
1391 }
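
  // A minimal sketch of a batch put with per-operation status handling
  // (assumes puts[] was built as in the single-put sketch above):
  //
  //   OperationStatusCode[] codes = region.put(puts);
  //   for (int i = 0; i < codes.length; i++) {
  //     if (codes[i] == OperationStatusCode.BAD_FAMILY) {
  //       // puts[i] named a column family this table does not have
  //     } else if (codes[i] != OperationStatusCode.SUCCESS) {
  //       // puts[i] was not applied; the caller may retry it
  //     }
  //   }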
1392
1393 private long doMiniBatchPut(BatchOperationInProgress<Pair<Put, Integer>> batchOp) throws IOException {
1394 long now = EnvironmentEdgeManager.currentTimeMillis();
1395 byte[] byteNow = Bytes.toBytes(now);
1396 boolean locked = false;
1397
1398
1399 List<Integer> acquiredLocks = Lists.newArrayListWithCapacity(batchOp.operations.length);
1400
1401 int firstIndex = batchOp.nextIndexToProcess;
1402 int lastIndexExclusive = firstIndex;
1403 boolean success = false;
1404 try {
1405
1406
1407
1408
1409 int numReadyToWrite = 0;
1410 while (lastIndexExclusive < batchOp.operations.length) {
1411 Pair<Put, Integer> nextPair = batchOp.operations[lastIndexExclusive];
1412 Put put = nextPair.getFirst();
1413 Integer providedLockId = nextPair.getSecond();
1414
1415
1416 try {
1417 checkFamilies(put.getFamilyMap().keySet());
1418 } catch (NoSuchColumnFamilyException nscf) {
1419 LOG.warn("No such column family in batch put", nscf);
1420 batchOp.retCodes[lastIndexExclusive] = OperationStatusCode.BAD_FAMILY;
1421 lastIndexExclusive++;
1422 continue;
1423 }
1424
1425
1426
1427 boolean shouldBlock = numReadyToWrite == 0;
1428 Integer acquiredLockId = getLock(providedLockId, put.getRow(), shouldBlock);
1429 if (acquiredLockId == null) {
1430
1431 assert !shouldBlock : "Should never fail to get lock when blocking";
1432 break;
1433 }
1434 if (providedLockId == null) {
1435 acquiredLocks.add(acquiredLockId);
1436 }
1437 lastIndexExclusive++;
1438 numReadyToWrite++;
1439 }
1440
1441 if (numReadyToWrite <= 0) return 0L;
1442
1443
1444
1445
1446
1447
1448 for (int i = firstIndex; i < lastIndexExclusive; i++) {
1449 updateKVTimestamps(
1450 batchOp.operations[i].getFirst().getFamilyMap().values(),
1451 byteNow);
1452 }
1453
1454
1455 this.updatesLock.readLock().lock();
1456 locked = true;
1457
1458
1459
1460
1461 WALEdit walEdit = new WALEdit();
1462 for (int i = firstIndex; i < lastIndexExclusive; i++) {
1463
1464 if (batchOp.retCodes[i] != OperationStatusCode.NOT_RUN) continue;
1465
1466 Put p = batchOp.operations[i].getFirst();
1467 if (!p.getWriteToWAL()) continue;
1468 addFamilyMapToWALEdit(p.getFamilyMap(), walEdit);
1469 }
1470
1471
1472 this.log.append(regionInfo, regionInfo.getTableDesc().getName(),
1473 walEdit, now);
1474
1475
1476
1477
1478 long addedSize = 0;
1479 for (int i = firstIndex; i < lastIndexExclusive; i++) {
1480 if (batchOp.retCodes[i] != OperationStatusCode.NOT_RUN) continue;
1481
1482 Put p = batchOp.operations[i].getFirst();
1483 addedSize += applyFamilyMapToMemstore(p.getFamilyMap());
1484 batchOp.retCodes[i] = OperationStatusCode.SUCCESS;
1485 }
1486 success = true;
1487 return addedSize;
1488 } finally {
1489 if (locked)
1490 this.updatesLock.readLock().unlock();
1491
1492 for (Integer toRelease : acquiredLocks) {
1493 releaseRowLock(toRelease);
1494 }
1495 if (!success) {
1496 for (int i = firstIndex; i < lastIndexExclusive; i++) {
1497 if (batchOp.retCodes[i] == OperationStatusCode.NOT_RUN) {
1498 batchOp.retCodes[i] = OperationStatusCode.FAILURE;
1499 }
1500 }
1501 }
1502 batchOp.nextIndexToProcess = lastIndexExclusive;
1503 }
1504 }
1505
  /**
   * Atomically check that the current value of a cell equals
   * {@code expectedValue} and, if it does, apply the supplied Put or Delete.
   *
   * @param row the row to check and mutate
   * @param family column family of the checked cell
   * @param qualifier column qualifier of the checked cell
   * @param expectedValue value to compare against; null or empty matches a
   *   missing cell
   * @param w the {@link Put} or {@link Delete} to apply if the check passes;
   *   it must address the same row
   * @param lockId an existing row lock to reuse, or null to acquire one
   * @param writeToWAL true to append the mutation to the write-ahead log
   * @return true if the mutation was applied, false otherwise
   * @throws IOException e
   */
1521 public boolean checkAndMutate(byte [] row, byte [] family, byte [] qualifier,
1522 byte [] expectedValue, Writable w, Integer lockId, boolean writeToWAL)
1523 throws IOException{
1524 checkReadOnly();
1525
1526
1527 checkResources();
1528 boolean isPut = w instanceof Put;
1529 if (!isPut && !(w instanceof Delete))
1530 throw new DoNotRetryIOException("Action must be Put or Delete");
1531 Row r = (Row)w;
1532 if (Bytes.compareTo(row, r.getRow()) != 0) {
1533 throw new DoNotRetryIOException("Action's getRow must match the passed row");
1534 }
1535
1536 startRegionOperation();
1537 try {
1538 RowLock lock = isPut ? ((Put)w).getRowLock() : ((Delete)w).getRowLock();
1539 Get get = new Get(row, lock);
1540 checkFamily(family);
1541 get.addColumn(family, qualifier);
1542
1543
1544 Integer lid = getLock(lockId, get.getRow(), true);
1545 List<KeyValue> result = new ArrayList<KeyValue>();
1546 try {
1547 result = get(get);
1548
1549 boolean matches = false;
1550 if (result.size() == 0 &&
1551 (expectedValue == null || expectedValue.length == 0)) {
1552 matches = true;
1553 } else if (result.size() == 1) {
1554
1555 byte [] actualValue = result.get(0).getValue();
1556 matches = Bytes.equals(expectedValue, actualValue);
1557 }
1558
1559 if (matches) {
1560
1561 if (isPut) {
1562 put(((Put)w).getFamilyMap(), writeToWAL);
1563 } else {
1564 Delete d = (Delete)w;
1565 prepareDelete(d);
1566 delete(d.getFamilyMap(), writeToWAL);
1567 }
1568 return true;
1569 }
1570 return false;
1571 } finally {
1572 if(lockId == null) releaseRowLock(lid);
1573 }
1574 } finally {
1575 closeRegionOperation();
1576 }
1577 }
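
  // A minimal sketch of an atomic check-and-put using the method above (all
  // literals are made up; error handling omitted):
  //
  //   byte[] row  = Bytes.toBytes("row-0042");
  //   byte[] fam  = Bytes.toBytes("cf");
  //   byte[] qual = Bytes.toBytes("q");
  //   Put p = new Put(row);
  //   p.add(fam, qual, Bytes.toBytes("new-value"));
  //   boolean applied = region.checkAndMutate(row, fam, qual,
  //       Bytes.toBytes("expected-value"), p, null, true);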
1578
1579
1580
1581
1582
1583
1584 private void updateKVTimestamps(
1585 final Iterable<List<KeyValue>> keyLists, final byte[] now) {
1586 for (List<KeyValue> keys: keyLists) {
1587 if (keys == null) continue;
1588 for (KeyValue key : keys) {
1589 key.updateLatestStamp(now);
1590 }
1591 }
1592 }
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603 private void checkResources() {
1604
1605
1606 if (this.getRegionInfo().isMetaRegion()) return;
1607
1608 boolean blocked = false;
1609 while (this.memstoreSize.get() > this.blockingMemStoreSize) {
1610 requestFlush();
1611 if (!blocked) {
1612 LOG.info("Blocking updates for '" + Thread.currentThread().getName() +
1613 "' on region " + Bytes.toStringBinary(getRegionName()) +
1614 ": memstore size " +
1615 StringUtils.humanReadableInt(this.memstoreSize.get()) +
1616 " is >= blocking size " +
1617 StringUtils.humanReadableInt(this.blockingMemStoreSize));
1618 }
1619 blocked = true;
1620 synchronized(this) {
1621 try {
1622 wait(threadWakeFrequency);
1623 } catch (InterruptedException e) {
1624
1625 }
1626 }
1627 }
1628 if (blocked) {
1629 LOG.info("Unblocking updates for region " + this + " '"
1630 + Thread.currentThread().getName() + "'");
1631 }
1632 }
1633
1634
1635
1636
1637 protected void checkReadOnly() throws IOException {
1638 if (this.writestate.isReadOnly()) {
1639 throw new IOException("region is read only");
1640 }
1641 }
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651 private void put(final byte [] family, final List<KeyValue> edits)
1652 throws IOException {
1653 Map<byte[], List<KeyValue>> familyMap = new HashMap<byte[], List<KeyValue>>();
1654 familyMap.put(family, edits);
1655 this.put(familyMap, true);
1656 }
1657
1658
1659
1660
1661
1662
1663
1664
1665 private void put(final Map<byte [], List<KeyValue>> familyMap,
1666 boolean writeToWAL) throws IOException {
1667 long now = EnvironmentEdgeManager.currentTimeMillis();
1668 byte[] byteNow = Bytes.toBytes(now);
1669 boolean flush = false;
1670 this.updatesLock.readLock().lock();
1671 try {
1672 checkFamilies(familyMap.keySet());
1673 updateKVTimestamps(familyMap.values(), byteNow);
1674
1675
1676
1677
1678
1679 if (writeToWAL) {
1680 WALEdit walEdit = new WALEdit();
1681 addFamilyMapToWALEdit(familyMap, walEdit);
1682 this.log.append(regionInfo, regionInfo.getTableDesc().getName(),
1683 walEdit, now);
1684 }
1685
1686 long addedSize = applyFamilyMapToMemstore(familyMap);
1687 flush = isFlushSize(memstoreSize.addAndGet(addedSize));
1688 } finally {
1689 this.updatesLock.readLock().unlock();
1690 }
1691 if (flush) {
1692
1693 requestFlush();
1694 }
1695 }
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706 private long applyFamilyMapToMemstore(Map<byte[], List<KeyValue>> familyMap) {
1707 ReadWriteConsistencyControl.WriteEntry w = null;
1708 long size = 0;
1709 try {
1710 w = rwcc.beginMemstoreInsert();
1711
1712 for (Map.Entry<byte[], List<KeyValue>> e : familyMap.entrySet()) {
1713 byte[] family = e.getKey();
1714 List<KeyValue> edits = e.getValue();
1715
1716 Store store = getStore(family);
1717 for (KeyValue kv: edits) {
1718 kv.setMemstoreTS(w.getWriteNumber());
1719 size += store.add(kv);
1720 }
1721 }
1722 } finally {
1723 rwcc.completeMemstoreInsert(w);
1724 }
1725 return size;
1726 }
1727
1728
1729
1730
1731
1732 private void checkFamilies(Collection<byte[]> families)
1733 throws NoSuchColumnFamilyException {
1734 for (byte[] family : families) {
1735 checkFamily(family);
1736 }
1737 }
1738
1739
1740
1741
1742
1743
1744
1745 private void addFamilyMapToWALEdit(Map<byte[], List<KeyValue>> familyMap,
1746 WALEdit walEdit) {
1747 for (List<KeyValue> edits : familyMap.values()) {
1748 for (KeyValue kv : edits) {
1749 walEdit.add(kv);
1750 }
1751 }
1752 }
1753
1754 private void requestFlush() {
1755 if (this.flushRequester == null) {
1756 return;
1757 }
1758 synchronized (writestate) {
1759 if (this.writestate.isFlushRequested()) {
1760 return;
1761 }
1762 writestate.flushRequested = true;
1763 }
1764
1765 this.flushRequester.requestFlush(this);
1766 if (LOG.isDebugEnabled()) {
1767 LOG.debug("Flush requested on " + this);
1768 }
1769 }
1770
1771
1772
1773
1774
1775 private boolean isFlushSize(final long size) {
1776 return size > this.memstoreFlushSize;
1777 }
1778
  /**
   * Replay the recovered edits placed under this region's directory by the
   * log-splitting process.  Edits whose sequence id is at or below
   * {@code minSeqId} are skipped because they are already present in the
   * store files.  Applied edits are flushed and the recovered-edits files
   * are deleted before returning.
   *
   * @param regiondir the region directory holding the recovered.edits files
   * @param minSeqId the largest sequence id already present in the Stores
   * @param reporter progress reporter polled during the replay; may be null
   * @return the highest sequence id seen, at least {@code minSeqId}
   * @throws UnsupportedEncodingException
   * @throws IOException e
   */
1815 protected long replayRecoveredEditsIfAny(final Path regiondir,
1816 final long minSeqId, final CancelableProgressable reporter)
1817 throws UnsupportedEncodingException, IOException {
1818 long seqid = minSeqId;
1819 NavigableSet<Path> files = HLog.getSplitEditFilesSorted(this.fs, regiondir);
1820 if (files == null || files.isEmpty()) return seqid;
1821 for (Path edits: files) {
1822 if (edits == null || !this.fs.exists(edits)) {
1823 LOG.warn("Null or non-existent edits file: " + edits);
1824 continue;
1825 }
1826 if (isZeroLengthThenDelete(this.fs, edits)) continue;
1827 try {
1828 seqid = replayRecoveredEdits(edits, seqid, reporter);
1829 } catch (IOException e) {
1830 boolean skipErrors = conf.getBoolean("hbase.skip.errors", false);
1831 if (skipErrors) {
1832 Path p = HLog.moveAsideBadEditsFile(fs, edits);
1833 LOG.error("hbase.skip.errors=true so continuing. Renamed " + edits +
1834 " as " + p, e);
1835 } else {
1836 throw e;
1837 }
1838 }
1839 }
1840 if (seqid > minSeqId) {
1841
1842 internalFlushcache(null, seqid);
1843 }
1844
1845 for (Path file: files) {
1846 if (!this.fs.delete(file, false)) {
1847 LOG.error("Failed delete of " + file);
1848 } else {
1849 LOG.debug("Deleted recovered.edits file=" + file);
1850 }
1851 }
1852 return seqid;
1853 }
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864 private long replayRecoveredEdits(final Path edits,
1865 final long minSeqId, final CancelableProgressable reporter)
1866 throws IOException {
1867 LOG.info("Replaying edits from " + edits + "; minSequenceid=" + minSeqId);
1868 HLog.Reader reader = HLog.getReader(this.fs, edits, conf);
1869 try {
1870 long currentEditSeqId = minSeqId;
1871 long firstSeqIdInLog = -1;
1872 long skippedEdits = 0;
1873 long editsCount = 0;
1874 long intervalEdits = 0;
1875 HLog.Entry entry;
1876 Store store = null;
1877
1878 try {
1879
1880 int interval = this.conf.getInt("hbase.hstore.report.interval.edits",
1881 2000);
1882
1883 int period = this.conf.getInt("hbase.hstore.report.period",
1884 this.conf.getInt("hbase.master.assignment.timeoutmonitor.timeout",
1885 30000) / 2);
1886 long lastReport = EnvironmentEdgeManager.currentTimeMillis();
1887
1888 while ((entry = reader.next()) != null) {
1889 HLogKey key = entry.getKey();
1890 WALEdit val = entry.getEdit();
1891
1892 if (reporter != null) {
1893 intervalEdits += val.size();
1894 if (intervalEdits >= interval) {
1895
1896 intervalEdits = 0;
1897 long cur = EnvironmentEdgeManager.currentTimeMillis();
1898 if (lastReport + period <= cur) {
1899
1900 if(!reporter.progress()) {
1901 String msg = "Progressable reporter failed, stopping replay";
1902 LOG.warn(msg);
1903 throw new IOException(msg);
1904 }
1905 lastReport = cur;
1906 }
1907 }
1908 }
1909
1910 if (firstSeqIdInLog == -1) {
1911 firstSeqIdInLog = key.getLogSeqNum();
1912 }
1913
1914 if (key.getLogSeqNum() <= currentEditSeqId) {
1915 skippedEdits++;
1916 continue;
1917 }
1918 currentEditSeqId = key.getLogSeqNum();
1919 boolean flush = false;
1920 for (KeyValue kv: val.getKeyValues()) {
1921
1922
1923 if (kv.matchingFamily(HLog.METAFAMILY) ||
1924 !Bytes.equals(key.getEncodedRegionName(), this.regionInfo.getEncodedNameAsBytes())) {
1925 skippedEdits++;
1926 continue;
1927 }
1928
1929 if (store == null || !kv.matchingFamily(store.getFamily().getName())) {
1930 store = this.stores.get(kv.getFamily());
1931 }
1932 if (store == null) {
1933
1934
1935 LOG.warn("No family for " + kv);
1936 skippedEdits++;
1937 continue;
1938 }
1939
1940
1941
1942 flush = restoreEdit(store, kv);
1943 editsCount++;
1944 }
1945 if (flush) internalFlushcache(null, currentEditSeqId);
1946 }
1947 } catch (EOFException eof) {
1948 Path p = HLog.moveAsideBadEditsFile(fs, edits);
1949 LOG.warn("Encountered EOF. Most likely due to Master failure during " +
1950 "log splitting, so we have this data in another edit. " +
1951 "Continuing, but renaming " + edits + " as " + p, eof);
1952 } catch (IOException ioe) {
1953
1954
1955 if (ioe.getCause() instanceof ParseException) {
1956 Path p = HLog.moveAsideBadEditsFile(fs, edits);
1957 LOG.warn("File corruption encountered! " +
1958 "Continuing, but renaming " + edits + " as " + p, ioe);
1959 } else {
1960
1961
1962 throw ioe;
1963 }
1964 }
1965 if (LOG.isDebugEnabled()) {
1966 LOG.debug("Applied " + editsCount + ", skipped " + skippedEdits +
1967 ", firstSequenceidInLog=" + firstSeqIdInLog +
1968 ", maxSequenceidInLog=" + currentEditSeqId);
1969 }
1970 return currentEditSeqId;
1971 } finally {
1972 reader.close();
1973 }
1974 }
1975
1976
1977
1978
1979
1980
1981
1982 protected boolean restoreEdit(final Store s, final KeyValue kv) {
1983 return isFlushSize(this.memstoreSize.addAndGet(s.add(kv)));
1984 }
1985
1986
1987
1988
1989
1990
1991
1992 private static boolean isZeroLengthThenDelete(final FileSystem fs, final Path p)
1993 throws IOException {
1994 FileStatus stat = fs.getFileStatus(p);
1995 if (stat.getLen() > 0) return false;
1996 LOG.warn("File " + p + " is zero-length, deleting.");
1997 fs.delete(p, false);
1998 return true;
1999 }
2000
2001 protected Store instantiateHStore(Path tableDir, HColumnDescriptor c)
2002 throws IOException {
2003 return new Store(tableDir, this, c, this.fs, this.conf);
2004 }
2005
2006
2007
2008
2009
2010
2011
2012
2013 public Store getStore(final byte [] column) {
2014 return this.stores.get(column);
2015 }
2016
2017
2018
2019
2020
2021
2022 private void checkRow(final byte [] row) throws IOException {
2023 if(!rowIsInRange(regionInfo, row)) {
2024 throw new WrongRegionException("Requested row out of range for " +
2025 "HRegion " + this + ", startKey='" +
2026 Bytes.toStringBinary(regionInfo.getStartKey()) + "', getEndKey()='" +
2027 Bytes.toStringBinary(regionInfo.getEndKey()) + "', row='" +
2028 Bytes.toStringBinary(row) + "'");
2029 }
2030 }
2031
  /**
   * Obtain a lock on the given row, blocking until it is available.  The
   * returned lock id is only meaningful within this region and must later be
   * handed to {@link #releaseRowLock(Integer)}.  Row locks let a caller run
   * a multi-step update on a row without other writers interleaving.
   *
   * @param row the row to lock
   * @return the id of the acquired row lock
   * @throws IOException if the row is not in this region
   */
2055 public Integer obtainRowLock(final byte [] row) throws IOException {
2056 startRegionOperation();
2057 try {
2058 return internalObtainRowLock(row, true);
2059 } finally {
2060 closeRegionOperation();
2061 }
2062 }
2063
2064
2065
2066
2067
2068
2069
2070 public Integer tryObtainRowLock(final byte[] row) throws IOException {
2071 startRegionOperation();
2072 try {
2073 return internalObtainRowLock(row, false);
2074 } finally {
2075 closeRegionOperation();
2076 }
2077 }
2078
2079
2080
2081
2082
2083
2084
2085 private Integer internalObtainRowLock(final byte[] row, boolean waitForLock)
2086 throws IOException {
2087 checkRow(row);
2088 startRegionOperation();
2089 try {
2090 synchronized (lockedRows) {
2091 while (lockedRows.contains(row)) {
2092 if (!waitForLock) {
2093 return null;
2094 }
2095 try {
2096 lockedRows.wait();
2097 } catch (InterruptedException ie) {
2098
2099 }
2100 }
2101
2102
2103
2104
2105
2106
2107 byte [] prev = null;
2108 Integer lockId = null;
2109 do {
2110 lockId = new Integer(lockIdGenerator++);
2111 prev = lockIds.put(lockId, row);
2112 if (prev != null) {
2113 lockIds.put(lockId, prev);
2114 lockIdGenerator = rand.nextInt();
2115 }
2116 } while (prev != null);
2117
2118 lockedRows.add(row);
2119 lockedRows.notifyAll();
2120 return lockId;
2121 }
2122 } finally {
2123 closeRegionOperation();
2124 }
2125 }
2126
2127
2128
2129
2130
2131
2132 byte [] getRowFromLock(final Integer lockid) {
2133 synchronized (lockedRows) {
2134 return lockIds.get(lockid);
2135 }
2136 }
2137
2138
2139
2140
2141
2142 void releaseRowLock(final Integer lockid) {
2143 synchronized (lockedRows) {
2144 byte[] row = lockIds.remove(lockid);
2145 lockedRows.remove(row);
2146 lockedRows.notifyAll();
2147 }
2148 }
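
  // A minimal sketch of holding one row lock across several operations on
  // the same row (error handling omitted).  The lock id from obtainRowLock()
  // is passed back into put/delete so they reuse it rather than acquiring
  // their own:
  //
  //   Integer lockId = region.obtainRowLock(row);
  //   try {
  //     region.put(put1, lockId);
  //     region.put(put2, lockId);
  //   } finally {
  //     region.releaseRowLock(lockId);
  //   }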
2149
2150
2151
2152
2153
2154
2155 boolean isRowLocked(final Integer lockid) {
2156 synchronized (lockedRows) {
2157 if (lockIds.get(lockid) != null) {
2158 return true;
2159 }
2160 return false;
2161 }
2162 }
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173 private Integer getLock(Integer lockid, byte [] row, boolean waitForLock)
2174 throws IOException {
2175 Integer lid = null;
2176 if (lockid == null) {
2177 lid = internalObtainRowLock(row, waitForLock);
2178 } else {
2179 if (!isRowLocked(lockid)) {
2180 throw new IOException("Invalid row lock");
2181 }
2182 lid = lockid;
2183 }
2184 return lid;
2185 }
2186
2187 public void bulkLoadHFile(String hfilePath, byte[] familyName)
2188 throws IOException {
2189 startRegionOperation();
2190 try {
2191 Store store = getStore(familyName);
2192 if (store == null) {
2193 throw new DoNotRetryIOException(
2194 "No such column family " + Bytes.toStringBinary(familyName));
2195 }
2196 store.bulkLoadHFile(hfilePath);
2197 } finally {
2198 closeRegionOperation();
2199 }
2200
2201 }
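
  // A minimal sketch of bulk loading a pre-built HFile into one column
  // family; the path is hypothetical and the file is assumed to have been
  // written in the family's expected HFile format:
  //
  //   region.bulkLoadHFile("/hbase/.tmp/cf/hfile-0001", Bytes.toBytes("cf"));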
2202
2203
2204 @Override
2205 public boolean equals(Object o) {
2206 if (!(o instanceof HRegion)) {
2207 return false;
2208 }
2209 return this.hashCode() == ((HRegion)o).hashCode();
2210 }
2211
2212 @Override
2213 public int hashCode() {
2214 return Bytes.hashCode(this.regionInfo.getRegionName());
2215 }
2216
2217 @Override
2218 public String toString() {
2219 return this.regionInfo.getRegionNameAsString();
2220 }
2221
2222
2223 public Path getTableDir() {
2224 return this.tableDir;
2225 }
2226
2227
2228
2229
2230
2231
2232 class RegionScanner implements InternalScanner {
2233
2234 KeyValueHeap storeHeap = null;
2235 private final byte [] stopRow;
2236 private Filter filter;
2237 private List<KeyValue> results = new ArrayList<KeyValue>();
2238 private int batch;
2239 private int isScan;
2240 private boolean filterClosed = false;
2241 private long readPt;
2242
2243 public HRegionInfo getRegionName() {
2244 return regionInfo;
2245 }
2246 RegionScanner(Scan scan, List<KeyValueScanner> additionalScanners) throws IOException {
2247
2248 this.filter = scan.getFilter();
2249 this.batch = scan.getBatch();
2250 if (Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW)) {
2251 this.stopRow = null;
2252 } else {
2253 this.stopRow = scan.getStopRow();
2254 }
2255
2256
2257 this.isScan = scan.isGetScan() ? -1 : 0;
2258
2259 this.readPt = ReadWriteConsistencyControl.resetThreadReadPoint(rwcc);
2260
2261 List<KeyValueScanner> scanners = new ArrayList<KeyValueScanner>();
2262 if (additionalScanners != null) {
2263 scanners.addAll(additionalScanners);
2264 }
2265
2266 for (Map.Entry<byte[], NavigableSet<byte[]>> entry :
2267 scan.getFamilyMap().entrySet()) {
2268 Store store = stores.get(entry.getKey());
2269 scanners.add(store.getScanner(scan, entry.getValue()));
2270 }
2271 this.storeHeap = new KeyValueHeap(scanners, comparator);
2272 }
2273
2274 RegionScanner(Scan scan) throws IOException {
2275 this(scan, null);
2276 }
2277
2278
2279
2280
2281 protected void resetFilters() {
2282 if (filter != null) {
2283 filter.reset();
2284 }
2285 }
2286
2287 public synchronized boolean next(List<KeyValue> outResults, int limit)
2288 throws IOException {
2289 if (this.filterClosed) {
2290 throw new UnknownScannerException("Scanner was closed (timed out?) " +
2291 "after we renewed it. Could be caused by a very slow scanner " +
2292 "or a lengthy garbage collection");
2293 }
2294 startRegionOperation();
2295 try {
2296
2297
2298 ReadWriteConsistencyControl.setThreadReadPoint(this.readPt);
2299
2300 results.clear();
2301 boolean returnResult = nextInternal(limit);
2302
2303 outResults.addAll(results);
2304 resetFilters();
2305 if (isFilterDone()) {
2306 return false;
2307 }
2308 return returnResult;
2309 } finally {
2310 closeRegionOperation();
2311 }
2312 }
2313
2314 public synchronized boolean next(List<KeyValue> outResults)
2315 throws IOException {
2316
2317 return next(outResults, batch);
2318 }
2319
2320
2321
2322
2323 synchronized boolean isFilterDone() {
2324 return this.filter != null && this.filter.filterAllRemaining();
2325 }
2326
2327 private boolean nextInternal(int limit) throws IOException {
2328 while (true) {
2329 byte [] currentRow = peekRow();
2330 if (isStopRow(currentRow)) {
2331 if (filter != null && filter.hasFilterRow()) {
2332 filter.filterRow(results);
2333 }
2334 if (filter != null && filter.filterRow()) {
2335 results.clear();
2336 }
2337
2338 return false;
2339 } else if (filterRowKey(currentRow)) {
2340 nextRow(currentRow);
2341 } else {
2342 byte [] nextRow;
2343 do {
2344 this.storeHeap.next(results, limit - results.size());
2345 if (limit > 0 && results.size() == limit) {
2346 if (this.filter != null && filter.hasFilterRow()) throw new IncompatibleFilterException(
2347 "Filter with filterRow(List<KeyValue>) incompatible with scan with limit!");
2348 return true;
2349 }
2350 } while (Bytes.equals(currentRow, nextRow = peekRow()));
2351
2352 final boolean stopRow = isStopRow(nextRow);
2353
2354
2355
2356
2357 if (filter != null && filter.hasFilterRow()) {
2358 filter.filterRow(results);
2359 }
2360
2361 if (results.isEmpty() || filterRow()) {
2362
2363
2364
2365
2366
2367 nextRow(currentRow);
2368
2369
2370
2371
2372 if (!stopRow) continue;
2373 }
2374 return !stopRow;
2375 }
2376 }
2377 }
2378
2379 private boolean filterRow() {
2380 return filter != null
2381 && filter.filterRow();
2382 }
2383 private boolean filterRowKey(byte[] row) {
2384 return filter != null
2385 && filter.filterRowKey(row, 0, row.length);
2386 }
2387
2388 protected void nextRow(byte [] currentRow) throws IOException {
2389 while (Bytes.equals(currentRow, peekRow())) {
2390 this.storeHeap.next(MOCKED_LIST);
2391 }
2392 results.clear();
2393 resetFilters();
2394 }
2395
2396 private byte[] peekRow() {
2397 KeyValue kv = this.storeHeap.peek();
2398 return kv == null ? null : kv.getRow();
2399 }
2400
2401 private boolean isStopRow(byte [] currentRow) {
2402 return currentRow == null ||
2403 (stopRow != null &&
2404 comparator.compareRows(stopRow, 0, stopRow.length,
2405 currentRow, 0, currentRow.length) <= isScan);
2406 }
2407
2408 public synchronized void close() {
2409 if (storeHeap != null) {
2410 storeHeap.close();
2411 storeHeap = null;
2412 }
2413 this.filterClosed = true;
2414 }
2415 }
2416
  /**
   * Utility for creating HRegion instances.  The concrete class is taken
   * from the {@link HConstants#REGION_IMPL} configuration property
   * (defaulting to HRegion) and constructed reflectively with the standard
   * constructor.
   *
   * @param tableDir qualified path of the directory in which the table lives
   * @param log the write-ahead log shared by the regions of this server
   * @param fs the filesystem
   * @param conf global configuration settings
   * @param regionInfo descriptor of the region to instantiate
   * @param flushListener callback used to request memstore flushes; may be null
   * @return a new instance of the configured HRegion implementation
   */
2439 public static HRegion newHRegion(Path tableDir, HLog log, FileSystem fs, Configuration conf,
2440 HRegionInfo regionInfo, FlushRequester flushListener) {
2441 try {
2442 @SuppressWarnings("unchecked")
2443 Class<? extends HRegion> regionClass =
2444 (Class<? extends HRegion>) conf.getClass(HConstants.REGION_IMPL, HRegion.class);
2445
2446 Constructor<? extends HRegion> c =
2447 regionClass.getConstructor(Path.class, HLog.class, FileSystem.class,
2448 Configuration.class, HRegionInfo.class, FlushRequester.class);
2449
2450 return c.newInstance(tableDir, log, fs, conf, regionInfo, flushListener);
2451 } catch (Throwable e) {
2452
2453 throw new IllegalStateException("Could not instantiate a region instance.", e);
2454 }
2455 }
2456
  /**
   * Convenience method for creating a brand-new region on the filesystem:
   * it makes the region directory, creates a region-local {@link HLog}, and
   * returns an initialized HRegion.
   *
   * @param info descriptor of the region to create
   * @param rootDir root directory of the HBase instance
   * @param conf configuration
   * @return the new, initialized region
   * @throws IOException e
   */
2470 public static HRegion createHRegion(final HRegionInfo info, final Path rootDir,
2471 final Configuration conf)
2472 throws IOException {
2473 Path tableDir =
2474 HTableDescriptor.getTableDir(rootDir, info.getTableDesc().getName());
2475 Path regionDir = HRegion.getRegionDir(tableDir, info.getEncodedName());
2476 FileSystem fs = FileSystem.get(conf);
2477 fs.mkdirs(regionDir);
2478 HRegion region = HRegion.newHRegion(tableDir,
2479 new HLog(fs, new Path(regionDir, HConstants.HREGION_LOGDIR_NAME),
2480 new Path(regionDir, HConstants.HREGION_OLDLOGDIR_NAME), conf),
2481 fs, conf, info, null);
2482 region.initialize();
2483 return region;
2484 }
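
  // A minimal sketch of bootstrapping a region with the helper above (the
  // descriptor and rootDir are made up; error handling omitted):
  //
  //   HTableDescriptor desc = new HTableDescriptor("mytable");
  //   desc.addFamily(new HColumnDescriptor("cf"));
  //   HRegionInfo info = new HRegionInfo(desc, HConstants.EMPTY_BYTE_ARRAY,
  //       HConstants.EMPTY_BYTE_ARRAY);
  //   HRegion r = HRegion.createHRegion(info, rootDir, conf);
  //   // ... use r, then close it and its region-local log ...
  //   r.close();
  //   r.getLog().close();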
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498 public static HRegion openHRegion(final HRegionInfo info, final HLog wal,
2499 final Configuration conf)
2500 throws IOException {
2501 return openHRegion(info, wal, conf, null, null);
2502 }
2503
  /**
   * Open an HRegion.
   *
   * @param info descriptor of the region to open
   * @param wal the shared write-ahead log; after a successful open its
   *   sequence number is advanced past this region's last edit
   * @param conf configuration
   * @param flusher callback used to request memstore flushes; may be null
   * @param reporter progress reporter used while recovered edits are
   *   replayed; may be null
   * @return the opened region
   * @throws IOException e
   */
2518 public static HRegion openHRegion(final HRegionInfo info, final HLog wal,
2519 final Configuration conf, final FlushRequester flusher,
2520 final CancelableProgressable reporter)
2521 throws IOException {
2522 if (LOG.isDebugEnabled()) {
2523 LOG.debug("Opening region: " + info);
2524 }
2525 if (info == null) {
2526 throw new NullPointerException("Passed region info is null");
2527 }
2528 Path dir = HTableDescriptor.getTableDir(FSUtils.getRootDir(conf),
2529 info.getTableDesc().getName());
2530 HRegion r = HRegion.newHRegion(dir, wal, FileSystem.get(conf), conf, info,
2531 flusher);
2532 return r.openHRegion(reporter);
2533 }
2534
2535
2536
2537
2538
2539
2540
2541
2542 protected HRegion openHRegion(final CancelableProgressable reporter)
2543 throws IOException {
2544 checkCompressionCodecs();
2545
2546 long seqid = initialize(reporter);
2547 if (this.log != null) {
2548 this.log.setSequenceNumber(seqid);
2549 }
2550 return this;
2551 }
2552
2553 private void checkCompressionCodecs() throws IOException {
2554 for (HColumnDescriptor fam: regionInfo.getTableDesc().getColumnFamilies()) {
2555 CompressionTest.testCompression(fam.getCompression());
2556 CompressionTest.testCompression(fam.getCompactionCompression());
2557 }
2558 }
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570 public static void addRegionToMETA(HRegion meta, HRegion r)
2571 throws IOException {
2572 meta.checkResources();
2573
2574 byte[] row = r.getRegionName();
2575 Integer lid = meta.obtainRowLock(row);
2576 try {
2577 final List<KeyValue> edits = new ArrayList<KeyValue>(1);
2578 edits.add(new KeyValue(row, HConstants.CATALOG_FAMILY,
2579 HConstants.REGIONINFO_QUALIFIER,
2580 EnvironmentEdgeManager.currentTimeMillis(),
2581 Writables.getBytes(r.getRegionInfo())));
2582 meta.put(HConstants.CATALOG_FAMILY, edits);
2583 } finally {
2584 meta.releaseRowLock(lid);
2585 }
2586 }
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596 public static void deleteRegion(FileSystem fs, Path rootdir, HRegionInfo info)
2597 throws IOException {
2598 deleteRegion(fs, HRegion.getRegionDir(rootdir, info));
2599 }
2600
2601 private static void deleteRegion(FileSystem fs, Path regiondir)
2602 throws IOException {
2603 if (LOG.isDebugEnabled()) {
2604 LOG.debug("DELETING region " + regiondir.toString());
2605 }
2606 if (!fs.delete(regiondir, true)) {
2607 LOG.warn("Failed delete of " + regiondir);
2608 }
2609 }
2610
2611
2612
2613
2614
2615
2616
2617
2618 public static Path getRegionDir(final Path rootdir, final HRegionInfo info) {
2619 return new Path(
2620 HTableDescriptor.getTableDir(rootdir, info.getTableDesc().getName()),
2621 info.getEncodedName());
2622 }
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632 public static boolean rowIsInRange(HRegionInfo info, final byte [] row) {
2633 return ((info.getStartKey().length == 0) ||
2634 (Bytes.compareTo(info.getStartKey(), row) <= 0)) &&
2635 ((info.getEndKey().length == 0) ||
2636 (Bytes.compareTo(info.getEndKey(), row) > 0));
2637 }
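  /*
   * A minimal sketch of the expected behaviour; the table descriptor (desc)
   * and the region boundaries are made up for the example.
   *
   *   // region covering ["b", "d")
   *   HRegionInfo hri = new HRegionInfo(desc, Bytes.toBytes("b"), Bytes.toBytes("d"));
   *   HRegion.rowIsInRange(hri, Bytes.toBytes("b")); // true, start key is inclusive
   *   HRegion.rowIsInRange(hri, Bytes.toBytes("c")); // true
   *   HRegion.rowIsInRange(hri, Bytes.toBytes("d")); // false, end key is exclusive
   */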
2638
  /**
   * Make the directory for the given column family under the region's
   * directory, logging a warning if it could not be created.
   *
   * @param fs filesystem to create the directory on
   * @param tabledir table directory
   * @param hri info for the region the family belongs to
   * @param colFamily name of the column family
   * @throws IOException e
   */
2648 public static void makeColumnFamilyDirs(FileSystem fs, Path tabledir,
2649 final HRegionInfo hri, byte [] colFamily)
2650 throws IOException {
2651 Path dir = Store.getStoreHomedir(tabledir, hri.getEncodedName(), colFamily);
2652 if (!fs.mkdirs(dir)) {
2653 LOG.warn("Failed to create " + dir);
2654 }
2655 }
2656
  /**
   * Merge two adjacent regions of the same table into one. The regions must
   * share a boundary: the end key of one must equal the start key of the
   * other. Both source regions are closed and their directories deleted as
   * part of the merge.
   *
   * @param srcA one of the regions to merge
   * @param srcB the other region to merge
   * @return the new merged HRegion
   * @throws IOException if the regions are not adjacent or the merge fails
   */
2665 public static HRegion mergeAdjacent(final HRegion srcA, final HRegion srcB)
2666 throws IOException {
2667 HRegion a = srcA;
2668 HRegion b = srcB;
2669
    // Make sure the region with the lower start key is 'a'; key ordering
    // matters when the merged region is written.
2672 if (srcA.getStartKey() == null) {
2673 if (srcB.getStartKey() == null) {
2674 throw new IOException("Cannot merge two regions with null start key");
2675 }
      // srcA's start key is null but srcB's is not, so srcA already comes first
2677 } else if ((srcB.getStartKey() == null) ||
2678 (Bytes.compareTo(srcA.getStartKey(), srcB.getStartKey()) > 0)) {
2679 a = srcB;
2680 b = srcA;
2681 }
2682
2683 if (!(Bytes.compareTo(a.getEndKey(), b.getStartKey()) == 0)) {
2684 throw new IOException("Cannot merge non-adjacent regions");
2685 }
2686 return merge(a, b);
2687 }
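  /*
   * An illustrative sketch of merging two open, adjacent regions of the same
   * table; the region variables (regionA, regionB) are assumptions made for
   * the example. Both source regions are closed and deleted as a side effect.
   *
   *   HRegion merged = HRegion.mergeAdjacent(regionA, regionB);
   *   LOG.info("Merged into " + merged);
   */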
2688
  /**
   * Merge two regions of the same table into a single new region. The regions
   * are flushed and major compacted, their store files are moved under the
   * new region's directory, the new region is initialized and compacted, and
   * the source region directories are deleted. Adjacency is not verified;
   * callers that need that check should use mergeAdjacent.
   *
   * @param a a region
   * @param b another region of the same table
   * @return the new merged HRegion
   * @throws IOException e
   */
2697 public static HRegion merge(HRegion a, HRegion b) throws IOException {
2698 if (!a.getRegionInfo().getTableDesc().getNameAsString().equals(
2699 b.getRegionInfo().getTableDesc().getNameAsString())) {
2700 throw new IOException("Regions do not belong to the same table");
2701 }
2702
2703 FileSystem fs = a.getFilesystem();
2704
2705
    // Flush both caches so everything to be merged is on disk
2707 a.flushcache();
2708 b.flushcache();
2709
2710
    // Major compact each region so there is at most one store file per family
2712 a.compactStores(true);
2713 if (LOG.isDebugEnabled()) {
2714 LOG.debug("Files for region: " + a);
2715 listPaths(fs, a.getRegionDir());
2716 }
2717 b.compactStores(true);
2718 if (LOG.isDebugEnabled()) {
2719 LOG.debug("Files for region: " + b);
2720 listPaths(fs, b.getRegionDir());
2721 }
2722
2723 Configuration conf = a.getConf();
2724 HTableDescriptor tabledesc = a.getTableDesc();
2725 HLog log = a.getLog();
2726 Path tableDir = a.getTableDir();
2727
    // Compute the merged region's start and end keys; an empty byte array means unbounded
2729 final byte[] startKey =
2730 (a.comparator.matchingRows(a.getStartKey(), 0, a.getStartKey().length,
2731 HConstants.EMPTY_BYTE_ARRAY, 0, HConstants.EMPTY_BYTE_ARRAY.length)
2732 || b.comparator.matchingRows(b.getStartKey(), 0,
2733 b.getStartKey().length, HConstants.EMPTY_BYTE_ARRAY, 0,
2734 HConstants.EMPTY_BYTE_ARRAY.length))
2735 ? HConstants.EMPTY_BYTE_ARRAY
2736 : (a.comparator.compareRows(a.getStartKey(), 0, a.getStartKey().length,
2737 b.getStartKey(), 0, b.getStartKey().length) <= 0
2738 ? a.getStartKey()
2739 : b.getStartKey());
2740 final byte[] endKey =
2741 (a.comparator.matchingRows(a.getEndKey(), 0, a.getEndKey().length,
2742 HConstants.EMPTY_BYTE_ARRAY, 0, HConstants.EMPTY_BYTE_ARRAY.length)
2743 || a.comparator.matchingRows(b.getEndKey(), 0, b.getEndKey().length,
2744 HConstants.EMPTY_BYTE_ARRAY, 0,
2745 HConstants.EMPTY_BYTE_ARRAY.length))
2746 ? HConstants.EMPTY_BYTE_ARRAY
2747 : (a.comparator.compareRows(a.getEndKey(), 0, a.getEndKey().length,
2748 b.getEndKey(), 0, b.getEndKey().length) <= 0
2749 ? b.getEndKey()
2750 : a.getEndKey());
2751
2752 HRegionInfo newRegionInfo = new HRegionInfo(tabledesc, startKey, endKey);
2753 LOG.info("Creating new region " + newRegionInfo.toString());
2754 String encodedName = newRegionInfo.getEncodedName();
2755 Path newRegionDir = HRegion.getRegionDir(a.getTableDir(), encodedName);
2756 if(fs.exists(newRegionDir)) {
2757 throw new IOException("Cannot merge; target file collision at " +
2758 newRegionDir);
2759 }
2760 fs.mkdirs(newRegionDir);
2761
2762 LOG.info("starting merge of regions: " + a + " and " + b +
2763 " into new region " + newRegionInfo.toString() +
2764 " with start key <" + Bytes.toString(startKey) + "> and end key <" +
2765 Bytes.toString(endKey) + ">");
2766
    // Close the source regions and move their store files under the new region directory, grouped by family
2768 Map<byte [], List<StoreFile>> byFamily =
2769 new TreeMap<byte [], List<StoreFile>>(Bytes.BYTES_COMPARATOR);
2770 byFamily = filesByFamily(byFamily, a.close());
2771 byFamily = filesByFamily(byFamily, b.close());
2772 for (Map.Entry<byte [], List<StoreFile>> es : byFamily.entrySet()) {
2773 byte [] colFamily = es.getKey();
2774 makeColumnFamilyDirs(fs, tableDir, newRegionInfo, colFamily);
2775
      // After major compaction each source region should contribute at most one file per family
2777 List<StoreFile> srcFiles = es.getValue();
2778 if (srcFiles.size() == 2) {
2779 long seqA = srcFiles.get(0).getMaxSequenceId();
2780 long seqB = srcFiles.get(1).getMaxSequenceId();
2781 if (seqA == seqB) {
          // Two store files with the same max sequence id cannot be ordered
          // against each other; this should not happen after compaction, so
          // refuse to merge rather than risk replaying edits in the wrong order.
2785 throw new IOException("Files have same sequenceid: " + seqA);
2786 }
2787 }
2788 for (StoreFile hsf: srcFiles) {
2789 StoreFile.rename(fs, hsf.getPath(),
2790 StoreFile.getUniqueFile(fs, Store.getStoreHomedir(tableDir,
2791 newRegionInfo.getEncodedName(), colFamily)));
2792 }
2793 }
2794 if (LOG.isDebugEnabled()) {
2795 LOG.debug("Files for new region");
2796 listPaths(fs, newRegionDir);
2797 }
2798 HRegion dstRegion = HRegion.newHRegion(tableDir, log, fs, conf, newRegionInfo, null);
2799 dstRegion.initialize();
2800 dstRegion.compactStores();
2801 if (LOG.isDebugEnabled()) {
2802 LOG.debug("Files for new region");
2803 listPaths(fs, dstRegion.getRegionDir());
2804 }
2805 deleteRegion(fs, a.getRegionDir());
2806 deleteRegion(fs, b.getRegionDir());
2807
2808 LOG.info("merge completed. New region is " + dstRegion);
2809
2810 return dstRegion;
2811 }
2812
  /**
   * Group the given store files by column family, adding them to the passed
   * map.
   *
   * @param byFamily map from family name to that family's store files
   * @param storeFiles store files to bucket by family
   * @return the passed byFamily map
   */
2820 private static Map<byte [], List<StoreFile>> filesByFamily(
2821 Map<byte [], List<StoreFile>> byFamily, List<StoreFile> storeFiles) {
2822 for (StoreFile src: storeFiles) {
2823 byte [] family = src.getFamily();
2824 List<StoreFile> v = byFamily.get(family);
2825 if (v == null) {
2826 v = new ArrayList<StoreFile>();
2827 byFamily.put(family, v);
2828 }
2829 v.add(src);
2830 }
2831 return byFamily;
2832 }
2833
  /**
   * @return true if any store in this region is due for a major compaction
   * @throws IOException e
   */
2838 boolean isMajorCompaction() throws IOException {
2839 for (Store store: this.stores.values()) {
2840 if (store.isMajorCompaction()) {
2841 return true;
2842 }
2843 }
2844 return false;
2845 }
2846
  /**
   * Recursively list the files and directories under dir at DEBUG level.
   * Useful when debugging merges and splits.
   *
   * @param fs filesystem to examine
   * @param dir directory to list
   * @throws IOException e
   */
2854 private static void listPaths(FileSystem fs, Path dir) throws IOException {
2855 if (LOG.isDebugEnabled()) {
2856 FileStatus[] stats = fs.listStatus(dir);
2857 if (stats == null || stats.length == 0) {
2858 return;
2859 }
2860 for (int i = 0; i < stats.length; i++) {
2861 String path = stats[i].getPath().toString();
2862 if (stats[i].isDir()) {
2863 LOG.debug("d " + path);
2864 listPaths(fs, stats[i].getPath());
2865 } else {
2866 LOG.debug("f " + path + " size=" + stats[i].getLen());
2867 }
2868 }
2869 }
2870 }
2871
  /**
   * Perform a get against this region. If the Get names no families, every
   * family of the table is added before the read.
   *
   * @param get the Get to execute
   * @param lockid not used by this read path
   * @return the Result of the get
   * @throws IOException e
   */
2882 public Result get(final Get get, final Integer lockid) throws IOException {
    // Verify that all requested families exist in this table
2884 if (get.hasFamilies()) {
2885 for (byte [] family: get.familySet()) {
2886 checkFamily(family);
2887 }
2888 } else {
2889 for (byte[] family: regionInfo.getTableDesc().getFamiliesKeys()) {
2890 get.addFamily(family);
2891 }
2892 }
2893 List<KeyValue> result = get(get);
2894
2895 return new Result(result);
2896 }
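  /*
   * An illustrative sketch of a single-column read against this region; the
   * row, family and qualifier names are assumptions made for the example.
   *
   *   Get g = new Get(Bytes.toBytes("row1"));
   *   g.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"));
   *   Result result = region.get(g, null);
   *   byte [] value = result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("q"));
   */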
2897
  /**
   * Read the current values of the columns named in the Get, optimized for
   * the increment path: the memstore is scanned first, and the store files
   * are only consulted for columns that were not found there. Results from
   * both phases are merged and returned in sorted order.
   *
   * @param get the Get naming the columns to read
   * @return the current KeyValues for the requested columns
   * @throws IOException e
   */
2912 private List<KeyValue> getLastIncrement(final Get get) throws IOException {
2913 InternalScan iscan = new InternalScan(get);
2914
2915 List<KeyValue> results = new ArrayList<KeyValue>();
2916
    // First phase: scan the memstore only
2918 iscan.checkOnlyMemStore();
2919 InternalScanner scanner = null;
2920 try {
2921 scanner = getScanner(iscan);
2922 scanner.next(results);
2923 } finally {
2924 if (scanner != null)
2925 scanner.close();
2926 }
2927
    // Count how many columns the Get asked for
2929 int expected = 0;
2930 Map<byte[], NavigableSet<byte[]>> familyMap = get.getFamilyMap();
2931 for (NavigableSet<byte[]> qfs : familyMap.values()) {
2932 expected += qfs.size();
2933 }
2934
    // Everything was found in the memstore, so we are done
2936 if (results.size() == expected) {
2937 return results;
2938 }
2939
    // Some columns are still missing; narrow the request to just those
2941 if (results != null && !results.isEmpty()) {
2942
2943 for (KeyValue kv : results) {
2944 byte [] family = kv.getFamily();
2945 NavigableSet<byte[]> qfs = familyMap.get(family);
2946 qfs.remove(kv.getQualifier());
2947 if (qfs.isEmpty()) familyMap.remove(family);
2948 expected--;
2949 }
2950
2951 Get newGet = new Get(get.getRow());
2952 for (Map.Entry<byte[], NavigableSet<byte[]>> f : familyMap.entrySet()) {
2953 byte [] family = f.getKey();
2954 for (byte [] qualifier : f.getValue()) {
2955 newGet.addColumn(family, qualifier);
2956 }
2957 }
2958 newGet.setTimeRange(get.getTimeRange().getMin(),
2959 get.getTimeRange().getMax());
2960 iscan = new InternalScan(newGet);
2961 }
2962
    // Second phase: check the store files for whatever is still missing
2964 List<KeyValue> fileResults = new ArrayList<KeyValue>();
2965 iscan.checkOnlyStoreFiles();
2966 scanner = null;
2967 try {
2968 scanner = getScanner(iscan);
2969 scanner.next(fileResults);
2970 } finally {
2971 if (scanner != null)
2972 scanner.close();
2973 }
2974
    // Merge the results of both phases and return them in sorted order
2976 results.addAll(fileResults);
2977 Collections.sort(results, KeyValue.COMPARATOR);
2978 return results;
2979 }
2980
  /**
   * Execute the Get as a single-row Scan over this region.
   *
   * @param get the Get to execute
   * @return the matching KeyValues
   * @throws IOException e
   */
2984 private List<KeyValue> get(final Get get) throws IOException {
2985 Scan scan = new Scan(get);
2986
2987 List<KeyValue> results = new ArrayList<KeyValue>();
2988
2989 InternalScanner scanner = null;
2990 try {
2991 scanner = getScanner(scan);
2992 scanner.next(results);
2993 } finally {
2994 if (scanner != null)
2995 scanner.close();
2996 }
2997 return results;
2998 }
2999
  /**
   * Perform one or more increment operations on a row. The increments are
   * applied under the row lock, but reads do not take that lock, so a
   * concurrent get or scan may observe the increment partially applied.
   *
   * @param increment the columns and amounts to increment
   * @param lockid id of an existing row lock to use, or null to acquire one
   * @param writeToWAL whether the resulting edits should be written to the WAL
   * @return a Result containing the new values of all incremented columns
   * @throws IOException e
   */
3011 public Result increment(Increment increment, Integer lockid,
3012 boolean writeToWAL)
3013 throws IOException {
3014
3015 byte [] row = increment.getRow();
3016 checkRow(row);
3017 TimeRange tr = increment.getTimeRange();
3018 boolean flush = false;
3019 WALEdit walEdits = null;
3020 List<KeyValue> allKVs = new ArrayList<KeyValue>(increment.numColumns());
3021 List<KeyValue> kvs = new ArrayList<KeyValue>(increment.numColumns());
3022 long now = EnvironmentEdgeManager.currentTimeMillis();
3023 long size = 0;
3024
3025
3026 startRegionOperation();
3027 try {
3028 Integer lid = getLock(lockid, row, true);
3029 this.updatesLock.readLock().lock();
3030 try {
3031
3032 for (Map.Entry<byte [], NavigableMap<byte [], Long>> family :
3033 increment.getFamilyMap().entrySet()) {
3034
3035 Store store = stores.get(family.getKey());
3036
        // Read the current values of every column being incremented in this family
3038 Get get = new Get(row);
3039 for (Map.Entry<byte [], Long> column : family.getValue().entrySet()) {
3040 get.addColumn(family.getKey(), column.getKey());
3041 }
3042 get.setTimeRange(tr.getMin(), tr.getMax());
3043 List<KeyValue> results = getLastIncrement(get);
3044
        // Walk the requested columns in step with the (sorted) results,
        // adding the existing value, if any, to the increment amount
3047 int idx = 0;
3048 for (Map.Entry<byte [], Long> column : family.getValue().entrySet()) {
3049 long amount = column.getValue();
3050 if (idx < results.size() &&
3051 results.get(idx).matchingQualifier(column.getKey())) {
3052 amount += Bytes.toLong(results.get(idx).getValue());
3053 idx++;
3054 }
3055
3056
3057 KeyValue newKV = new KeyValue(row, family.getKey(), column.getKey(),
3058 now, Bytes.toBytes(amount));
3059 kvs.add(newKV);
3060
3061
3062 if (writeToWAL) {
3063 if (walEdits == null) {
3064 walEdits = new WALEdit();
3065 }
3066 walEdits.add(newKV);
3067 }
3068 }
3069
        // Write the new values for this family straight into the memstore
3071 size += store.upsert(kvs);
3072 allKVs.addAll(kvs);
3073 kvs.clear();
3074 }
3075
      // Append all accumulated edits to the WAL in a single call
3077 if (writeToWAL) {
3078 this.log.append(regionInfo, regionInfo.getTableDesc().getName(),
3079 walEdits, now);
3080 }
3081
3082 size = this.memstoreSize.addAndGet(size);
3083 flush = isFlushSize(size);
3084 } finally {
3085 this.updatesLock.readLock().unlock();
3086 releaseRowLock(lid);
3087 }
3088 } finally {
3089 closeRegionOperation();
3090 }
3091
3092 if (flush) {
3093
3094 requestFlush();
3095 }
3096
3097 return new Result(allKVs);
3098 }
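  /*
   * An illustrative sketch of incrementing two counters in one call; the row,
   * family and qualifier names are assumptions made for the example.
   *
   *   Increment incr = new Increment(Bytes.toBytes("row1"));
   *   incr.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("hits"), 1);
   *   incr.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("bytes"), 512);
   *   Result counters = region.increment(incr, null, true);
   */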
3099
  /**
   * Atomically increment a single column value under the row lock and return
   * the new value.
   *
   * @param row row the column lives in
   * @param family column family
   * @param qualifier column qualifier
   * @param amount amount to increment by
   * @param writeToWAL whether the edit should be written to the WAL
   * @return the new value after the increment
   * @throws IOException e
   */
3110 public long incrementColumnValue(byte [] row, byte [] family,
3111 byte [] qualifier, long amount, boolean writeToWAL)
3112 throws IOException {
3113 checkRow(row);
3114 boolean flush = false;
3115
3116 long result = amount;
3117 startRegionOperation();
3118 try {
3119 Integer lid = obtainRowLock(row);
3120 this.updatesLock.readLock().lock();
3121 try {
3122 Store store = stores.get(family);
3123
      // Read the current value, checking the memstore before the store files
3125 Get get = new Get(row);
3126 get.addColumn(family, qualifier);
3127
3128 List<KeyValue> results = getLastIncrement(get);
3129
3130 if (!results.isEmpty()) {
3131 KeyValue kv = results.get(0);
3132 byte [] buffer = kv.getBuffer();
3133 int valueOffset = kv.getValueOffset();
3134 result += Bytes.toLong(buffer, valueOffset, Bytes.SIZEOF_LONG);
3135 }
3136
      // Build the KeyValue carrying the incremented value
3138 KeyValue newKv = new KeyValue(row, family,
3139 qualifier, EnvironmentEdgeManager.currentTimeMillis(),
3140 Bytes.toBytes(result));
3141
3142
3143 if (writeToWAL) {
3144 long now = EnvironmentEdgeManager.currentTimeMillis();
3145 WALEdit walEdit = new WALEdit();
3146 walEdit.add(newKv);
3147 this.log.append(regionInfo, regionInfo.getTableDesc().getName(),
3148 walEdit, now);
3149 }
3150
      // Apply the update in the store; this sets the timestamp appropriately
      // depending on whether a value already sits in the memstore, and returns
      // the resulting change in memstore size
3154 long size = store.updateColumnValue(row, family, qualifier, result);
3155
3156 size = this.memstoreSize.addAndGet(size);
3157 flush = isFlushSize(size);
3158 } finally {
3159 this.updatesLock.readLock().unlock();
3160 releaseRowLock(lid);
3161 }
3162 } finally {
3163 closeRegionOperation();
3164 }
3165
3166 if (flush) {
3167
3168 requestFlush();
3169 }
3170
3171 return result;
3172 }
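  /*
   * An illustrative sketch of bumping a single counter column by one and
   * reading back the new total; the row, family and qualifier names are
   * assumptions made for the example.
   *
   *   long newValue = region.incrementColumnValue(Bytes.toBytes("row1"),
   *       Bytes.toBytes("cf"), Bytes.toBytes("hits"), 1, true);
   */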
3173
  /**
   * Check that the given family exists in this region's table.
   *
   * @param family family name to check
   * @throws NoSuchColumnFamilyException if the family is not defined for the table
   */
3179 private void checkFamily(final byte [] family)
3180 throws NoSuchColumnFamilyException {
3181 if(!regionInfo.getTableDesc().hasFamily(family)) {
3182 throw new NoSuchColumnFamilyException("Column family " +
3183 Bytes.toString(family) + " does not exist in region " + this
3184 + " in table " + regionInfo.getTableDesc());
3185 }
3186 }
3187
3188 public static final long FIXED_OVERHEAD = ClassSize.align(
3189 (4 * Bytes.SIZEOF_LONG) + Bytes.SIZEOF_BOOLEAN +
3190 (21 * ClassSize.REFERENCE) + ClassSize.OBJECT + Bytes.SIZEOF_INT);
3191
3192 public static final long DEEP_OVERHEAD = ClassSize.align(FIXED_OVERHEAD +
3193 (ClassSize.OBJECT * 2) + (2 * ClassSize.ATOMIC_BOOLEAN) +
3194 ClassSize.ATOMIC_LONG + ClassSize.ATOMIC_INTEGER +
3195
3196
3197 ClassSize.TREEMAP +
3198
3199
3200 ClassSize.TREEMAP +
3201
3202 ClassSize.CONCURRENT_SKIPLISTMAP + ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY +
3203 ClassSize.align(ClassSize.OBJECT +
3204 (5 * Bytes.SIZEOF_BOOLEAN)) +
3205 (3 * ClassSize.REENTRANT_LOCK));
3206
3207 public long heapSize() {
3208 long heapSize = DEEP_OVERHEAD;
3209 for(Store store : this.stores.values()) {
3210 heapSize += store.heapSize();
3211 }
3212 return heapSize;
3213 }
3214
  /**
   * Print tool usage, optionally preceded by an error message, and exit with
   * a non-zero status.
   */
3219 private static void printUsageAndExit(final String message) {
3220 if (message != null && message.length() > 0) System.out.println(message);
3221 System.out.println("Usage: HRegion CATLALOG_TABLE_DIR [major_compact]");
3222 System.out.println("Options:");
3223 System.out.println(" major_compact Pass this option to major compact " +
3224 "passed region.");
3225 System.out.println("Default outputs scan of passed region.");
3226 System.exit(1);
3227 }
3228
  /**
   * Open the catalog region found at the passed path (-ROOT- or .META. only)
   * and either major compact it or dump its contents to the log.
   *
   * @param fs filesystem the region lives on
   * @param p path of the catalog table directory
   * @param log HLog the region should use
   * @param c configuration
   * @param majorCompact if true, major compact instead of dumping
   * @throws IOException if the path is not a known catalog table
   */
3239 private static void processTable(final FileSystem fs, final Path p,
3240 final HLog log, final Configuration c,
3241 final boolean majorCompact)
3242 throws IOException {
3243 HRegion region = null;
3244 String rootStr = Bytes.toString(HConstants.ROOT_TABLE_NAME);
3245 String metaStr = Bytes.toString(HConstants.META_TABLE_NAME);
3246
3247 if (p.getName().startsWith(rootStr)) {
3248 region = HRegion.newHRegion(p, log, fs, c, HRegionInfo.ROOT_REGIONINFO, null);
3249 } else if (p.getName().startsWith(metaStr)) {
3250 region = HRegion.newHRegion(p, log, fs, c, HRegionInfo.FIRST_META_REGIONINFO,
3251 null);
3252 } else {
3253 throw new IOException("Not a known catalog table: " + p.toString());
3254 }
3255 try {
3256 region.initialize();
3257 if (majorCompact) {
3258 region.compactStores(true);
3259 } else {
3260
3261 Scan scan = new Scan();
3262
3263 InternalScanner scanner = region.getScanner(scan);
3264 try {
3265 List<KeyValue> kvs = new ArrayList<KeyValue>();
3266 boolean done = false;
3267 do {
3268 kvs.clear();
3269 done = scanner.next(kvs);
3270 if (kvs.size() > 0) LOG.info(kvs);
3271 } while (done);
3272 } finally {
3273 scanner.close();
3274 }
3275
3276 }
3277 } finally {
3278 region.close();
3279 }
3280 }
3281
  /**
   * Set the split request flag for this region.
   *
   * @param b new value of the flag
   * @return the previous value of the flag
   */
3287 public boolean shouldSplit(boolean b) {
3288 boolean old = this.splitRequest;
3289 this.splitRequest = b;
3290 return old;
3291 }
3292
  /**
   * Give the region a chance to prepare before it is split. The default
   * implementation does nothing.
   */
3296 protected void prepareToSplit() {
3297
3298 }
3299
  /**
   * @return the lowest compaction priority across all stores in this region;
   * lower values indicate a more urgent need to compact
   */
3303 public int getCompactPriority() {
3304 int count = Integer.MAX_VALUE;
3305 for(Store store : stores.values()) {
3306 count = Math.min(count, store.getCompactPriority());
3307 }
3308 return count;
3309 }
3310
  /**
   * @return true if any store in this region has accumulated more store files
   * than its configured limit
   */
3316 public boolean hasTooManyStoreFiles() {
3317 for(Store store : stores.values()) {
3318 if(store.hasTooManyStoreFiles()) {
3319 return true;
3320 }
3321 }
3322 return false;
3323 }
3324
  /**
   * Must be called before any public method that reads or modifies data. It
   * acquires the region's read lock and verifies that the region is neither
   * closing nor closed; closeRegionOperation must be called in the matching
   * finally block.
   *
   * @throws NotServingRegionException when the region is closing or closed
   */
3332 private void startRegionOperation() throws NotServingRegionException {
3333 if (this.closing.get()) {
3334 throw new NotServingRegionException(regionInfo.getRegionNameAsString() +
3335 " is closing");
3336 }
3337 lock.readLock().lock();
3338 if (this.closed.get()) {
3339 lock.readLock().unlock();
3340 throw new NotServingRegionException(regionInfo.getRegionNameAsString() +
3341 " is closed");
3342 }
3343 }
3344
  /**
   * Release the lock taken by startRegionOperation. Must be called in the
   * finally block of the corresponding try.
   */
3349 private void closeRegionOperation(){
3350 lock.readLock().unlock();
3351 }
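  /*
   * The two methods above always bracket region operations, as the increment
   * methods in this class do; a minimal sketch of the pattern:
   *
   *   startRegionOperation();
   *   try {
   *     // read or mutate region data here
   *   } finally {
   *     closeRegionOperation();
   *   }
   */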
3352
  /**
   * A mocked list implementation that silently discards all updates; handy
   * where a List is required but the results are never read.
   */
3356 private static final List<KeyValue> MOCKED_LIST = new AbstractList<KeyValue>() {
3357
3358 @Override
3359 public void add(int index, KeyValue element) {
3360
3361 }
3362
3363 @Override
3364 public boolean addAll(int index, Collection<? extends KeyValue> c) {
3365 return false;
3366 }
3367
3368 @Override
3369 public KeyValue get(int index) {
3370 throw new UnsupportedOperationException();
3371 }
3372
3373 @Override
3374 public int size() {
3375 return 0;
3376 }
3377 };
3378
  /**
   * Facility for dumping and major compacting catalog tables. Only catalog
   * tables (-ROOT- and .META.) are handled since their schema is known. With
   * just the table directory argument the region contents are dumped to the
   * log; pass major_compact to major compact the region instead.
   *
   * @param args catalog table directory, optionally followed by major_compact
   * @throws IOException e
   */
3390 public static void main(String[] args) throws IOException {
3391 if (args.length < 1) {
3392 printUsageAndExit(null);
3393 }
3394 boolean majorCompact = false;
3395 if (args.length > 1) {
3396 if (!args[1].toLowerCase().startsWith("major")) {
3397 printUsageAndExit("ERROR: Unrecognized option <" + args[1] + ">");
3398 }
3399 majorCompact = true;
3400 }
3401 final Path tableDir = new Path(args[0]);
3402 final Configuration c = HBaseConfiguration.create();
3403 final FileSystem fs = FileSystem.get(c);
3404 final Path logdir = new Path(c.get("hbase.tmp.dir"),
3405 "hlog" + tableDir.getName()
3406 + EnvironmentEdgeManager.currentTimeMillis());
3407 final Path oldLogDir = new Path(c.get("hbase.tmp.dir"),
3408 HConstants.HREGION_OLDLOGDIR_NAME);
3409 final HLog log = new HLog(fs, logdir, oldLogDir, c);
3410 try {
3411 processTable(fs, tableDir, log, c, majorCompact);
3412 } finally {
3413 log.close();
3414 BlockCache bc = StoreFile.getBlockCache(c);
3415 if (bc != null) bc.shutdown();
3416 }
3417 }
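  /*
   * An illustrative invocation; the .META. directory path below is an
   * assumption made for the example.
   *
   *   ./bin/hbase org.apache.hadoop.hbase.regionserver.HRegion /hbase/.META.
   *   ./bin/hbase org.apache.hadoop.hbase.regionserver.HRegion /hbase/.META. major_compact
   */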
3418 }