1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import java.io.EOFException;
22 import java.io.FileNotFoundException;
23 import java.io.IOException;
24 import java.io.InterruptedIOException;
25 import java.io.UnsupportedEncodingException;
26 import java.lang.reflect.Constructor;
27 import java.text.ParseException;
28 import java.util.AbstractList;
29 import java.util.ArrayList;
30 import java.util.Arrays;
31 import java.util.Collection;
32 import java.util.Collections;
33 import java.util.HashMap;
34 import java.util.Iterator;
35 import java.util.List;
36 import java.util.Map;
37 import java.util.NavigableMap;
38 import java.util.NavigableSet;
39 import java.util.RandomAccess;
40 import java.util.Set;
41 import java.util.TreeMap;
42 import java.util.UUID;
43 import java.util.concurrent.Callable;
44 import java.util.concurrent.CompletionService;
45 import java.util.concurrent.ConcurrentHashMap;
46 import java.util.concurrent.ConcurrentSkipListMap;
47 import java.util.concurrent.CountDownLatch;
48 import java.util.concurrent.ExecutionException;
49 import java.util.concurrent.ExecutorCompletionService;
50 import java.util.concurrent.ExecutorService;
51 import java.util.concurrent.Executors;
52 import java.util.concurrent.Future;
53 import java.util.concurrent.FutureTask;
54 import java.util.concurrent.ThreadFactory;
55 import java.util.concurrent.ThreadPoolExecutor;
56 import java.util.concurrent.TimeUnit;
57 import java.util.concurrent.TimeoutException;
58 import java.util.concurrent.atomic.AtomicBoolean;
59 import java.util.concurrent.atomic.AtomicInteger;
60 import java.util.concurrent.atomic.AtomicLong;
61 import java.util.concurrent.locks.Lock;
62 import java.util.concurrent.locks.ReentrantReadWriteLock;
63
64 import org.apache.commons.logging.Log;
65 import org.apache.commons.logging.LogFactory;
66 import org.apache.hadoop.conf.Configuration;
67 import org.apache.hadoop.fs.FileStatus;
68 import org.apache.hadoop.fs.FileSystem;
69 import org.apache.hadoop.fs.Path;
70 import org.apache.hadoop.hbase.Cell;
71 import org.apache.hadoop.hbase.CellScanner;
72 import org.apache.hadoop.hbase.CellUtil;
73 import org.apache.hadoop.hbase.CompoundConfiguration;
74 import org.apache.hadoop.hbase.DoNotRetryIOException;
75 import org.apache.hadoop.hbase.DroppedSnapshotException;
76 import org.apache.hadoop.hbase.HBaseConfiguration;
77 import org.apache.hadoop.hbase.HColumnDescriptor;
78 import org.apache.hadoop.hbase.HConstants;
79 import org.apache.hadoop.hbase.HConstants.OperationStatusCode;
80 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
81 import org.apache.hadoop.hbase.HRegionInfo;
82 import org.apache.hadoop.hbase.HTableDescriptor;
83 import org.apache.hadoop.hbase.KeyValue;
84 import org.apache.hadoop.hbase.KeyValueUtil;
85 import org.apache.hadoop.hbase.NamespaceDescriptor;
86 import org.apache.hadoop.hbase.NotServingRegionException;
87 import org.apache.hadoop.hbase.RegionTooBusyException;
88 import org.apache.hadoop.hbase.TableName;
89 import org.apache.hadoop.hbase.Tag;
90 import org.apache.hadoop.hbase.TagType;
91 import org.apache.hadoop.hbase.UnknownScannerException;
92 import org.apache.hadoop.hbase.backup.HFileArchiver;
93 import org.apache.hadoop.hbase.classification.InterfaceAudience;
94 import org.apache.hadoop.hbase.client.Append;
95 import org.apache.hadoop.hbase.client.Delete;
96 import org.apache.hadoop.hbase.client.Durability;
97 import org.apache.hadoop.hbase.client.Get;
98 import org.apache.hadoop.hbase.client.Increment;
99 import org.apache.hadoop.hbase.client.IsolationLevel;
100 import org.apache.hadoop.hbase.client.Mutation;
101 import org.apache.hadoop.hbase.client.Put;
102 import org.apache.hadoop.hbase.client.Result;
103 import org.apache.hadoop.hbase.client.RowMutations;
104 import org.apache.hadoop.hbase.client.Scan;
105 import org.apache.hadoop.hbase.coprocessor.RegionObserver;
106 import org.apache.hadoop.hbase.errorhandling.ForeignExceptionSnare;
107 import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException;
108 import org.apache.hadoop.hbase.exceptions.RegionInRecoveryException;
109 import org.apache.hadoop.hbase.exceptions.UnknownProtocolException;
110 import org.apache.hadoop.hbase.filter.ByteArrayComparable;
111 import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
112 import org.apache.hadoop.hbase.filter.FilterWrapper;
113 import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
114 import org.apache.hadoop.hbase.io.HeapSize;
115 import org.apache.hadoop.hbase.io.TimeRange;
116 import org.apache.hadoop.hbase.io.hfile.BlockCache;
117 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
118 import org.apache.hadoop.hbase.ipc.CallerDisconnectedException;
119 import org.apache.hadoop.hbase.ipc.RpcCallContext;
120 import org.apache.hadoop.hbase.ipc.RpcServer;
121 import org.apache.hadoop.hbase.master.AssignmentManager;
122 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
123 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
124 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
125 import org.apache.hadoop.hbase.protobuf.ResponseConverter;
126 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
127 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
128 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall;
129 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
130 import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
131 import org.apache.hadoop.hbase.regionserver.MultiVersionConsistencyControl.WriteEntry;
132 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
133 import org.apache.hadoop.hbase.regionserver.compactions.CompactionThroughputController;
134 import org.apache.hadoop.hbase.regionserver.compactions.NoLimitCompactionThroughputController;
135 import org.apache.hadoop.hbase.regionserver.wal.HLog;
136 import org.apache.hadoop.hbase.regionserver.wal.HLogFactory;
137 import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
138 import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter;
139 import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter.MutationReplay;
140 import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
141 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
142 import org.apache.hadoop.hbase.security.User;
143 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
144 import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
145 import org.apache.hadoop.hbase.util.Bytes;
146 import org.apache.hadoop.hbase.util.CancelableProgressable;
147 import org.apache.hadoop.hbase.util.ClassSize;
148 import org.apache.hadoop.hbase.util.CompressionTest;
149 import org.apache.hadoop.hbase.util.EncryptionTest;
150 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
151 import org.apache.hadoop.hbase.util.FSTableDescriptors;
152 import org.apache.hadoop.hbase.util.FSUtils;
153 import org.apache.hadoop.hbase.util.HashedBytes;
154 import org.apache.hadoop.hbase.util.Pair;
155 import org.apache.hadoop.hbase.util.Threads;
156 import org.apache.hadoop.io.MultipleIOException;
157 import org.apache.hadoop.util.StringUtils;
158 import org.cliffc.high_scale_lib.Counter;
159 import org.cloudera.htrace.Trace;
160 import org.cloudera.htrace.TraceScope;
161
162 import com.google.common.annotations.VisibleForTesting;
163 import com.google.common.base.Preconditions;
164 import com.google.common.collect.Lists;
165 import com.google.common.collect.Maps;
166 import com.google.common.io.Closeables;
167 import com.google.protobuf.Descriptors;
168 import com.google.protobuf.Message;
169 import com.google.protobuf.RpcCallback;
170 import com.google.protobuf.RpcController;
171 import com.google.protobuf.Service;
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209 @InterfaceAudience.Private
210 public class HRegion implements HeapSize {
211 public static final Log LOG = LogFactory.getLog(HRegion.class);
212
213 public static final String LOAD_CFS_ON_DEMAND_CONFIG_KEY =
214 "hbase.hregion.scan.loadColumnFamiliesOnDemand";
215
216
217
218
219
220 private static final Durability DEFAULT_DURABLITY = Durability.SYNC_WAL;
221
222 final AtomicBoolean closed = new AtomicBoolean(false);
223
224
225
226
227
228 final AtomicBoolean closing = new AtomicBoolean(false);
229
230 protected volatile long completeSequenceId = -1L;
231
232
233
234
235
236
237 private final AtomicLong sequenceId = new AtomicLong(-1L);
238
239
240
241
242
243
244
/**
 * The kinds of operations that can be performed on a region. Used to label
 * the context of a request (presumably consumed by operation-guard checks
 * elsewhere in this class — the consumers are not visible in this chunk).
 */
public enum Operation {
  ANY, GET, PUT, DELETE, SCAN, APPEND, INCREMENT, SPLIT_REGION, MERGE_REGION, BATCH_MUTATE,
  REPLAY_BATCH_MUTATE, COMPACT_REGION
}
249
250
251
252
253
254
255
256
257
258
259 private final ConcurrentHashMap<HashedBytes, RowLockContext> lockedRows =
260 new ConcurrentHashMap<HashedBytes, RowLockContext>();
261
262 protected final Map<byte[], Store> stores = new ConcurrentSkipListMap<byte[], Store>(
263 Bytes.BYTES_RAWCOMPARATOR);
264
265
266 private Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();
267
268 public final AtomicLong memstoreSize = new AtomicLong(0);
269
270
271 final Counter numMutationsWithoutWAL = new Counter();
272 final Counter dataInMemoryWithoutWAL = new Counter();
273
274
275 final Counter checkAndMutateChecksPassed = new Counter();
276 final Counter checkAndMutateChecksFailed = new Counter();
277
278
279 final Counter readRequestsCount = new Counter();
280 final Counter writeRequestsCount = new Counter();
281
282
283 private final Counter blockedRequestsCount = new Counter();
284
285
286
287
288 public long getBlockedRequestsCount() {
289 return this.blockedRequestsCount.get();
290 }
291
292
293 final AtomicLong compactionsFinished = new AtomicLong(0L);
294 final AtomicLong compactionNumFilesCompacted = new AtomicLong(0L);
295 final AtomicLong compactionNumBytesCompacted = new AtomicLong(0L);
296
297
298 private final HLog log;
299 private final HRegionFileSystem fs;
300 protected final Configuration conf;
301 private final Configuration baseConf;
302 private final KeyValue.KVComparator comparator;
303 private final int rowLockWaitDuration;
304 static final int DEFAULT_ROWLOCK_WAIT_DURATION = 30000;
305
306
307
308
309
310
311
312 final long busyWaitDuration;
313 static final long DEFAULT_BUSY_WAIT_DURATION = HConstants.DEFAULT_HBASE_RPC_TIMEOUT;
314
315
316
317
318 final int maxBusyWaitMultiplier;
319
320
321
322 final long maxBusyWaitDuration;
323
324
325 static final long DEFAULT_ROW_PROCESSOR_TIMEOUT = 60 * 1000L;
326 final ExecutorService rowProcessorExecutor = Executors.newCachedThreadPool();
327
328 private final ConcurrentHashMap<RegionScanner, Long> scannerReadPoints;
329
330
331
332
333 private long openSeqNum = HConstants.NO_SEQNUM;
334
335
336
337
338
339 private boolean isLoadingCfsOnDemandDefault = false;
340
341 private final AtomicInteger majorInProgress = new AtomicInteger(0);
342 private final AtomicInteger minorInProgress = new AtomicInteger(0);
343
344
345
346
347
348
349
350 Map<byte[], Long> maxSeqIdInStores = new TreeMap<byte[], Long>(Bytes.BYTES_COMPARATOR);
351
352
353
354
355 private boolean disallowWritesInRecovering = false;
356
357
358 private volatile boolean isRecovering = false;
359
360
361
362
363
364
365 public long getSmallestReadPoint() {
366 long minimumReadPoint;
367
368
369
370 synchronized(scannerReadPoints) {
371 minimumReadPoint = mvcc.memstoreReadPoint();
372
373 for (Long readPoint: this.scannerReadPoints.values()) {
374 if (readPoint < minimumReadPoint) {
375 minimumReadPoint = readPoint;
376 }
377 }
378 }
379 return minimumReadPoint;
380 }
381
382
383
384
/*
 * Tracks the write/flush/compaction state of the region. Individual fields
 * are volatile so unsynchronized readers see fresh values; compound
 * transitions (e.g. setReadOnly) synchronize on the instance.
 */
static class WriteState {
  // True while a memstore flush is in progress.
  volatile boolean flushing = false;

  // True once a flush has been requested but not yet started.
  volatile boolean flushRequested = false;

  // Number of compactions currently running on this region.
  volatile int compacting = 0;

  // Cleared on close (see doClose); when false, no further compactions or
  // flushes may start.
  volatile boolean writesEnabled = true;

  // True when the region is read-only (see setReadOnly).
  volatile boolean readOnly = false;

  /**
   * Atomically flips the read-only flag and the inverse writes-enabled flag
   * together, under the instance monitor.
   *
   * @param onOff true to make the region read-only, false to re-enable writes
   */
  synchronized void setReadOnly(final boolean onOff) {
    this.writesEnabled = !onOff;
    this.readOnly = onOff;
  }

  boolean isReadOnly() {
    return this.readOnly;
  }

  boolean isFlushRequested() {
    return this.flushRequested;
  }

  // NOTE(review): counts 5 boolean-sized slots, but the class has 4 booleans
  // plus an int — verify this constant is intended.
  static final long HEAP_SIZE = ClassSize.align(
      ClassSize.OBJECT + 5 * Bytes.SIZEOF_BOOLEAN);
}
418
419
420
421
422
423
424
425 public static class FlushResult {
426 enum Result {
427 FLUSHED_NO_COMPACTION_NEEDED,
428 FLUSHED_COMPACTION_NEEDED,
429
430
431 CANNOT_FLUSH_MEMSTORE_EMPTY,
432 CANNOT_FLUSH
433
434 }
435
436 final Result result;
437 final String failureReason;
438 final long flushSequenceId;
439
440
441
442
443
444
445
446
447 FlushResult(Result result, long flushSequenceId) {
448 this(result, flushSequenceId, null);
449 assert result == Result.FLUSHED_NO_COMPACTION_NEEDED || result == Result
450 .FLUSHED_COMPACTION_NEEDED;
451 }
452
453
454
455
456
457
458 FlushResult(Result result, String failureReason) {
459 this(result, -1, failureReason);
460 assert result == Result.CANNOT_FLUSH_MEMSTORE_EMPTY || result == Result.CANNOT_FLUSH;
461 }
462
463
464
465
466
467
468
469 FlushResult(Result result, long flushSequenceId, String failureReason) {
470 this.result = result;
471 this.flushSequenceId = flushSequenceId;
472 this.failureReason = failureReason;
473 }
474
475
476
477
478
479
480 public boolean isFlushSucceeded() {
481 return result == Result.FLUSHED_NO_COMPACTION_NEEDED || result == Result
482 .FLUSHED_COMPACTION_NEEDED;
483 }
484
485
486
487
488
489 public boolean isCompactionNeeded() {
490 return result == Result.FLUSHED_COMPACTION_NEEDED;
491 }
492 }
493
494 final WriteState writestate = new WriteState();
495
496 long memstoreFlushSize;
497 final long timestampSlop;
498 final long rowProcessorTimeout;
499 private volatile long lastFlushTime;
500 final RegionServerServices rsServices;
501 private RegionServerAccounting rsAccounting;
502 private List<Pair<Long, Long>> recentFlushes = new ArrayList<Pair<Long,Long>>();
503 private long flushCheckInterval;
504
505 private long flushPerChanges;
506 private long blockingMemStoreSize;
507 final long threadWakeFrequency;
508
509 final ReentrantReadWriteLock lock =
510 new ReentrantReadWriteLock();
511
512
513 private final ReentrantReadWriteLock updatesLock =
514 new ReentrantReadWriteLock();
515 private boolean splitRequest;
516 private byte[] explicitSplitPoint = null;
517
518 private final MultiVersionConsistencyControl mvcc =
519 new MultiVersionConsistencyControl();
520
521
522 private RegionCoprocessorHost coprocessorHost;
523
524 private HTableDescriptor htableDescriptor = null;
525 private RegionSplitPolicy splitPolicy;
526
527 private final MetricsRegion metricsRegion;
528 private final MetricsRegionWrapperImpl metricsRegionWrapper;
529 private final Durability durability;
530 private final boolean regionStatsEnabled;
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
/**
 * Deprecated constructor: wraps the raw pieces into an
 * {@link HRegionFileSystem} and delegates to the primary constructor.
 *
 * @param tableDir qualified path of the directory where the region lives
 * @param log the HLog (WAL) for updates to this region
 * @param fs filesystem to use
 * @param confParam base configuration (must not already be a
 *          CompoundConfiguration — see the primary constructor)
 * @param regionInfo info describing the region
 * @param htd the table descriptor
 * @param rsServices region-server services; may be null
 * @deprecated use the {@code HRegion(HRegionFileSystem, ...)} constructor
 */
@Deprecated
public HRegion(final Path tableDir, final HLog log, final FileSystem fs,
    final Configuration confParam, final HRegionInfo regionInfo,
    final HTableDescriptor htd, final RegionServerServices rsServices) {
  this(new HRegionFileSystem(confParam, fs, tableDir, regionInfo),
      log, confParam, htd, rsServices);
}
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
/**
 * Primary HRegion constructor. Layers the table descriptor's configuration
 * over the supplied base configuration, reads tuning knobs, and — when
 * running inside a region server — wires in accounting, coprocessors, and
 * metrics. Does NOT open stores; initialization happens separately.
 *
 * @param fs region filesystem abstraction (also supplies the HRegionInfo)
 * @param log the HLog (WAL) for updates to this region
 * @param confParam the base configuration; must NOT already be a
 *          CompoundConfiguration (that would indicate double-wrapping)
 * @param htd the table descriptor; required
 * @param rsServices region-server services, or null for standalone/test use
 * @throws IllegalArgumentException if htd is null, the configuration is
 *           already compound, flushPerChanges exceeds its maximum, or the
 *           busy-wait settings are non-positive
 */
public HRegion(final HRegionFileSystem fs, final HLog log, final Configuration confParam,
    final HTableDescriptor htd, final RegionServerServices rsServices) {
  if (htd == null) {
    throw new IllegalArgumentException("Need table descriptor");
  }

  if (confParam instanceof CompoundConfiguration) {
    throw new IllegalArgumentException("Need original base configuration");
  }

  this.comparator = fs.getRegionInfo().getComparator();
  this.log = log;
  this.fs = fs;

  // Keep the raw base conf, then build the effective conf with the table's
  // per-table overrides layered on top.
  this.baseConf = confParam;
  this.conf = new CompoundConfiguration()
      .add(confParam)
      .addStringMap(htd.getConfiguration())
      .addWritableMap(htd.getValues());
  this.flushCheckInterval = conf.getInt(MEMSTORE_PERIODIC_FLUSH_INTERVAL,
      DEFAULT_CACHE_FLUSH_INTERVAL);
  this.flushPerChanges = conf.getLong(MEMSTORE_FLUSH_PER_CHANGES, DEFAULT_FLUSH_PER_CHANGES);
  if (this.flushPerChanges > MAX_FLUSH_PER_CHANGES) {
    throw new IllegalArgumentException(MEMSTORE_FLUSH_PER_CHANGES + " can not exceed "
        + MAX_FLUSH_PER_CHANGES);
  }

  this.rowLockWaitDuration = conf.getInt("hbase.rowlock.wait.duration",
      DEFAULT_ROWLOCK_WAIT_DURATION);

  this.isLoadingCfsOnDemandDefault = conf.getBoolean(LOAD_CFS_ON_DEMAND_CONFIG_KEY, true);
  this.htableDescriptor = htd;
  this.rsServices = rsServices;
  this.threadWakeFrequency = conf.getLong(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000);
  setHTableSpecificConf();
  this.scannerReadPoints = new ConcurrentHashMap<RegionScanner, Long>();

  this.busyWaitDuration = conf.getLong(
      "hbase.busy.wait.duration", DEFAULT_BUSY_WAIT_DURATION);
  this.maxBusyWaitMultiplier = conf.getInt("hbase.busy.wait.multiplier.max", 2);
  // Reject zero/negative products up front; these values multiply together
  // when backing off busy regions.
  if (busyWaitDuration * maxBusyWaitMultiplier <= 0L) {
    throw new IllegalArgumentException("Invalid hbase.busy.wait.duration ("
        + busyWaitDuration + ") or hbase.busy.wait.multiplier.max ("
        + maxBusyWaitMultiplier + "). Their product should be positive");
  }
  this.maxBusyWaitDuration = conf.getLong("hbase.ipc.client.call.purge.timeout",
      conf.getLong("ipc.client.call.purge.timeout", 2 * HConstants.DEFAULT_HBASE_RPC_TIMEOUT));

  // How far into the future a client-supplied timestamp may run. The default
  // of LATEST_TIMESTAMP presumably disables the check — TODO confirm the
  // constant's semantics against HConstants.
  this.timestampSlop = conf.getLong(
      "hbase.hregion.keyvalue.timestamp.slop.millisecs",
      HConstants.LATEST_TIMESTAMP);

  // Timeout for tasks submitted to rowProcessorExecutor.
  this.rowProcessorTimeout = conf.getLong(
      "hbase.hregion.row.processor.timeout", DEFAULT_ROW_PROCESSOR_TIMEOUT);
  this.durability = htd.getDurability() == Durability.USE_DEFAULT
      ? DEFAULT_DURABLITY
      : htd.getDurability();
  if (rsServices != null) {
    this.rsAccounting = this.rsServices.getRegionServerAccounting();

    // Coprocessors and metrics are only initialized when hosted by a real
    // region server; otherwise coprocessorHost/metrics stay null.
    this.coprocessorHost = new RegionCoprocessorHost(this, rsServices, conf);
    this.metricsRegionWrapper = new MetricsRegionWrapperImpl(this);
    this.metricsRegion = new MetricsRegion(this.metricsRegionWrapper);

    // If the server lists this region as recovering (distributed log
    // replay), mark it so and register ourselves in the map.
    Map<String, HRegion> recoveringRegions = rsServices.getRecoveringRegions();
    String encodedName = getRegionInfo().getEncodedName();
    if (recoveringRegions != null && recoveringRegions.containsKey(encodedName)) {
      this.isRecovering = true;
      recoveringRegions.put(encodedName, this);
    }
  } else {
    this.metricsRegionWrapper = null;
    this.metricsRegion = null;
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Instantiated " + this);
  }

  // Whether writes are rejected while the region is still recovering.
  this.disallowWritesInRecovering =
      conf.getBoolean(HConstants.DISALLOW_WRITES_IN_RECOVERING,
          HConstants.DEFAULT_DISALLOW_WRITES_IN_RECOVERING_CONFIG);

  // Client-backpressure region stats are always disabled for system tables.
  this.regionStatsEnabled = htd.getTableName().getNamespaceAsString().equals(
      NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR) ? false :
      conf.getBoolean(HConstants.ENABLE_CLIENT_BACKPRESSURE,
          HConstants.DEFAULT_ENABLE_CLIENT_BACKPRESSURE);
}
680
681 void setHTableSpecificConf() {
682 if (this.htableDescriptor == null) return;
683 long flushSize = this.htableDescriptor.getMemStoreFlushSize();
684
685 if (flushSize <= 0) {
686 flushSize = conf.getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE,
687 HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE);
688 }
689 this.memstoreFlushSize = flushSize;
690 this.blockingMemStoreSize = this.memstoreFlushSize *
691 conf.getLong(HConstants.HREGION_MEMSTORE_BLOCK_MULTIPLIER,
692 HConstants.DEFAULT_HREGION_MEMSTORE_BLOCK_MULTIPLIER);
693 }
694
695
696
697
698
699
700
701
702
/**
 * Initialize this region without a progress reporter.
 *
 * @return the sequence id that should be assigned to the next edit
 * @throws IOException if initialization fails
 * @deprecated retained for compatibility; delegates to
 *             {@code initialize(null)}
 */
@Deprecated
public long initialize() throws IOException {
  return initialize(null);
}
707
708
709
710
711
712
713
714
/**
 * Initialize this region, tracking progress in a MonitoredTask.
 *
 * @param reporter progress callback forwarded to the internal initialization
 *          steps; may be null
 * @return the sequence id that should be assigned to the next edit
 * @throws IOException if initialization fails
 */
private long initialize(final CancelableProgressable reporter) throws IOException {
  MonitoredTask status = TaskMonitor.get().createStatus("Initializing region " + this);
  long nextSeqId = -1;
  try {
    nextSeqId = initializeRegionInternals(reporter, status);
    return nextSeqId;
  } finally {
    // nextSeqId is still -1 only when initializeRegionInternals threw; in
    // that case mark the monitored task aborted so it is not left dangling.
    if (nextSeqId == -1) {
      status
          .abort("Exception during region " + this.getRegionNameAsString() + " initialization.");
    }
  }
}
730
/**
 * Performs the actual region-open sequence: coprocessor pre-open hook,
 * region-info persistence, temp/split/merge cleanup, store initialization,
 * write-state setup, split-policy creation, then the coprocessor post-open
 * hook.
 *
 * @param reporter progress reporter forwarded to recovered-edits replay
 * @param status monitored task updated with each initialization step
 * @return the next sequence id to use for this region
 * @throws IOException if any step fails
 */
private long initializeRegionInternals(final CancelableProgressable reporter,
    final MonitoredTask status) throws IOException, UnsupportedEncodingException {
  if (coprocessorHost != null) {
    status.setStatus("Running coprocessor pre-open hook");
    coprocessorHost.preOpen();
  }

  // Persist the region info on the filesystem (re-writing it if missing).
  status.setStatus("Writing region info on filesystem");
  fs.checkRegionInfoOnFilesystem();

  // Remove temporary data left over from old regions.
  status.setStatus("Cleaning up temporary data from old regions");
  fs.cleanupTempDir();

  // Open all stores; this also computes the max sequence id seen.
  status.setStatus("Initializing all the Stores");
  long maxSeqId = initializeRegionStores(reporter, status);

  status.setStatus("Cleaning up detritus from prior splits");
  // Clean out any split/merge leftovers; we may be reopening a region whose
  // split or merge died partway through.
  fs.cleanupAnySplitDetritus();
  fs.cleanupMergesDir();

  this.writestate.setReadOnly(this.htableDescriptor.isReadOnly());
  this.writestate.flushRequested = false;
  this.writestate.compacting = 0;

  // Choose a split policy based on table/region configuration.
  this.splitPolicy = RegionSplitPolicy.create(this, conf);

  this.lastFlushTime = EnvironmentEdgeManager.currentTimeMillis();

  // Next seqid is one beyond the max found in stores/recovered edits
  // (maxSeqId is -1 when nothing was found).
  long nextSeqid = maxSeqId + 1;
  if (this.isRecovering) {
    // Region opened before distributed log replay finishes: bump the seqid
    // well past anything still to be replayed so new writes sort after the
    // replayed edits.
    nextSeqid += this.flushPerChanges + 10000000;
  }
  LOG.info("Onlined " + this.getRegionInfo().getShortNameToLog() +
      "; next sequenceid=" + nextSeqid);

  // A region can be reopened (e.g. after a failed split); reset the flags.
  this.closing.set(false);
  this.closed.set(false);

  if (coprocessorHost != null) {
    status.setStatus("Running coprocessor post-open hooks");
    coprocessorHost.postOpen();
  }

  status.markComplete("Region opened successfully");
  return nextSeqid;
}
789
790 private long initializeRegionStores(final CancelableProgressable reporter, MonitoredTask status)
791 throws IOException, UnsupportedEncodingException {
792
793
794 long maxSeqId = -1;
795
796 long maxMemstoreTS = -1;
797
798 if (!htableDescriptor.getFamilies().isEmpty()) {
799
800 ThreadPoolExecutor storeOpenerThreadPool =
801 getStoreOpenAndCloseThreadPool("StoreOpener-" + this.getRegionInfo().getShortNameToLog());
802 CompletionService<HStore> completionService =
803 new ExecutorCompletionService<HStore>(storeOpenerThreadPool);
804
805
806 for (final HColumnDescriptor family : htableDescriptor.getFamilies()) {
807 status.setStatus("Instantiating store for column family " + family);
808 completionService.submit(new Callable<HStore>() {
809 @Override
810 public HStore call() throws IOException {
811 return instantiateHStore(family);
812 }
813 });
814 }
815 boolean allStoresOpened = false;
816 try {
817 for (int i = 0; i < htableDescriptor.getFamilies().size(); i++) {
818 Future<HStore> future = completionService.take();
819 HStore store = future.get();
820 this.stores.put(store.getColumnFamilyName().getBytes(), store);
821
822 long storeMaxSequenceId = store.getMaxSequenceId();
823 maxSeqIdInStores.put(store.getColumnFamilyName().getBytes(),
824 storeMaxSequenceId);
825 if (maxSeqId == -1 || storeMaxSequenceId > maxSeqId) {
826 maxSeqId = storeMaxSequenceId;
827 }
828 long maxStoreMemstoreTS = store.getMaxMemstoreTS();
829 if (maxStoreMemstoreTS > maxMemstoreTS) {
830 maxMemstoreTS = maxStoreMemstoreTS;
831 }
832 }
833 allStoresOpened = true;
834 } catch (InterruptedException e) {
835 throw (InterruptedIOException)new InterruptedIOException().initCause(e);
836 } catch (ExecutionException e) {
837 throw new IOException(e.getCause());
838 } finally {
839 storeOpenerThreadPool.shutdownNow();
840 if (!allStoresOpened) {
841
842 LOG.error("Could not initialize all stores for the region=" + this);
843 for (Store store : this.stores.values()) {
844 try {
845 store.close();
846 } catch (IOException e) {
847 LOG.warn(e.getMessage());
848 }
849 }
850 }
851 }
852 }
853 mvcc.initialize(maxMemstoreTS + 1);
854
855 maxSeqId = Math.max(maxSeqId, replayRecoveredEditsIfAny(
856 this.fs.getRegionDir(), maxSeqIdInStores, reporter, status));
857 return maxSeqId;
858 }
859
860
861
862
863 public boolean hasReferences() {
864 for (Store store : this.stores.values()) {
865 if (store.hasReferences()) return true;
866 }
867 return false;
868 }
869
870
871
872
873
874
875 public HDFSBlocksDistribution getHDFSBlocksDistribution() {
876 HDFSBlocksDistribution hdfsBlocksDistribution =
877 new HDFSBlocksDistribution();
878 synchronized (this.stores) {
879 for (Store store : this.stores.values()) {
880 for (StoreFile sf : store.getStorefiles()) {
881 HDFSBlocksDistribution storeFileBlocksDistribution =
882 sf.getHDFSBlockDistribution();
883 hdfsBlocksDistribution.add(storeFileBlocksDistribution);
884 }
885 }
886 }
887 return hdfsBlocksDistribution;
888 }
889
890
891
892
893
894
895
896
897
/**
 * Computes the HDFS block distribution for a region, resolving the table
 * directory from the root dir configured in {@code conf}, then delegating to
 * the overload that takes an explicit table path.
 *
 * @param conf configuration used to resolve the root directory
 * @param tableDescriptor descriptor of the table the region belongs to
 * @param regionInfo the region whose files are examined
 * @return the aggregated block distribution of the region's store files
 * @throws IOException on filesystem errors
 */
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(final Configuration conf,
    final HTableDescriptor tableDescriptor, final HRegionInfo regionInfo) throws IOException {
  Path tablePath = FSUtils.getTableDir(FSUtils.getRootDir(conf), tableDescriptor.getTableName());
  return computeHDFSBlocksDistribution(conf, tableDescriptor, regionInfo, tablePath);
}
903
904
905
906
907
908
909
910
911
912
/**
 * Computes the HDFS block distribution for a region given an explicit table
 * directory, summing the distribution of every store file of every family.
 *
 * @param conf configuration used to obtain the filesystem
 * @param tableDescriptor supplies the column families to examine
 * @param regionInfo the region whose files are examined
 * @param tablePath the table directory on the filesystem
 * @return aggregated block distribution across all store files
 * @throws IOException on filesystem errors
 */
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(final Configuration conf,
    final HTableDescriptor tableDescriptor, final HRegionInfo regionInfo, Path tablePath)
    throws IOException {
  HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
  FileSystem fs = tablePath.getFileSystem(conf);

  HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tablePath, regionInfo);
  for (HColumnDescriptor family: tableDescriptor.getFamilies()) {
    // A family with no store files yields null; skip it.
    Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family.getNameAsString());
    if (storeFiles == null) continue;

    for (StoreFileInfo storeFileInfo : storeFiles) {
      hdfsBlocksDistribution.add(storeFileInfo.computeHDFSBlocksDistribution(fs));
    }
  }
  return hdfsBlocksDistribution;
}
930
/**
 * @return the live memstore size counter (bytes) for this region.
 * NOTE(review): this exposes the internal mutable AtomicLong, so callers can
 * alter the accounting directly; the interface is kept for compatibility.
 */
public AtomicLong getMemstoreSize() {
  return memstoreSize;
}
934
935
936
937
938
939
940
941 public long addAndGetGlobalMemstoreSize(long memStoreSize) {
942 if (this.rsAccounting != null) {
943 rsAccounting.addAndGetGlobalMemstoreSize(memStoreSize);
944 }
945 return this.memstoreSize.addAndGet(memStoreSize);
946 }
947
948
/** @return the {@link HRegionInfo} for this region, as held by the region filesystem. */
public HRegionInfo getRegionInfo() {
  return this.fs.getRegionInfo();
}
952
953
954
955
956
/**
 * @return the RegionServerServices this region was constructed with; null
 *         when the region is used standalone (the constructor permits null)
 */
RegionServerServices getRegionServerServices() {
  return this.rsServices;
}
960
961
962
963
/**
 * @return the split policy for this region; set during region initialization,
 *         so may be null before the region is initialized
 */
public RegionSplitPolicy getSplitPolicy() {
  return this.splitPolicy;
}
967
968
969 long getReadRequestsCount() {
970 return this.readRequestsCount.get();
971 }
972
973
974 long getWriteRequestsCount() {
975 return this.writeRequestsCount.get();
976 }
977
/**
 * @return this region's metrics object; null when not hosted by a region
 *         server (see the constructor's rsServices == null branch)
 */
public MetricsRegion getMetrics() {
  return metricsRegion;
}
981
982
/** @return true if the region has been fully closed */
public boolean isClosed() {
  return this.closed.get();
}
986
987
988
989
/** @return true if the region is in the process of closing */
public boolean isClosing() {
  return this.closing.get();
}
993
994
995
996
997
998 public void setRecovering(boolean newState) {
999 boolean wasRecovering = this.isRecovering;
1000 this.isRecovering = newState;
1001 if (wasRecovering && !isRecovering) {
1002
1003 coprocessorHost.postLogReplay();
1004 }
1005 }
1006
1007
1008
1009
/** @return true if the region is currently recovering (distributed log replay) */
public boolean isRecovering() {
  return this.isRecovering;
}
1013
1014
1015 public boolean isAvailable() {
1016 return !isClosed() && !isClosing();
1017 }
1018
1019
1020 public boolean isSplittable() {
1021 return isAvailable() && !hasReferences();
1022 }
1023
1024
1025
1026
1027 public boolean isMergeable() {
1028 if (!isAvailable()) {
1029 LOG.debug("Region " + this.getRegionNameAsString()
1030 + " is not mergeable because it is closing or closed");
1031 return false;
1032 }
1033 if (hasReferences()) {
1034 LOG.debug("Region " + this.getRegionNameAsString()
1035 + " is not mergeable because it has references");
1036 return false;
1037 }
1038
1039 return true;
1040 }
1041
/**
 * @return true if writes are currently enabled. Reads under the writestate
 * monitor, pairing with the compound state transitions done under the same
 * monitor elsewhere (e.g. during close).
 */
public boolean areWritesEnabled() {
  synchronized(this.writestate) {
    return this.writestate.writesEnabled;
  }
}
1047
/** @return the MVCC (multi-version consistency control) instance for this region */
public MultiVersionConsistencyControl getMVCC() {
  return mvcc;
}
1051
1052
1053
1054
1055 public long getReadpoint(IsolationLevel isolationLevel) {
1056 if (isolationLevel == IsolationLevel.READ_UNCOMMITTED) {
1057
1058 return Long.MAX_VALUE;
1059 }
1060 return mvcc.memstoreReadPoint();
1061 }
1062
/**
 * @return the default for loading column families on demand, as configured
 *         via {@link #LOAD_CFS_ON_DEMAND_CONFIG_KEY} (defaults to true)
 */
public boolean isLoadingCfsOnDemandDefault() {
  return this.isLoadingCfsOnDemandDefault;
}
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
/**
 * Close down this HRegion without aborting: equivalent to
 * {@code close(false)}, so the memstore is flushed before stores are closed.
 *
 * @return map of store family name to its list of StoreFiles, or null if the
 *         region was already closed (see doClose)
 * @throws IOException if the close fails
 */
public Map<byte[], List<StoreFile>> close() throws IOException {
  return close(false);
}
1086
1087 private final Object closeLock = new Object();
1088
1089
1090 public static final String MEMSTORE_PERIODIC_FLUSH_INTERVAL =
1091 "hbase.regionserver.optionalcacheflushinterval";
1092
1093 public static final int DEFAULT_CACHE_FLUSH_INTERVAL = 3600000;
1094
1095
1096 public static final String MEMSTORE_FLUSH_PER_CHANGES =
1097 "hbase.regionserver.flush.per.changes";
1098 public static final long DEFAULT_FLUSH_PER_CHANGES = 30000000;
1099
1100
1101
1102
1103 public static final long MAX_FLUSH_PER_CHANGES = 1000000000;
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
/**
 * Close down this HRegion, serialized on the close lock, tracking progress
 * with a MonitoredTask. Delegates the real work to {@code doClose}.
 *
 * @param abort true if the server is aborting (doClose skips the pre-close
 *          flush in that case)
 * @return map of store family name to its StoreFiles, or null if the region
 *         was already closed
 * @throws IOException if the close fails
 */
public Map<byte[], List<StoreFile>> close(final boolean abort) throws IOException {
  // Only allow one thread to close at a time; concurrent closers queue on
  // closeLock.
  MonitoredTask status = TaskMonitor.get().createStatus(
      "Closing region " + this +
      (abort ? " due to abort" : ""));

  status.setStatus("Waiting for close lock");
  try {
    synchronized (closeLock) {
      return doClose(abort, status);
    }
  } finally {
    // Always release the monitored task, even when doClose throws.
    status.cleanup();
  }
}
1138
1139
1140
1141
/**
 * Exposed for tests only: directly sets the closing flag.
 *
 * @param closing new value for the closing flag
 */
@VisibleForTesting
public void setClosing(boolean closing) {
  this.closing.set(closing);
}
1146
  /**
   * Does the work of closing the region: disables writes, flushes remaining
   * memstore content (unless aborting), closes every store in parallel, and
   * fires the coprocessor pre/post close hooks.
   *
   * @param abort true if the close is part of a server abort; skips flushes
   * @param status monitored task updated with progress messages
   * @return map of family name to the store files of that family, or null if
   *     the region was already closed
   * @throws IOException if a flush or store close fails
   */
  private Map<byte[], List<StoreFile>> doClose(final boolean abort, MonitoredTask status)
      throws IOException {
    if (isClosed()) {
      LOG.warn("Region " + this + " already closed");
      return null;
    }

    if (coprocessorHost != null) {
      status.setStatus("Running coprocessor pre-close hooks");
      this.coprocessorHost.preClose(abort);
    }

    status.setStatus("Disabling compacts and flushes for region");
    synchronized (writestate) {
      // Disable compacting and flushing by background threads for this
      // region, then wait for in-flight ones to drain.
      writestate.writesEnabled = false;
      LOG.debug("Closing " + this + ": disabling compactions & flushes");
      waitForFlushesAndCompactions();
    }
    // If we have not been aborted and the memstore is large, flush once now
    // BEFORE taking the write lock so readers are not blocked for the whole
    // duration of a big flush.
    if (!abort && worthPreFlushing()) {
      status.setStatus("Pre-flushing region before close");
      LOG.info("Running close preflush of " + this.getRegionNameAsString());
      try {
        internalFlushcache(status);
      } catch (IOException ioe) {
        // Failed to flush the region. Keep going; the final flush below
        // (under the write lock) will retry.
        status.setStatus("Failed pre-flush " + this + "; " + ioe.getMessage());
      }
    }

    // Block all new region operations; from here on no reads or writes run.
    lock.writeLock().lock();
    this.closing.set(true);
    status.setStatus("Disabling writes for close");
    try {
      if (this.isClosed()) {
        status.abort("Already got closed by another process");
        // SplitTransaction handles the null
        return null;
      }
      LOG.debug("Updates disabled for region " + this);
      // Don't flush the cache if we are aborting
      if (!abort) {
        int flushCount = 0;
        // Keep flushing until the memstore is empty; a flush can leave a
        // snapshot behind, so more than one pass may be needed.
        while (this.getMemstoreSize().get() > 0) {
          try {
            if (flushCount++ > 0) {
              int actualFlushes = flushCount - 1;
              if (actualFlushes > 5) {
                // If we tried 5 times and are unable to clear memory, abort
                // so we do not lose data
                throw new DroppedSnapshotException("Failed clearing memory after " +
                  actualFlushes + " attempts on region: " + Bytes.toStringBinary(getRegionName()));
              }
              LOG.info("Running extra flush, " + actualFlushes +
                " (carrying snapshot?) " + this);
            }
            internalFlushcache(status);
          } catch (IOException ioe) {
            // Flush failed: re-enable writes and put the region back online
            // before propagating the error.
            status.setStatus("Failed flush " + this + ", putting online again");
            synchronized (writestate) {
              writestate.writesEnabled = true;
            }
            // Have to throw to upper layers. I can't abort server from here.
            throw ioe;
          }
        }
      }

      Map<byte[], List<StoreFile>> result =
        new TreeMap<byte[], List<StoreFile>>(Bytes.BYTES_COMPARATOR);
      if (!stores.isEmpty()) {
        // initialize the thread pool for closing stores in parallel.
        ThreadPoolExecutor storeCloserThreadPool =
          getStoreOpenAndCloseThreadPool("StoreCloserThread-" + this.getRegionNameAsString());
        CompletionService<Pair<byte[], Collection<StoreFile>>> completionService =
          new ExecutorCompletionService<Pair<byte[], Collection<StoreFile>>>(storeCloserThreadPool);

        // close each store in parallel
        for (final Store store : stores.values()) {
          long flushableSize = store.getFlushableSize();
          // Sanity check: after the flush loop above every store should be
          // empty unless we are aborting; a non-empty store indicates a bug
          // (e.g. a coprocessor left the memstore half-updated).
          if (!(abort || flushableSize == 0)) {
            getRegionServerServices().abort("Assertion failed while closing store "
              + getRegionInfo().getRegionNameAsString() + " " + store
              + ". flushableSize expected=0, actual= " + flushableSize
              + ". Current memstoreSize=" + getMemstoreSize() + ". Maybe a coprocessor "
              + "operation failed and left the memstore in a partially updated state.", null);
          }
          completionService
            .submit(new Callable<Pair<byte[], Collection<StoreFile>>>() {
              @Override
              public Pair<byte[], Collection<StoreFile>> call() throws IOException {
                return new Pair<byte[], Collection<StoreFile>>(
                  store.getFamily().getName(), store.close());
              }
            });
        }
        try {
          // Collect the per-store results, grouping store files by family.
          for (int i = 0; i < stores.size(); i++) {
            Future<Pair<byte[], Collection<StoreFile>>> future = completionService.take();
            Pair<byte[], Collection<StoreFile>> storeFiles = future.get();
            List<StoreFile> familyFiles = result.get(storeFiles.getFirst());
            if (familyFiles == null) {
              familyFiles = new ArrayList<StoreFile>();
              result.put(storeFiles.getFirst(), familyFiles);
            }
            familyFiles.addAll(storeFiles.getSecond());
          }
        } catch (InterruptedException e) {
          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
          throw new IOException(e.getCause());
        } finally {
          storeCloserThreadPool.shutdownNow();
        }
      }
      this.closed.set(true);
      // Memstore should be empty at this point; log loudly if it is not.
      if (memstoreSize.get() != 0) LOG.error("Memstore size is " + memstoreSize.get());
      if (coprocessorHost != null) {
        status.setStatus("Running coprocessor post-close hooks");
        this.coprocessorHost.postClose(abort);
      }
      if ( this.metricsRegion != null) {
        this.metricsRegion.close();
      }
      if ( this.metricsRegionWrapper != null) {
        Closeables.closeQuietly(this.metricsRegionWrapper);
      }
      status.markComplete("Closed");
      LOG.info("Closed " + this);
      return result;
    } finally {
      lock.writeLock().unlock();
    }
  }
1286
1287
1288
1289
1290
1291
1292 public void waitForFlushesAndCompactions() {
1293 synchronized (writestate) {
1294 while (writestate.compacting > 0 || writestate.flushing) {
1295 LOG.debug("waiting for " + writestate.compacting + " compactions"
1296 + (writestate.flushing ? " & cache flush" : "") + " to complete for region " + this);
1297 try {
1298 writestate.wait();
1299 } catch (InterruptedException iex) {
1300
1301 Thread.currentThread().interrupt();
1302 }
1303 }
1304 }
1305 }
1306
  /**
   * Builds a thread pool for opening/closing the stores of this region in
   * parallel, sized to min(number of families, configured max threads).
   *
   * @param threadNamePrefix prefix used when naming the pool's threads
   * @return a bounded cached thread pool
   */
  protected ThreadPoolExecutor getStoreOpenAndCloseThreadPool(
      final String threadNamePrefix) {
    int numStores = Math.max(1, this.htableDescriptor.getFamilies().size());
    int maxThreads = Math.min(numStores,
        conf.getInt(HConstants.HSTORE_OPEN_AND_CLOSE_THREADS_MAX,
            HConstants.DEFAULT_HSTORE_OPEN_AND_CLOSE_THREADS_MAX));
    return getOpenAndCloseThreadPool(maxThreads, threadNamePrefix);
  }
1315
  /**
   * Builds a thread pool for opening/closing the store FILES within a single
   * store. The configured thread budget is divided by the number of families
   * so that region-wide parallelism stays within the configured maximum.
   *
   * @param threadNamePrefix prefix used when naming the pool's threads
   * @return a bounded cached thread pool (at least one thread)
   */
  protected ThreadPoolExecutor getStoreFileOpenAndCloseThreadPool(
      final String threadNamePrefix) {
    int numStores = Math.max(1, this.htableDescriptor.getFamilies().size());
    int maxThreads = Math.max(1,
        conf.getInt(HConstants.HSTORE_OPEN_AND_CLOSE_THREADS_MAX,
            HConstants.DEFAULT_HSTORE_OPEN_AND_CLOSE_THREADS_MAX)
            / numStores);
    return getOpenAndCloseThreadPool(maxThreads, threadNamePrefix);
  }
1325
1326 static ThreadPoolExecutor getOpenAndCloseThreadPool(int maxThreads,
1327 final String threadNamePrefix) {
1328 return Threads.getBoundedCachedThreadPool(maxThreads, 30L, TimeUnit.SECONDS,
1329 new ThreadFactory() {
1330 private int count = 1;
1331
1332 @Override
1333 public Thread newThread(Runnable r) {
1334 return new Thread(r, threadNamePrefix + "-" + count++);
1335 }
1336 });
1337 }
1338
1339
1340
1341
1342 private boolean worthPreFlushing() {
1343 return this.memstoreSize.get() >
1344 this.conf.getLong("hbase.hregion.preclose.flush.size", 1024 * 1024 * 5);
1345 }
1346
1347
1348
1349
1350
1351
  /** @return start key of this region, delegated to the region info. */
  public byte [] getStartKey() {
    return this.getRegionInfo().getStartKey();
  }

  /** @return end key of this region, delegated to the region info. */
  public byte [] getEndKey() {
    return this.getRegionInfo().getEndKey();
  }

  /** @return region id, delegated to the region info. */
  public long getRegionId() {
    return this.getRegionInfo().getRegionId();
  }

  /** @return region name as a byte array, delegated to the region info. */
  public byte [] getRegionName() {
    return this.getRegionInfo().getRegionName();
  }

  /** @return region name as a printable string, delegated to the region info. */
  public String getRegionNameAsString() {
    return this.getRegionInfo().getRegionNameAsString();
  }

  /** @return the table descriptor for this region's table. */
  public HTableDescriptor getTableDesc() {
    return this.htableDescriptor;
  }

  /** @return the write-ahead log used by this region. */
  public HLog getLog() {
    return this.log;
  }

  /**
   * @return the base configuration this region was created with (before any
   *     per-region overlays). Package-private: for internal use only.
   */
  Configuration getBaseConf() {
    return this.baseConf;
  }

  /** @return the filesystem backing this region's files. */
  public FileSystem getFilesystem() {
    return fs.getFileSystem();
  }

  /** @return the {@link HRegionFileSystem} wrapper for this region. */
  public HRegionFileSystem getRegionFileSystem() {
    return this.fs;
  }

  /** @return timestamp (ms) of the last successful memstore flush. */
  public long getLastFlushTime() {
    return this.lastFlushTime;
  }
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420 public long getLargestHStoreSize() {
1421 long size = 0;
1422 for (Store h : stores.values()) {
1423 long storeSize = h.getSize();
1424 if (storeSize > size) {
1425 size = storeSize;
1426 }
1427 }
1428 return size;
1429 }
1430
1431
1432
1433
  /** @return the KeyValue comparator used by this region. */
  public KeyValue.KVComparator getComparator() {
    return this.comparator;
  }

  /**
   * Hook invoked before a region compaction runs. Does nothing here;
   * subclasses may override to do preparatory work.
   *
   * @throws IOException declared for overriding implementations
   */
  protected void doRegionCompactionPrep() throws IOException {
  }

  /** Marks every store in this region as needing a major compaction. */
  void triggerMajorCompaction() {
    for (Store h : stores.values()) {
      h.triggerMajorCompaction();
    }
  }
1450
1451
1452
1453
1454
1455
1456
1457
  /**
   * Synchronously compacts all stores of this region.
   *
   * @param majorCompaction true to force a major compaction of every store
   * @throws IOException if any compaction fails
   */
  public void compactStores(final boolean majorCompaction)
  throws IOException {
    if (majorCompaction) {
      this.triggerMajorCompaction();
    }
    compactStores();
  }

  /**
   * Synchronously compacts every store that has a compaction pending
   * (as decided by each store's {@code requestCompaction}), with no
   * throughput limiting.
   *
   * @throws IOException if any compaction fails
   */
  public void compactStores() throws IOException {
    for (Store s : getStores().values()) {
      CompactionContext compaction = s.requestCompaction();
      if (compaction != null) {
        compact(compaction, s, NoLimitCompactionThroughputController.INSTANCE, null);
      }
    }
  }
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
  /**
   * Convenience overload of {@code compact} that runs with no acting user.
   *
   * @return true if the compaction completed, false if it was skipped
   * @throws IOException if the compaction fails
   */
  public boolean compact(CompactionContext compaction, Store store,
      CompactionThroughputController throughputController) throws IOException {
    return compact(compaction, store, throughputController, null);
  }
1499
  /**
   * Runs an already-selected compaction on the given store, under the region
   * read lock so the region cannot close mid-compaction.
   *
   * <p>If the compaction does not actually start (region closing/closed,
   * store re-instantiated, writes disabled, or interrupted), the request is
   * cancelled on the store and false is returned.
   *
   * @param compaction the selected compaction context (must have a selection)
   * @param store the store to compact; must still be the live instance
   * @param throughputController limits compaction I/O rate
   * @param user user on whose behalf the compaction runs; may be null
   * @return true if the compaction completed, false if it was skipped
   * @throws IOException if the compaction itself fails
   */
  public boolean compact(CompactionContext compaction, Store store,
      CompactionThroughputController throughputController, User user) throws IOException {
    assert compaction != null && compaction.hasSelection();
    assert !compaction.getRequest().getFiles().isEmpty();
    if (this.closing.get() || this.closed.get()) {
      LOG.debug("Skipping compaction on " + this + " because closing/closed");
      store.cancelRequestedCompaction(compaction);
      return false;
    }
    MonitoredTask status = null;
    boolean requestNeedsCancellation = true;
    // Hold the read lock so the region cannot be closed while we compact.
    lock.readLock().lock();
    try {
      byte[] cf = Bytes.toBytes(store.getColumnFamilyName());
      // Guard against a stale Store reference: a rolled-back split can have
      // replaced the store instance this compaction was selected against.
      if (stores.get(cf) != store) {
        LOG.warn("Store " + store.getColumnFamilyName() + " on region " + this
            + " has been re-instantiated, cancel this compaction request. "
            + " It may be caused by the roll back of split transaction");
        return false;
      }

      status = TaskMonitor.get().createStatus("Compacting " + store + " in " + this);
      // Re-check closed now that we hold the lock.
      if (this.closed.get()) {
        String msg = "Skipping compaction on " + this + " because closed";
        LOG.debug(msg);
        status.abort(msg);
        return false;
      }
      boolean wasStateSet = false;
      try {
        synchronized (writestate) {
          if (writestate.writesEnabled) {
            // Count this compaction so close() can wait for it.
            wasStateSet = true;
            ++writestate.compacting;
          } else {
            String msg = "NOT compacting region " + this + ". Writes disabled.";
            LOG.info(msg);
            status.abort(msg);
            return false;
          }
        }
        LOG.info("Starting compaction on " + store + " in region " + this
            + (compaction.getRequest().isOffPeak()?" as an off-peak compaction":""));
        doRegionCompactionPrep();
        try {
          status.setStatus("Compacting store " + store);
          // Once Store.compact runs, the store owns the request's lifecycle;
          // we must not cancel it from here anymore.
          requestNeedsCancellation = false;
          store.compact(compaction, throughputController, user);
        } catch (InterruptedIOException iioe) {
          String msg = "compaction interrupted";
          LOG.info(msg, iioe);
          status.abort(msg);
          return false;
        }
      } finally {
        if (wasStateSet) {
          synchronized (writestate) {
            --writestate.compacting;
            // Wake any close() waiting in waitForFlushesAndCompactions().
            if (writestate.compacting <= 0) {
              writestate.notifyAll();
            }
          }
        }
      }
      status.markComplete("Compaction complete");
      return true;
    } finally {
      try {
        if (requestNeedsCancellation) store.cancelRequestedCompaction(compaction);
        if (status != null) status.cleanup();
      } finally {
        lock.readLock().unlock();
      }
    }
  }
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
  /**
   * Flushes this region's memstore to disk, running the coprocessor pre/post
   * flush hooks, under the region read lock.
   *
   * @return a {@link FlushResult} describing whether and how the flush ran
   * @throws IOException if the flush fails
   */
  public FlushResult flushcache() throws IOException {
    // fail-fast instead of waiting on the lock
    if (this.closing.get()) {
      String msg = "Skipping flush on " + this + " because closing";
      LOG.debug(msg);
      return new FlushResult(FlushResult.Result.CANNOT_FLUSH, msg);
    }
    MonitoredTask status = TaskMonitor.get().createStatus("Flushing " + this);
    status.setStatus("Acquiring readlock on region");
    // block waiting for the lock for flushing cache
    lock.readLock().lock();
    try {
      if (this.closed.get()) {
        String msg = "Skipping flush on " + this + " because closed";
        LOG.debug(msg);
        status.abort(msg);
        return new FlushResult(FlushResult.Result.CANNOT_FLUSH, msg);
      }
      if (coprocessorHost != null) {
        status.setStatus("Running coprocessor pre-flush hooks");
        coprocessorHost.preFlush();
      }
      // Reset the no-WAL counters; a flush persists everything, so the
      // "unflushed data written without WAL" tallies start over.
      if (numMutationsWithoutWAL.get() > 0) {
        numMutationsWithoutWAL.set(0);
        dataInMemoryWithoutWAL.set(0);
      }
      synchronized (writestate) {
        // Claim the flushing slot; only one flush runs at a time per region.
        if (!writestate.flushing && writestate.writesEnabled) {
          this.writestate.flushing = true;
        } else {
          if (LOG.isDebugEnabled()) {
            LOG.debug("NOT flushing memstore for region " + this
                + ", flushing=" + writestate.flushing + ", writesEnabled="
                + writestate.writesEnabled);
          }
          String msg = "Not flushing since "
              + (writestate.flushing ? "already flushing"
                  : "writes not enabled");
          status.abort(msg);
          return new FlushResult(FlushResult.Result.CANNOT_FLUSH, msg);
        }
      }
      try {
        FlushResult fs = internalFlushcache(status);

        if (coprocessorHost != null) {
          status.setStatus("Running post-flush coprocessor hooks");
          coprocessorHost.postFlush();
        }

        status.markComplete("Flush successful");
        return fs;
      } finally {
        synchronized (writestate) {
          // Release the flushing slot and wake anyone waiting on writestate
          // (e.g. close() in waitForFlushesAndCompactions()).
          writestate.flushing = false;
          this.writestate.flushRequested = false;
          writestate.notifyAll();
        }
      }
    } finally {
      lock.readLock().unlock();
      status.cleanup();
    }
  }
1664
1665
1666
1667
1668 boolean shouldFlush() {
1669
1670 if (this.completeSequenceId > 0
1671 && (this.completeSequenceId + this.flushPerChanges < this.sequenceId.get())) {
1672 return true;
1673 }
1674 if (flushCheckInterval <= 0) {
1675 return false;
1676 }
1677 long now = EnvironmentEdgeManager.currentTimeMillis();
1678
1679 if ((now - getLastFlushTime() < flushCheckInterval)) {
1680 return false;
1681 }
1682
1683
1684 for (Store s : this.getStores().values()) {
1685 if (s.timeOfOldestEdit() < now - flushCheckInterval) {
1686
1687 return true;
1688 }
1689 }
1690 return false;
1691 }
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
  /**
   * Flushes the memstore using this region's own WAL and a fresh sequence id
   * (the {@code -1} sentinel tells the worker to allocate one from the WAL).
   *
   * @param status monitored task updated with progress messages
   * @return result of the flush
   * @throws IOException if the flush fails
   */
  protected FlushResult internalFlushcache(MonitoredTask status)
      throws IOException {
    return internalFlushcache(this.log, -1, status);
  }
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742 protected FlushResult internalFlushcache(
1743 final HLog wal, final long myseqid, MonitoredTask status)
1744 throws IOException {
1745 if (this.rsServices != null && this.rsServices.isAborted()) {
1746
1747 throw new IOException("Aborting flush because server is abortted...");
1748 }
1749 final long startTime = EnvironmentEdgeManager.currentTimeMillis();
1750
1751
1752 if (this.memstoreSize.get() <= 0) {
1753 if(LOG.isDebugEnabled()) {
1754 LOG.debug("Empty memstore size for the current region "+this);
1755 }
1756 return new FlushResult(FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY, "Nothing to flush");
1757 }
1758
1759 LOG.info("Started memstore flush for " + this +
1760 ", current region memstore size " +
1761 StringUtils.humanReadableInt(this.memstoreSize.get()) +
1762 ((wal != null)? "": "; wal is null, using passed sequenceid=" + myseqid));
1763
1764
1765
1766
1767
1768
1769
1770
1771 MultiVersionConsistencyControl.WriteEntry w = null;
1772
1773
1774
1775
1776 status.setStatus("Obtaining lock to block concurrent updates");
1777
1778 this.updatesLock.writeLock().lock();
1779 long totalFlushableSize = 0;
1780 status.setStatus("Preparing to flush by snapshotting stores");
1781 TreeMap<byte[], StoreFlushContext> storeFlushCtxs
1782 = new TreeMap<byte[], StoreFlushContext>(Bytes.BYTES_COMPARATOR);
1783 TreeMap<byte[], Long> storeFlushableSize = new TreeMap<byte[], Long>(Bytes.BYTES_COMPARATOR);
1784 long flushSeqId = -1L;
1785 try {
1786
1787 w = mvcc.beginMemstoreInsert();
1788 mvcc.advanceMemstore(w);
1789
1790 if (wal != null) {
1791 if (!wal.startCacheFlush(this.getRegionInfo().getEncodedNameAsBytes())) {
1792 String msg = "Flush will not be started for ["
1793 + this.getRegionInfo().getEncodedName() + "] - because the WAL is closing.";
1794 status.setStatus(msg);
1795 return new FlushResult(FlushResult.Result.CANNOT_FLUSH, msg);
1796 }
1797 flushSeqId = this.sequenceId.incrementAndGet();
1798 } else {
1799
1800 flushSeqId = myseqid;
1801 }
1802
1803 for (Store s : stores.values()) {
1804 totalFlushableSize += s.getFlushableSize();
1805 byte[] storeName = s.getFamily().getName();
1806 storeFlushCtxs.put(storeName, s.createFlushContext(flushSeqId));
1807 storeFlushableSize.put(storeName, s.getFlushableSize());
1808 }
1809
1810
1811 for (StoreFlushContext flush : storeFlushCtxs.values()) {
1812 flush.prepare();
1813 }
1814 } finally {
1815 this.updatesLock.writeLock().unlock();
1816 }
1817 boolean compactionRequested = false;
1818 try {
1819 String s = "Finished memstore snapshotting " + this +
1820 ", syncing WAL and waiting on mvcc, flushsize=" + totalFlushableSize;
1821 status.setStatus(s);
1822 if (LOG.isTraceEnabled()) LOG.trace(s);
1823
1824
1825
1826 if (wal != null && !shouldSyncLog()) {
1827 wal.sync();
1828 }
1829
1830
1831
1832
1833
1834
1835 mvcc.waitForRead(w);
1836
1837 s = "Flushing stores of " + this;
1838 status.setStatus(s);
1839 if (LOG.isTraceEnabled()) LOG.trace(s);
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850 for (StoreFlushContext flush : storeFlushCtxs.values()) {
1851 flush.flushCache(status);
1852 }
1853
1854
1855
1856 for (Map.Entry<byte[], StoreFlushContext> flushEntry : storeFlushCtxs.entrySet()) {
1857 byte[] storeName = flushEntry.getKey();
1858 StoreFlushContext flush = flushEntry.getValue();
1859 boolean needsCompaction = flush.commit(status);
1860 if (needsCompaction) {
1861 compactionRequested = true;
1862 }
1863 if (flush.getCommittedFiles() == null || flush.getCommittedFiles().isEmpty()) {
1864 totalFlushableSize -= storeFlushableSize.get(storeName);
1865 }
1866 }
1867 storeFlushCtxs.clear();
1868
1869
1870 this.addAndGetGlobalMemstoreSize(-totalFlushableSize);
1871 } catch (Throwable t) {
1872
1873
1874
1875
1876
1877
1878 if (wal != null) {
1879 wal.abortCacheFlush(this.getRegionInfo().getEncodedNameAsBytes());
1880 }
1881 DroppedSnapshotException dse = new DroppedSnapshotException("region: " +
1882 Bytes.toStringBinary(getRegionName()));
1883 dse.initCause(t);
1884 status.abort("Flush failed: " + StringUtils.stringifyException(t));
1885
1886
1887
1888
1889
1890 this.closing.set(true);
1891
1892 if (rsServices != null) {
1893
1894 rsServices.abort("Replay of WAL required. Forcing server shutdown", dse);
1895 }
1896
1897 throw dse;
1898 }
1899
1900
1901 if (wal != null) {
1902 wal.completeCacheFlush(this.getRegionInfo().getEncodedNameAsBytes());
1903 }
1904
1905
1906 this.lastFlushTime = EnvironmentEdgeManager.currentTimeMillis();
1907
1908
1909 completeSequenceId = flushSeqId;
1910
1911
1912
1913 synchronized (this) {
1914 notifyAll();
1915 }
1916
1917 long time = EnvironmentEdgeManager.currentTimeMillis() - startTime;
1918 long memstoresize = this.memstoreSize.get();
1919 String msg = "Finished memstore flush of ~" +
1920 StringUtils.humanReadableInt(totalFlushableSize) + "/" + totalFlushableSize +
1921 ", currentsize=" +
1922 StringUtils.humanReadableInt(memstoresize) + "/" + memstoresize +
1923 " for region " + this + " in " + time + "ms, sequenceid=" + flushSeqId +
1924 ", compaction requested=" + compactionRequested +
1925 ((wal == null)? "; wal=null": "");
1926 LOG.info(msg);
1927 status.setStatus(msg);
1928 this.recentFlushes.add(new Pair<Long,Long>(time/1000, totalFlushableSize));
1929
1930 return new FlushResult(compactionRequested ? FlushResult.Result.FLUSHED_COMPACTION_NEEDED :
1931 FlushResult.Result.FLUSHED_NO_COMPACTION_NEEDED, flushSeqId);
1932 }
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
  /**
   * Convenience overload that searches within the catalog family.
   *
   * @param row the reference row
   * @return the closest row at or before {@code row}, or null if none
   * @throws IOException on read failure
   */
  Result getClosestRowBefore(final byte [] row)
  throws IOException{
    return getClosestRowBefore(row, HConstants.CATALOG_FAMILY);
  }
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
  /**
   * Returns the row in the given family that sorts at or closest before the
   * given row, or null if there is none. Runs the pre/post coprocessor hooks
   * around the lookup.
   *
   * @param row the reference row
   * @param family column family to search within
   * @return the full result for the found row, or null
   * @throws IOException on read failure
   */
  public Result getClosestRowBefore(final byte [] row, final byte [] family)
  throws IOException {
    if (coprocessorHost != null) {
      Result result = new Result();
      // A coprocessor may fully answer the request, bypassing the store read.
      if (coprocessorHost.preGetClosestRowBefore(row, family, result)) {
        return result;
      }
    }
    // look across all the HStores for this region and determine what the
    // closest key is across all column families, since the data may be sparse
    checkRow(row, "getClosestRowBefore");
    startRegionOperation(Operation.GET);
    this.readRequestsCount.increment();
    try {
      Store store = getStore(family);
      // get the closest key. (HStore.getRowKeyAtOrBefore can return null)
      KeyValue key = store.getRowKeyAtOrBefore(row);
      Result result = null;
      if (key != null) {
        // Found a candidate row key; fetch its full row content.
        Get get = new Get(key.getRow());
        get.addFamily(family);
        result = get(get);
      }
      if (coprocessorHost != null) {
        coprocessorHost.postGetClosestRowBefore(row, family, result);
      }
      return result;
    } finally {
      closeRegionOperation(Operation.GET);
    }
  }
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
  /**
   * Opens a scanner for the given scan with no additional scanners.
   *
   * @param scan the scan specification
   * @return a region scanner over the requested data
   * @throws IOException if a named family does not exist or setup fails
   */
  public RegionScanner getScanner(Scan scan) throws IOException {
    return getScanner(scan, null);
  }
2006
2007 void prepareScanner(Scan scan) throws IOException {
2008 if(!scan.hasFamilies()) {
2009
2010 for(byte[] family: this.htableDescriptor.getFamiliesKeys()){
2011 scan.addFamily(family);
2012 }
2013 }
2014 }
2015
  /**
   * Opens a scanner for the given scan, optionally merging in extra scanners.
   * Validates that every requested family exists before instantiating.
   *
   * @param scan the scan specification
   * @param additionalScanners extra scanners to include; may be null
   * @return a region scanner over the requested data
   * @throws IOException if a named family does not exist or setup fails
   */
  protected RegionScanner getScanner(Scan scan,
      List<KeyValueScanner> additionalScanners) throws IOException {
    startRegionOperation(Operation.SCAN);
    try {
      // Verify families are all valid
      prepareScanner(scan);
      if(scan.hasFamilies()) {
        for(byte [] family : scan.getFamilyMap().keySet()) {
          checkFamily(family);
        }
      }
      return instantiateRegionScanner(scan, additionalScanners);
    } finally {
      closeRegionOperation(Operation.SCAN);
    }
  }
2032
2033 protected RegionScanner instantiateRegionScanner(Scan scan,
2034 List<KeyValueScanner> additionalScanners) throws IOException {
2035 if (scan.isReversed()) {
2036 if (scan.getFilter() != null) {
2037 scan.getFilter().setReversed(true);
2038 }
2039 return new ReversedRegionScannerImpl(scan, additionalScanners, this);
2040 }
2041 return new RegionScannerImpl(scan, additionalScanners, this);
2042 }
2043
2044
2045
2046
2047 void prepareDelete(Delete delete) throws IOException {
2048
2049 if(delete.getFamilyCellMap().isEmpty()){
2050 for(byte [] family : this.htableDescriptor.getFamiliesKeys()){
2051
2052 delete.deleteFamily(family, delete.getTimeStamp());
2053 }
2054 } else {
2055 for(byte [] family : delete.getFamilyCellMap().keySet()) {
2056 if(family == null) {
2057 throw new NoSuchColumnFamilyException("Empty family is invalid");
2058 }
2059 checkFamily(family);
2060 }
2061 }
2062 }
2063
2064
2065
2066
2067
2068
2069
2070
  /**
   * Executes a Delete against this region via the batch-mutate path.
   *
   * @param delete the delete to apply
   * @throws IOException if the region is read-only, over resource limits, or
   *     the mutation fails
   */
  public void delete(Delete delete)
  throws IOException {
    checkReadOnly();
    checkResources();
    startRegionOperation(Operation.DELETE);
    try {
      delete.getRow();
      // All edits for the given row (across all column families) must happen atomically.
      doBatchMutate(delete);
    } finally {
      closeRegionOperation(Operation.DELETE);
    }
  }
2084
2085
2086
2087
  // Placeholder row key used only by the test-only delete(familyMap, ...) below.
  private static final byte [] FOR_UNIT_TESTS_ONLY = Bytes.toBytes("ForUnitTestsOnly");

  /**
   * Test-only helper: applies a delete built from a raw family map, using a
   * placeholder row key.
   *
   * @param familyMap family to cells to delete
   * @param durability durability setting for the delete
   * @throws IOException if the mutation fails
   */
  void delete(NavigableMap<byte[], List<Cell>> familyMap,
      Durability durability) throws IOException {
    Delete delete = new Delete(FOR_UNIT_TESTS_ONLY);
    delete.setFamilyCellMap(familyMap);
    delete.setDurability(durability);
    doBatchMutate(delete);
  }
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111 void prepareDeleteTimestamps(Mutation mutation, Map<byte[], List<Cell>> familyMap,
2112 byte[] byteNow) throws IOException {
2113 for (Map.Entry<byte[], List<Cell>> e : familyMap.entrySet()) {
2114
2115 byte[] family = e.getKey();
2116 List<Cell> cells = e.getValue();
2117 assert cells instanceof RandomAccess;
2118
2119 Map<byte[], Integer> kvCount = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
2120 int listSize = cells.size();
2121 for (int i=0; i < listSize; i++) {
2122 Cell cell = cells.get(i);
2123 KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
2124
2125
2126 if (kv.isLatestTimestamp() && kv.isDeleteType()) {
2127 byte[] qual = kv.getQualifier();
2128 if (qual == null) qual = HConstants.EMPTY_BYTE_ARRAY;
2129
2130 Integer count = kvCount.get(qual);
2131 if (count == null) {
2132 kvCount.put(qual, 1);
2133 } else {
2134 kvCount.put(qual, count + 1);
2135 }
2136 count = kvCount.get(qual);
2137
2138 Get get = new Get(kv.getRow());
2139 get.setMaxVersions(count);
2140 get.addColumn(family, qual);
2141 if (coprocessorHost != null) {
2142 if (!coprocessorHost.prePrepareTimeStampForDeleteVersion(mutation, cell,
2143 byteNow, get)) {
2144 updateDeleteLatestVersionTimeStamp(kv, get, count, byteNow);
2145 }
2146 } else {
2147 updateDeleteLatestVersionTimeStamp(kv, get, count, byteNow);
2148 }
2149 } else {
2150 kv.updateLatestStamp(byteNow);
2151 }
2152 }
2153 }
2154 }
2155
  /**
   * Rewrites a version-delete KeyValue's timestamp in place to match the
   * {@code count}-th newest existing version of the column, found via the
   * supplied Get. Falls back to "now" when fewer versions exist.
   *
   * @param kv the delete KeyValue whose timestamp is patched in its buffer
   * @param get pre-built Get fetching up to {@code count} versions
   * @param count which version (1 = newest) this delete occurrence targets
   * @param byteNow current time as bytes, used when the version is missing
   * @throws IOException if the Get fails
   * @throws RuntimeException if the Get returns more versions than requested
   */
  void updateDeleteLatestVersionTimeStamp(KeyValue kv, Get get, int count, byte[] byteNow)
      throws IOException {
    List<Cell> result = get(get, false);

    if (result.size() < count) {
      // Nothing to delete
      kv.updateLatestStamp(byteNow);
      return;
    }
    if (result.size() > count) {
      throw new RuntimeException("Unexpected size: " + result.size());
    }
    // Copy the found version's timestamp bytes directly into the delete
    // KeyValue's backing buffer.
    KeyValue getkv = KeyValueUtil.ensureKeyValue(result.get(count - 1));
    Bytes.putBytes(kv.getBuffer(), kv.getTimestampOffset(), getkv.getBuffer(),
        getkv.getTimestampOffset(), Bytes.SIZEOF_LONG);
  }
2172
2173
2174
2175
2176
  /**
   * Executes a Put against this region via the batch-mutate path.
   *
   * @param put the put to apply
   * @throws IOException if the region is read-only, over resource limits, or
   *     the mutation fails
   */
  public void put(Put put)
  throws IOException {
    checkReadOnly();

    // Do a rough check that we have resources to accept a write.  The check is
    // 'rough' in that between the resource check and the call to obtain a
    // read lock, resources may run out.  For now, the thought is that this
    // will be extremely rare; we'll deal with it when it happens.
    checkResources();
    startRegionOperation(Operation.PUT);
    try {
      // All edits for the given row (across all column families) must happen atomically.
      doBatchMutate(put);
    } finally {
      closeRegionOperation(Operation.PUT);
    }
  }
2194
2195
2196
2197
2198
2199
  /**
   * Tracks a batch of mutations as it is processed in mini-batches:
   * the operations, a cursor to the next unprocessed one, a per-operation
   * status array, and any WAL edits contributed by coprocessors.
   *
   * @param <T> operation type (client Mutation or replayed MutationReplay)
   */
  private abstract static class BatchOperationInProgress<T> {
    T[] operations;
    // Index of the first operation not yet attempted.
    int nextIndexToProcess = 0;
    OperationStatus[] retCodeDetails;
    WALEdit[] walEditsFromCoprocessors;

    public BatchOperationInProgress(T[] operations) {
      this.operations = operations;
      this.retCodeDetails = new OperationStatus[operations.length];
      this.walEditsFromCoprocessors = new WALEdit[operations.length];
      // Every operation starts out NOT_RUN until a mini-batch handles it.
      Arrays.fill(this.retCodeDetails, OperationStatus.NOT_RUN);
    }

    /** @return the mutation at the given index. */
    public abstract Mutation getMutation(int index);
    /** @return the nonce group for the operation at the given index. */
    public abstract long getNonceGroup(int index);
    /** @return the nonce for the operation at the given index. */
    public abstract long getNonce(int index);
    /** This method is potentially expensive and should only be used for non-replay CP path. */
    public abstract Mutation[] getMutationsForCoprocs();
    /** @return true when this batch comes from WAL replay. */
    public abstract boolean isInReplay();

    /** @return true when every operation has been processed. */
    public boolean isDone() {
      return nextIndexToProcess == operations.length;
    }
  }
2224
2225 private static class MutationBatch extends BatchOperationInProgress<Mutation> {
2226 private long nonceGroup;
2227 private long nonce;
2228 public MutationBatch(Mutation[] operations, long nonceGroup, long nonce) {
2229 super(operations);
2230 this.nonceGroup = nonceGroup;
2231 this.nonce = nonce;
2232 }
2233
2234 public Mutation getMutation(int index) {
2235 return this.operations[index];
2236 }
2237
2238 @Override
2239 public long getNonceGroup(int index) {
2240 return nonceGroup;
2241 }
2242
2243 @Override
2244 public long getNonce(int index) {
2245 return nonce;
2246 }
2247
2248 @Override
2249 public Mutation[] getMutationsForCoprocs() {
2250 return this.operations;
2251 }
2252
2253 @Override
2254 public boolean isInReplay() {
2255 return false;
2256 }
2257 }
2258
  /**
   * Batch of mutations recovered from WAL replay; each entry carries its own
   * nonce group and nonce.
   */
  private static class ReplayBatch extends BatchOperationInProgress<HLogSplitter.MutationReplay> {
    public ReplayBatch(MutationReplay[] operations) {
      super(operations);
    }

    @Override
    public Mutation getMutation(int index) {
      return this.operations[index].mutation;
    }

    @Override
    public long getNonceGroup(int index) {
      return this.operations[index].nonceGroup;
    }

    @Override
    public long getNonce(int index) {
      return this.operations[index].nonce;
    }

    @Override
    public Mutation[] getMutationsForCoprocs() {
      // Coprocessor hooks do not run on the replay path, so this is never
      // legitimately reachable.
      assert false;
      throw new RuntimeException("Should not be called for replay batch");
    }

    @Override
    public boolean isInReplay() {
      return true;
    }
  }
2290
2291
2292
2293
2294
2295
2296
2297
2298
  /**
   * Atomically applies the given mutations under a shared nonce group/nonce.
   *
   * @param mutations the mutations to apply
   * @param nonceGroup nonce group shared by the whole batch
   * @param nonce nonce shared by the whole batch
   * @return one status per mutation, in input order
   * @throws IOException if the batch fails
   */
  public OperationStatus[] batchMutate(
      Mutation[] mutations, long nonceGroup, long nonce) throws IOException {
    // As it stands, this is used for 3 things
    //  * batchMutate with single mutation - put/delete, separate or from checkAndMutate.
    //  * coprocessor calls (see ex. BulkDeleteEndpoint).
    // So nonces are not really ever used by HBase. They could be by coprocs, and checkAnd...
    return batchMutate(new MutationBatch(mutations, nonceGroup, nonce));
  }

  /**
   * Overload using no nonces.
   *
   * @param mutations the mutations to apply
   * @return one status per mutation, in input order
   * @throws IOException if the batch fails
   */
  public OperationStatus[] batchMutate(Mutation[] mutations) throws IOException {
    return batchMutate(mutations, HConstants.NO_NONCE, HConstants.NO_NONCE);
  }

  /**
   * Applies mutations recovered from WAL replay (each carrying its own
   * nonce group/nonce).
   *
   * @param mutations the replayed mutations to apply
   * @return one status per mutation, in input order
   * @throws IOException if the batch fails
   */
  public OperationStatus[] batchReplay(HLogSplitter.MutationReplay[] mutations)
      throws IOException {
    return batchMutate(new ReplayBatch(mutations));
  }
2323
2324
2325
2326
2327
2328
2329
2330
2331
  /**
   * Drives a batch to completion by repeatedly applying mini-batches until
   * every operation has been attempted, requesting a flush whenever the
   * memstore crosses the flush size.
   *
   * @param batchOp the batch (client or replay) to process
   * @return the per-operation status array from the batch
   * @throws IOException if a mini-batch fails
   */
  OperationStatus[] batchMutate(BatchOperationInProgress<?> batchOp) throws IOException {
    boolean initialized = false;
    Operation op = batchOp.isInReplay() ? Operation.REPLAY_BATCH_MUTATE : Operation.BATCH_MUTATE;
    startRegionOperation(op);
    try {
      while (!batchOp.isDone()) {
        // Replay is allowed even on a read-only region; client writes are not.
        if (!batchOp.isInReplay()) {
          checkReadOnly();
        }
        checkResources();

        if (!initialized) {
          // One-time setup on the first mini-batch: count the write requests
          // and run the coprocessor pre-mutation hooks (client path only).
          this.writeRequestsCount.add(batchOp.operations.length);
          if (!batchOp.isInReplay()) {
            doPreMutationHook(batchOp);
          }
          initialized = true;
        }
        long addedSize = doMiniBatchMutation(batchOp);
        long newSize = this.addAndGetGlobalMemstoreSize(addedSize);
        if (isFlushSize(newSize)) {
          requestFlush();
        }
      }
    } finally {
      closeRegionOperation(op);
    }
    return batchOp.retCodeDetails;
  }
2361
2362
  /**
   * Runs the coprocessor prePut/preDelete hooks over every operation in the
   * batch. Operations a coprocessor fully handles are marked SUCCESS so the
   * mini-batch loop skips them; unsupported mutation types are marked FAILURE.
   * WAL edits produced by coprocessors are stashed per-operation.
   *
   * @param batchOp the batch whose operations get the pre-hooks
   * @throws IOException if a coprocessor hook fails
   */
  private void doPreMutationHook(BatchOperationInProgress<?> batchOp)
      throws IOException {
    /* Run coprocessor pre hook outside of locks to avoid deadlock */
    WALEdit walEdit = new WALEdit();
    if (coprocessorHost != null) {
      for (int i = 0 ; i < batchOp.operations.length; i++) {
        Mutation m = batchOp.getMutation(i);
        if (m instanceof Put) {
          if (coprocessorHost.prePut((Put) m, walEdit, m.getDurability())) {
            // pre hook says skip this Put
            // mark as success and skip in doMiniBatchMutation
            batchOp.retCodeDetails[i] = OperationStatus.SUCCESS;
          }
        } else if (m instanceof Delete) {
          Delete curDel = (Delete) m;
          if (curDel.getFamilyCellMap().isEmpty()) {
            // handle deleting a row case
            prepareDelete(curDel);
          }
          if (coprocessorHost.preDelete(curDel, walEdit, m.getDurability())) {
            // pre hook says skip this Delete
            // mark as success and skip in doMiniBatchMutation
            batchOp.retCodeDetails[i] = OperationStatus.SUCCESS;
          }
        } else {
          // In case of passing Append mutations along with the Puts and Deletes in batchMutate
          // mark the operation return code as failure so that it will not be considered in
          // the doMiniBatchMutation
          batchOp.retCodeDetails[i] = new OperationStatus(OperationStatusCode.FAILURE,
              "Put/Delete mutations only supported in batchMutate() now");
        }
        if (!walEdit.isEmpty()) {
          // Hand this operation's coprocessor edits off and start a fresh
          // WALEdit for the next operation.
          batchOp.walEditsFromCoprocessors[i] = walEdit;
          walEdit = new WALEdit();
        }
      }
    }
  }
2401
2402 @SuppressWarnings("unchecked")
2403 private long doMiniBatchMutation(BatchOperationInProgress<?> batchOp) throws IOException {
2404 boolean isInReplay = batchOp.isInReplay();
2405
2406 boolean putsCfSetConsistent = true;
2407
2408 Set<byte[]> putsCfSet = null;
2409
2410 boolean deletesCfSetConsistent = true;
2411
2412 Set<byte[]> deletesCfSet = null;
2413
2414 long currentNonceGroup = HConstants.NO_NONCE, currentNonce = HConstants.NO_NONCE;
2415 WALEdit walEdit = new WALEdit(isInReplay);
2416 MultiVersionConsistencyControl.WriteEntry w = null;
2417 long txid = 0;
2418 boolean doRollBackMemstore = false;
2419 boolean locked = false;
2420
2421
2422 List<RowLock> acquiredRowLocks = Lists.newArrayListWithCapacity(batchOp.operations.length);
2423
2424 Map<byte[], List<Cell>>[] familyMaps = new Map[batchOp.operations.length];
2425
2426 int firstIndex = batchOp.nextIndexToProcess;
2427 int lastIndexExclusive = firstIndex;
2428 boolean success = false;
2429 int noOfPuts = 0, noOfDeletes = 0;
2430 try {
2431
2432
2433
2434
2435 int numReadyToWrite = 0;
2436 long now = EnvironmentEdgeManager.currentTimeMillis();
2437 while (lastIndexExclusive < batchOp.operations.length) {
2438 Mutation mutation = batchOp.getMutation(lastIndexExclusive);
2439 boolean isPutMutation = mutation instanceof Put;
2440
2441 Map<byte[], List<Cell>> familyMap = mutation.getFamilyCellMap();
2442
2443 familyMaps[lastIndexExclusive] = familyMap;
2444
2445
2446 if (batchOp.retCodeDetails[lastIndexExclusive].getOperationStatusCode()
2447 != OperationStatusCode.NOT_RUN) {
2448 lastIndexExclusive++;
2449 continue;
2450 }
2451
2452 try {
2453 if (isPutMutation) {
2454
2455 if (isInReplay) {
2456 removeNonExistentColumnFamilyForReplay(familyMap);
2457 } else {
2458 checkFamilies(familyMap.keySet());
2459 }
2460 checkTimestamps(mutation.getFamilyCellMap(), now);
2461 } else {
2462 prepareDelete((Delete) mutation);
2463 }
2464 checkRow(mutation.getRow(), "doMiniBatchMutation");
2465 } catch (NoSuchColumnFamilyException nscf) {
2466 LOG.warn("No such column family in batch mutation", nscf);
2467 batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus(
2468 OperationStatusCode.BAD_FAMILY, nscf.getMessage());
2469 lastIndexExclusive++;
2470 continue;
2471 } catch (FailedSanityCheckException fsce) {
2472 LOG.warn("Batch Mutation did not pass sanity check", fsce);
2473 batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus(
2474 OperationStatusCode.SANITY_CHECK_FAILURE, fsce.getMessage());
2475 lastIndexExclusive++;
2476 continue;
2477 } catch (WrongRegionException we) {
2478 LOG.warn("Batch mutation had a row that does not belong to this region", we);
2479 batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus(
2480 OperationStatusCode.SANITY_CHECK_FAILURE, we.getMessage());
2481 lastIndexExclusive++;
2482 continue;
2483 }
2484
2485
2486
2487 boolean shouldBlock = numReadyToWrite == 0;
2488 RowLock rowLock = null;
2489 try {
2490 rowLock = getRowLockInternal(mutation.getRow(), shouldBlock);
2491 } catch (IOException ioe) {
2492 LOG.warn("Failed getting lock in batch put, row="
2493 + Bytes.toStringBinary(mutation.getRow()), ioe);
2494 }
2495 if (rowLock == null) {
2496
2497 break;
2498 } else {
2499 acquiredRowLocks.add(rowLock);
2500 }
2501
2502 lastIndexExclusive++;
2503 numReadyToWrite++;
2504
2505 if (isPutMutation) {
2506
2507
2508
2509 if (putsCfSet == null) {
2510 putsCfSet = mutation.getFamilyCellMap().keySet();
2511 } else {
2512 putsCfSetConsistent = putsCfSetConsistent
2513 && mutation.getFamilyCellMap().keySet().equals(putsCfSet);
2514 }
2515 } else {
2516 if (deletesCfSet == null) {
2517 deletesCfSet = mutation.getFamilyCellMap().keySet();
2518 } else {
2519 deletesCfSetConsistent = deletesCfSetConsistent
2520 && mutation.getFamilyCellMap().keySet().equals(deletesCfSet);
2521 }
2522 }
2523 }
2524
2525
2526
2527 now = EnvironmentEdgeManager.currentTimeMillis();
2528 byte[] byteNow = Bytes.toBytes(now);
2529
2530
2531 if (numReadyToWrite <= 0) return 0L;
2532
2533
2534
2535
2536
2537
2538 for (int i = firstIndex; i < lastIndexExclusive; i++) {
2539
2540 if (batchOp.retCodeDetails[i].getOperationStatusCode()
2541 != OperationStatusCode.NOT_RUN) continue;
2542
2543 Mutation mutation = batchOp.getMutation(i);
2544 if (mutation instanceof Put) {
2545 updateKVTimestamps(familyMaps[i].values(), byteNow);
2546 noOfPuts++;
2547 } else {
2548 if (!isInReplay) {
2549 prepareDeleteTimestamps(mutation, familyMaps[i], byteNow);
2550 }
2551 noOfDeletes++;
2552 }
2553 rewriteCellTags(familyMaps[i], mutation);
2554 }
2555
2556 lock(this.updatesLock.readLock(), numReadyToWrite);
2557 locked = true;
2558
2559
2560
2561
2562
2563 w = mvcc.beginMemstoreInsert();
2564
2565
2566 if (!isInReplay && coprocessorHost != null) {
2567 MiniBatchOperationInProgress<Mutation> miniBatchOp =
2568 new MiniBatchOperationInProgress<Mutation>(batchOp.getMutationsForCoprocs(),
2569 batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex, lastIndexExclusive);
2570 if (coprocessorHost.preBatchMutate(miniBatchOp)) return 0L;
2571 }
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582 long addedSize = 0;
2583 for (int i = firstIndex; i < lastIndexExclusive; i++) {
2584 if (batchOp.retCodeDetails[i].getOperationStatusCode()
2585 != OperationStatusCode.NOT_RUN) {
2586 continue;
2587 }
2588 doRollBackMemstore = true;
2589 addedSize += applyFamilyMapToMemstore(familyMaps[i], w);
2590 }
2591
2592
2593
2594
2595 boolean hasWalAppends = false;
2596 Durability durability = Durability.USE_DEFAULT;
2597 for (int i = firstIndex; i < lastIndexExclusive; i++) {
2598
2599 if (batchOp.retCodeDetails[i].getOperationStatusCode()
2600 != OperationStatusCode.NOT_RUN) {
2601 continue;
2602 }
2603 batchOp.retCodeDetails[i] = OperationStatus.SUCCESS;
2604
2605 Mutation m = batchOp.getMutation(i);
2606 Durability tmpDur = getEffectiveDurability(m.getDurability());
2607 if (tmpDur.ordinal() > durability.ordinal()) {
2608 durability = tmpDur;
2609 }
2610 if (tmpDur == Durability.SKIP_WAL) {
2611 recordMutationWithoutWal(m.getFamilyCellMap());
2612 continue;
2613 }
2614
2615 long nonceGroup = batchOp.getNonceGroup(i), nonce = batchOp.getNonce(i);
2616
2617
2618
2619 if (nonceGroup != currentNonceGroup || nonce != currentNonce) {
2620 if (walEdit.size() > 0) {
2621 assert isInReplay;
2622 if (!isInReplay) {
2623 throw new IOException("Multiple nonces per batch and not in replay");
2624 }
2625
2626 txid = this.log.appendNoSync(this.getRegionInfo(), htableDescriptor.getTableName(),
2627 walEdit, m.getClusterIds(), now, htableDescriptor, this.sequenceId, true,
2628 currentNonceGroup, currentNonce);
2629 hasWalAppends = true;
2630 walEdit = new WALEdit(isInReplay);
2631 }
2632 currentNonceGroup = nonceGroup;
2633 currentNonce = nonce;
2634 }
2635
2636
2637 WALEdit fromCP = batchOp.walEditsFromCoprocessors[i];
2638 if (fromCP != null) {
2639 for (KeyValue kv : fromCP.getKeyValues()) {
2640 walEdit.add(kv);
2641 }
2642 }
2643 addFamilyMapToWALEdit(familyMaps[i], walEdit);
2644 }
2645
2646
2647
2648
2649 Mutation mutation = batchOp.getMutation(firstIndex);
2650 if (walEdit.size() > 0) {
2651 txid = this.log.appendNoSync(this.getRegionInfo(), this.htableDescriptor.getTableName(),
2652 walEdit, mutation.getClusterIds(), now, this.htableDescriptor, this.sequenceId,
2653 true, currentNonceGroup, currentNonce);
2654 hasWalAppends = true;
2655 }
2656
2657
2658
2659
2660 if (locked) {
2661 this.updatesLock.readLock().unlock();
2662 locked = false;
2663 }
2664 releaseRowLocks(acquiredRowLocks);
2665
2666
2667
2668
2669 if (hasWalAppends) {
2670 syncOrDefer(txid, durability);
2671 }
2672 doRollBackMemstore = false;
2673
2674 if (!isInReplay && coprocessorHost != null) {
2675 MiniBatchOperationInProgress<Mutation> miniBatchOp =
2676 new MiniBatchOperationInProgress<Mutation>(batchOp.getMutationsForCoprocs(),
2677 batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex, lastIndexExclusive);
2678 coprocessorHost.postBatchMutate(miniBatchOp);
2679 }
2680
2681
2682
2683
2684 if (w != null) {
2685 mvcc.completeMemstoreInsert(w);
2686 w = null;
2687 }
2688
2689
2690
2691
2692
2693 if (!isInReplay && coprocessorHost != null) {
2694 for (int i = firstIndex; i < lastIndexExclusive; i++) {
2695
2696 if (batchOp.retCodeDetails[i].getOperationStatusCode()
2697 != OperationStatusCode.SUCCESS) {
2698 continue;
2699 }
2700 Mutation m = batchOp.getMutation(i);
2701 if (m instanceof Put) {
2702 coprocessorHost.postPut((Put) m, walEdit, m.getDurability());
2703 } else {
2704 coprocessorHost.postDelete((Delete) m, walEdit, m.getDurability());
2705 }
2706 }
2707 }
2708
2709 success = true;
2710 return addedSize;
2711 } finally {
2712
2713
2714 if (doRollBackMemstore) {
2715 rollbackMemstore(batchOp, familyMaps, firstIndex, lastIndexExclusive);
2716 }
2717 if (w != null) mvcc.completeMemstoreInsert(w);
2718
2719 if (locked) {
2720 this.updatesLock.readLock().unlock();
2721 }
2722 releaseRowLocks(acquiredRowLocks);
2723
2724
2725
2726
2727
2728
2729
2730 if (noOfPuts > 0) {
2731
2732 if (this.metricsRegion != null) {
2733 this.metricsRegion.updatePut();
2734 }
2735 }
2736 if (noOfDeletes > 0) {
2737
2738 if (this.metricsRegion != null) {
2739 this.metricsRegion.updateDelete();
2740 }
2741 }
2742 if (!success) {
2743 for (int i = firstIndex; i < lastIndexExclusive; i++) {
2744 if (batchOp.retCodeDetails[i].getOperationStatusCode() == OperationStatusCode.NOT_RUN) {
2745 batchOp.retCodeDetails[i] = OperationStatus.FAILURE;
2746 }
2747 }
2748 }
2749 if (coprocessorHost != null && !batchOp.isInReplay()) {
2750
2751
2752 MiniBatchOperationInProgress<Mutation> miniBatchOp =
2753 new MiniBatchOperationInProgress<Mutation>(batchOp.getMutationsForCoprocs(),
2754 batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex,
2755 lastIndexExclusive);
2756 coprocessorHost.postBatchMutateIndispensably(miniBatchOp, success);
2757 }
2758
2759 batchOp.nextIndexToProcess = lastIndexExclusive;
2760 }
2761 }
2762
2763
2764
2765
2766
2767 protected Durability getEffectiveDurability(Durability d) {
2768 return d == Durability.USE_DEFAULT ? this.durability : d;
2769 }
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
  /**
   * Atomically checks a single column's value against {@code comparator} under
   * {@code compareOp} and, only if the check passes, applies the given Put or
   * Delete to the same row. The row lock held across the read and the write is
   * what makes the pair atomic with respect to other writers.
   *
   * @param row row to check and mutate (must equal {@code w.getRow()})
   * @param family column family of the checked cell
   * @param qualifier qualifier of the checked cell
   * @param compareOp how to compare the stored value against the comparator
   * @param comparator expected value; a null/empty value means "column absent"
   * @param w the Put or Delete to apply when the check matches
   * @param writeToWAL unused here; durability comes from the mutation itself
   * @return true if the check matched and the mutation was applied
   * @throws IOException if the mutation is not a Put/Delete, rows mismatch, or
   *         the underlying read/write fails
   */
  public boolean checkAndMutate(byte [] row, byte [] family, byte [] qualifier,
      CompareOp compareOp, ByteArrayComparable comparator, Mutation w,
      boolean writeToWAL)
  throws IOException{
    checkReadOnly();
    // Fail fast if the region's memstore is over its blocking limit.
    checkResources();
    boolean isPut = w instanceof Put;
    if (!isPut && !(w instanceof Delete))
      throw new org.apache.hadoop.hbase.DoNotRetryIOException("Action must " +
          "be Put or Delete");
    if (!Bytes.equals(row, w.getRow())) {
      throw new org.apache.hadoop.hbase.DoNotRetryIOException("Action's " +
          "getRow must match the passed row");
    }

    startRegionOperation();
    try {
      Get get = new Get(row);
      checkFamily(family);
      get.addColumn(family, qualifier);

      // Lock the row: everything from the read to doBatchMutate below must be
      // atomic with respect to other mutators of this row.
      RowLock rowLock = getRowLock(get.getRow());
      // Wait for all previous MVCC transactions to complete so the read below
      // observes the latest committed value.
      mvcc.completeMemstoreInsert(mvcc.beginMemstoreInsert());
      try {
        if (this.getCoprocessorHost() != null) {
          Boolean processed = null;
          if (w instanceof Put) {
            processed = this.getCoprocessorHost().preCheckAndPutAfterRowLock(row, family,
              qualifier, compareOp, comparator, (Put) w);
          } else if (w instanceof Delete) {
            processed = this.getCoprocessorHost().preCheckAndDeleteAfterRowLock(row, family,
              qualifier, compareOp, comparator, (Delete) w);
          }
          // Non-null means a coprocessor short-circuited the operation and
          // supplies the result itself.
          if (processed != null) {
            return processed;
          }
        }
        List<Cell> result = get(get, false);

        boolean valueIsNull = comparator.getValue() == null ||
          comparator.getValue().length == 0;
        boolean matches = false;
        long cellTs = 0;
        if (result.size() == 0 && valueIsNull) {
          // Column absent and the caller is checking for absence.
          matches = true;
        } else if (result.size() > 0 && result.get(0).getValueLength() == 0 &&
            valueIsNull) {
          // Column present but with an empty value: also treated as "no value".
          matches = true;
          cellTs = result.get(0).getTimestamp();
        } else if (result.size() == 1 && !valueIsNull) {
          Cell kv = result.get(0);
          cellTs = kv.getTimestamp();
          int compareResult = comparator.compareTo(kv.getValueArray(),
            kv.getValueOffset(), kv.getValueLength());
          switch (compareOp) {
          case LESS:
            matches = compareResult < 0;
            break;
          case LESS_OR_EQUAL:
            matches = compareResult <= 0;
            break;
          case EQUAL:
            matches = compareResult == 0;
            break;
          case NOT_EQUAL:
            matches = compareResult != 0;
            break;
          case GREATER_OR_EQUAL:
            matches = compareResult >= 0;
            break;
          case GREATER:
            matches = compareResult > 0;
            break;
          default:
            throw new RuntimeException("Unknown Compare op " + compareOp.name());
          }
        }

        if (matches) {
          // Stamp the new cells with max(now, checked cell's ts) so the write is
          // guaranteed to sort newer than the value we just checked, even if the
          // clock moved backwards.
          long now = EnvironmentEdgeManager.currentTimeMillis();
          long ts = Math.max(now, cellTs);
          byte[] byteTs = Bytes.toBytes(ts);

          if (w instanceof Put) {
            updateKVTimestamps(w.getFamilyCellMap().values(), byteTs);
          }
          // Deletes keep their own timestamps.

          // All the usual checks/WAL handling happen in the batch path.
          doBatchMutate((Mutation)w);
          this.checkAndMutateChecksPassed.increment();
          return true;
        }
        this.checkAndMutateChecksFailed.increment();
        return false;
      } finally {
        rowLock.release();
      }
    } finally {
      closeRegionOperation();
    }
  }
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
  /**
   * Atomically checks a single column's value against {@code comparator} under
   * {@code compareOp} and, only if the check passes, applies the whole
   * {@link RowMutations} to the row. Mirrors {@link #checkAndMutate} but applies
   * a multi-mutation batch via {@code mutateRow}.
   *
   * @param row row to check and mutate
   * @param family column family of the checked cell
   * @param qualifier qualifier of the checked cell
   * @param compareOp how to compare the stored value against the comparator
   * @param comparator expected value; a null/empty value means "column absent"
   * @param rm mutations to apply when the check matches
   * @param writeToWAL unused here; durability comes from the mutations
   * @return true if the check matched and the mutations were applied
   * @throws IOException on read or write failure
   */
  public boolean checkAndRowMutate(byte [] row, byte [] family, byte [] qualifier,
      CompareOp compareOp, ByteArrayComparable comparator, RowMutations rm,
      boolean writeToWAL)
  throws IOException{
    checkReadOnly();
    // Fail fast if the region's memstore is over its blocking limit.
    checkResources();

    startRegionOperation();
    try {
      Get get = new Get(row);
      checkFamily(family);
      get.addColumn(family, qualifier);

      // Lock the row: the read-check-write sequence below must be atomic with
      // respect to other mutators of this row.
      RowLock rowLock = getRowLock(get.getRow());
      // Wait for all previous MVCC transactions to complete so the read below
      // observes the latest committed value.
      mvcc.completeMemstoreInsert(mvcc.beginMemstoreInsert());
      try {
        List<Cell> result = get(get, false);

        boolean valueIsNull = comparator.getValue() == null ||
          comparator.getValue().length == 0;
        boolean matches = false;
        long cellTs = 0;
        if (result.size() == 0 && valueIsNull) {
          // Column absent and the caller is checking for absence.
          matches = true;
        } else if (result.size() > 0 && result.get(0).getValueLength() == 0 &&
            valueIsNull) {
          // Column present but with an empty value: also treated as "no value".
          matches = true;
          cellTs = result.get(0).getTimestamp();
        } else if (result.size() == 1 && !valueIsNull) {
          Cell kv = result.get(0);
          cellTs = kv.getTimestamp();
          int compareResult = comparator.compareTo(kv.getValueArray(),
            kv.getValueOffset(), kv.getValueLength());
          switch (compareOp) {
          case LESS:
            matches = compareResult < 0;
            break;
          case LESS_OR_EQUAL:
            matches = compareResult <= 0;
            break;
          case EQUAL:
            matches = compareResult == 0;
            break;
          case NOT_EQUAL:
            matches = compareResult != 0;
            break;
          case GREATER_OR_EQUAL:
            matches = compareResult >= 0;
            break;
          case GREATER:
            matches = compareResult > 0;
            break;
          default:
            throw new RuntimeException("Unknown Compare op " + compareOp.name());
          }
        }

        if (matches) {
          // Stamp the new cells with max(now, checked cell's ts) so the writes
          // are guaranteed to sort newer than the value we just checked.
          long now = EnvironmentEdgeManager.currentTimeMillis();
          long ts = Math.max(now, cellTs);
          byte[] byteTs = Bytes.toBytes(ts);

          for (Mutation w : rm.getMutations()) {
            if (w instanceof Put) {
              updateKVTimestamps(w.getFamilyCellMap().values(), byteTs);
            }
            // Deletes keep their own timestamps.
          }

          // Apply the batch; mutateRow handles WAL and memstore.
          mutateRow(rm);
          this.checkAndMutateChecksPassed.increment();
          return true;
        }
        this.checkAndMutateChecksFailed.increment();
        return false;
      } finally {
        rowLock.release();
      }
    } finally {
      closeRegionOperation();
    }
  }
3002
3003 private void doBatchMutate(Mutation mutation) throws IOException, DoNotRetryIOException {
3004
3005 OperationStatus[] batchMutate = this.batchMutate(new Mutation[] { mutation },
3006 HConstants.NO_NONCE, HConstants.NO_NONCE);
3007 if (batchMutate[0].getOperationStatusCode().equals(OperationStatusCode.SANITY_CHECK_FAILURE)) {
3008 throw new FailedSanityCheckException(batchMutate[0].getExceptionMsg());
3009 } else if (batchMutate[0].getOperationStatusCode().equals(OperationStatusCode.BAD_FAMILY)) {
3010 throw new NoSuchColumnFamilyException(batchMutate[0].getExceptionMsg());
3011 }
3012 }
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027 public void addRegionToSnapshot(SnapshotDescription desc,
3028 ForeignExceptionSnare exnSnare) throws IOException {
3029 Path rootDir = FSUtils.getRootDir(conf);
3030 Path snapshotDir = SnapshotDescriptionUtils.getWorkingSnapshotDir(desc, rootDir);
3031
3032 SnapshotManifest manifest = SnapshotManifest.create(conf, getFilesystem(),
3033 snapshotDir, desc, exnSnare);
3034 manifest.addRegion(this);
3035 }
3036
3037
3038
3039
3040
3041 void updateKVTimestamps(final Iterable<List<Cell>> keyLists, final byte[] now) {
3042 for (List<Cell> cells: keyLists) {
3043 if (cells == null) continue;
3044 assert cells instanceof RandomAccess;
3045 int listSize = cells.size();
3046 for (int i=0; i < listSize; i++) {
3047 Cell cell = cells.get(i);
3048 KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
3049 kv.updateLatestStamp(now);
3050 }
3051 }
3052 }
3053
3054
3055
3056
3057 void rewriteCellTags(Map<byte[], List<Cell>> familyMap, final Mutation m) {
3058
3059
3060
3061 if (m.getTTL() == Long.MAX_VALUE) {
3062 return;
3063 }
3064
3065
3066
3067 for (Map.Entry<byte[], List<Cell>> e: familyMap.entrySet()) {
3068 List<Cell> cells = e.getValue();
3069 assert cells instanceof RandomAccess;
3070 int listSize = cells.size();
3071 for (int i = 0; i < listSize; i++) {
3072 Cell cell = cells.get(i);
3073 List<Tag> newTags = new ArrayList<Tag>();
3074 Iterator<Tag> tagIterator = CellUtil.tagsIterator(cell.getTagsArray(),
3075 cell.getTagsOffset(), cell.getTagsLengthUnsigned());
3076
3077
3078
3079 while (tagIterator.hasNext()) {
3080
3081
3082
3083 newTags.add(tagIterator.next());
3084 }
3085
3086
3087
3088
3089
3090 if (m.getTTL() != Long.MAX_VALUE) {
3091
3092 newTags.add(new Tag(TagType.TTL_TAG_TYPE, Bytes.toBytes(m.getTTL())));
3093 }
3094
3095
3096
3097 cells.set(i, new KeyValue(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(),
3098 cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(),
3099 cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(),
3100 cell.getTimestamp(), KeyValue.Type.codeToType(cell.getTypeByte()),
3101 cell.getValueArray(), cell.getValueOffset(), cell.getValueLength(),
3102 newTags));
3103 }
3104 }
3105 }
3106
3107
3108
3109
3110
3111
3112
3113 private void checkResources()
3114 throws RegionTooBusyException {
3115
3116 if (this.getRegionInfo().isMetaRegion()) return;
3117
3118 if (this.memstoreSize.get() > this.blockingMemStoreSize) {
3119 blockedRequestsCount.increment();
3120 requestFlush();
3121 throw new RegionTooBusyException("Above memstore limit, " +
3122 "regionName=" + (this.getRegionInfo() == null ? "unknown" :
3123 this.getRegionInfo().getRegionNameAsString()) +
3124 ", server=" + (this.getRegionServerServices() == null ? "unknown" :
3125 this.getRegionServerServices().getServerName()) +
3126 ", memstoreSize=" + memstoreSize.get() +
3127 ", blockingMemStoreSize=" + blockingMemStoreSize);
3128 }
3129 }
3130
3131
3132
3133
3134 protected void checkReadOnly() throws IOException {
3135 if (this.writestate.isReadOnly()) {
3136 throw new DoNotRetryIOException("region is read only");
3137 }
3138 }
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148 private void put(final byte [] row, byte [] family, List<Cell> edits)
3149 throws IOException {
3150 NavigableMap<byte[], List<Cell>> familyMap;
3151 familyMap = new TreeMap<byte[], List<Cell>>(Bytes.BYTES_COMPARATOR);
3152
3153 familyMap.put(family, edits);
3154 Put p = new Put(row);
3155 p.setFamilyCellMap(familyMap);
3156 doBatchMutate(p);
3157 }
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
  /**
   * Writes the cells of one mutation's family map into the memstore, stamping
   * each with the MVCC write number so concurrent readers do not see them until
   * the transaction is completed.
   *
   * @param familyMap per-family cells to add
   * @param localizedWriteEntry MVCC write entry to stamp cells with; when null,
   *        a fresh entry is begun and completed entirely inside this call
   * @return the heap size added to the memstore
   */
  private long applyFamilyMapToMemstore(Map<byte[], List<Cell>> familyMap,
    MultiVersionConsistencyControl.WriteEntry localizedWriteEntry) {
    long size = 0;
    boolean freemvcc = false;

    try {
      if (localizedWriteEntry == null) {
        // No entry supplied: run a self-contained MVCC transaction.
        localizedWriteEntry = mvcc.beginMemstoreInsert();
        freemvcc = true;
      }

      for (Map.Entry<byte[], List<Cell>> e : familyMap.entrySet()) {
        byte[] family = e.getKey();
        List<Cell> cells = e.getValue();
        // Indexed loop relies on the lists being RandomAccess (hot path).
        assert cells instanceof RandomAccess;
        Store store = getStore(family);
        int listSize = cells.size();
        for (int i=0; i < listSize; i++) {
          Cell cell = cells.get(i);
          KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
          kv.setMvccVersion(localizedWriteEntry.getWriteNumber());
          size += store.add(kv);
        }
      }
    } finally {
      if (freemvcc) {
        // Only complete an entry we began ourselves; a caller-supplied entry
        // is the caller's responsibility to complete.
        mvcc.completeMemstoreInsert(localizedWriteEntry);
      }
    }

    return size;
  }
3203
3204
3205
3206
3207
3208
3209 private void rollbackMemstore(BatchOperationInProgress<?> batchOp,
3210 Map<byte[], List<Cell>>[] familyMaps,
3211 int start, int end) {
3212 int kvsRolledback = 0;
3213 for (int i = start; i < end; i++) {
3214
3215 if (batchOp.retCodeDetails[i].getOperationStatusCode()
3216 != OperationStatusCode.SUCCESS) {
3217 continue;
3218 }
3219
3220
3221 Map<byte[], List<Cell>> familyMap = familyMaps[i];
3222 for (Map.Entry<byte[], List<Cell>> e : familyMap.entrySet()) {
3223 byte[] family = e.getKey();
3224 List<Cell> cells = e.getValue();
3225
3226
3227
3228
3229 Store store = getStore(family);
3230 for (Cell cell: cells) {
3231 store.rollback(KeyValueUtil.ensureKeyValue(cell));
3232 kvsRolledback++;
3233 }
3234 }
3235 }
3236 LOG.debug("rollbackMemstore rolled back " + kvsRolledback +
3237 " keyvalues from start:" + start + " to end:" + end);
3238 }
3239
3240
3241
3242
3243
3244 void checkFamilies(Collection<byte[]> families)
3245 throws NoSuchColumnFamilyException {
3246 for (byte[] family : families) {
3247 checkFamily(family);
3248 }
3249 }
3250
3251
3252
3253
3254
3255 private void removeNonExistentColumnFamilyForReplay(
3256 final Map<byte[], List<Cell>> familyMap) {
3257 List<byte[]> nonExistentList = null;
3258 for (byte[] family : familyMap.keySet()) {
3259 if (!this.htableDescriptor.hasFamily(family)) {
3260 if (nonExistentList == null) {
3261 nonExistentList = new ArrayList<byte[]>();
3262 }
3263 nonExistentList.add(family);
3264 }
3265 }
3266 if (nonExistentList != null) {
3267 for (byte[] family : nonExistentList) {
3268
3269 LOG.info("No family for " + Bytes.toString(family) + " omit from reply.");
3270 familyMap.remove(family);
3271 }
3272 }
3273 }
3274
3275 void checkTimestamps(final Map<byte[], List<Cell>> familyMap,
3276 long now) throws FailedSanityCheckException {
3277 if (timestampSlop == HConstants.LATEST_TIMESTAMP) {
3278 return;
3279 }
3280 long maxTs = now + timestampSlop;
3281 for (List<Cell> kvs : familyMap.values()) {
3282 assert kvs instanceof RandomAccess;
3283 int listSize = kvs.size();
3284 for (int i=0; i < listSize; i++) {
3285 Cell cell = kvs.get(i);
3286
3287 KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
3288 if (!kv.isLatestTimestamp() && kv.getTimestamp() > maxTs) {
3289 throw new FailedSanityCheckException("Timestamp for KV out of range "
3290 + cell + " (too.new=" + timestampSlop + ")");
3291 }
3292 }
3293 }
3294 }
3295
3296
3297
3298
3299
3300
3301
3302 private void addFamilyMapToWALEdit(Map<byte[], List<Cell>> familyMap,
3303 WALEdit walEdit) {
3304 for (List<Cell> edits : familyMap.values()) {
3305 assert edits instanceof RandomAccess;
3306 int listSize = edits.size();
3307 for (int i=0; i < listSize; i++) {
3308 Cell cell = edits.get(i);
3309 walEdit.add(KeyValueUtil.ensureKeyValue(cell));
3310 }
3311 }
3312 }
3313
3314 private void requestFlush() {
3315 if (this.rsServices == null) {
3316 return;
3317 }
3318 synchronized (writestate) {
3319 if (this.writestate.isFlushRequested()) {
3320 return;
3321 }
3322 writestate.flushRequested = true;
3323 }
3324
3325 this.rsServices.getFlushRequester().requestFlush(this);
3326 if (LOG.isDebugEnabled()) {
3327 LOG.debug("Flush requested on " + this);
3328 }
3329 }
3330
3331
3332
3333
3334
3335 private boolean isFlushSize(final long size) {
3336 return size > this.memstoreFlushSize;
3337 }
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
  /**
   * Replays any recovered.edits files left under the region directory by WAL
   * splitting, applying only edits newer than what the stores already persist,
   * then flushes the replayed data and deletes the edits files.
   *
   * @param regiondir region directory to look for recovered.edits under
   * @param maxSeqIdInStores per-store maximum sequence id already persisted
   * @param reporter optional progress reporter for long replays
   * @param status task used for status/abort reporting
   * @return the highest sequence id seen (or the region minimum if nothing
   *         needed replaying)
   * @throws IOException if replay fails and skipping errors is not configured
   */
  protected long replayRecoveredEditsIfAny(final Path regiondir,
      Map<byte[], Long> maxSeqIdInStores,
      final CancelableProgressable reporter, final MonitoredTask status)
      throws UnsupportedEncodingException, IOException {
    // Smallest persisted max-seqid across stores: anything at or below this is
    // already durable in every store and can be skipped.
    long minSeqIdForTheRegion = -1;
    for (Long maxSeqIdInStore : maxSeqIdInStores.values()) {
      if (maxSeqIdInStore < minSeqIdForTheRegion || minSeqIdForTheRegion == -1) {
        minSeqIdForTheRegion = maxSeqIdInStore;
      }
    }
    long seqid = minSeqIdForTheRegion;

    FileSystem fs = this.fs.getFileSystem();
    NavigableSet<Path> files = HLogUtil.getSplitEditFilesSorted(fs, regiondir);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Found " + (files == null ? 0 : files.size())
        + " recovered edits file(s) under " + regiondir);
    }

    if (files == null || files.isEmpty()) return seqid;

    for (Path edits: files) {
      if (edits == null || !fs.exists(edits)) {
        LOG.warn("Null or non-existent edits file: " + edits);
        continue;
      }
      // Zero-length files are leftovers of failed splits; remove and move on.
      if (isZeroLengthThenDelete(fs, edits)) continue;

      long maxSeqId;
      String fileName = edits.getName();
      // The file name encodes the max sequence id contained in the file.
      // NOTE(review): parseLong assumes a purely numeric name - confirm no
      // suffixed recovered.edits file names can reach this point.
      maxSeqId = Math.abs(Long.parseLong(fileName));
      if (maxSeqId <= minSeqIdForTheRegion) {
        // Every edit in this file is already persisted; skip the whole file.
        if (LOG.isDebugEnabled()) {
          String msg = "Maximum sequenceid for this log is " + maxSeqId
            + " and minimum sequenceid for the region is " + minSeqIdForTheRegion
            + ", skipped the whole file, path=" + edits;
          LOG.debug(msg);
        }
        continue;
      }

      try {
        // Keep the running max: later files can contain higher seqids.
        seqid = Math.max(seqid, replayRecoveredEdits(edits, maxSeqIdInStores, reporter));
      } catch (IOException e) {
        boolean skipErrors = conf.getBoolean(
            HConstants.HREGION_EDITS_REPLAY_SKIP_ERRORS,
            conf.getBoolean(
                "hbase.skip.errors",
                HConstants.DEFAULT_HREGION_EDITS_REPLAY_SKIP_ERRORS));
        if (conf.get("hbase.skip.errors") != null) {
          LOG.warn(
              "The property 'hbase.skip.errors' has been deprecated. Please use " +
              HConstants.HREGION_EDITS_REPLAY_SKIP_ERRORS + " instead.");
        }
        if (skipErrors) {
          // Move the bad file aside and continue with the remaining files.
          Path p = HLogUtil.moveAsideBadEditsFile(fs, edits);
          LOG.error(HConstants.HREGION_EDITS_REPLAY_SKIP_ERRORS
              + "=true so continuing. Renamed " + edits +
              " as " + p, e);
        } else {
          throw e;
        }
      }
    }

    // Replay-size accounting is only needed while replaying; clear it now.
    if (this.rsAccounting != null) {
      this.rsAccounting.clearRegionReplayEditsSize(this.getRegionName());
    }
    if (seqid > minSeqIdForTheRegion) {
      // Something was replayed: flush so the recovered data is durable in
      // HFiles before the edits files are deleted below.
      internalFlushcache(null, seqid, status);
    }

    for (Path file: files) {
      if (!fs.delete(file, false)) {
        LOG.error("Failed delete of " + file);
      } else {
        LOG.debug("Deleted recovered.edits file=" + file);
      }
    }
    return seqid;
  }
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
  /**
   * Replays one recovered.edits file into the region's memstores, skipping
   * edits that are already persisted, belong to unknown families, are WAL
   * metadata, or fall outside this region's boundaries. EOF (truncated file)
   * and parse-level corruption are tolerated by moving the file aside.
   *
   * @param edits the recovered.edits file to replay
   * @param maxSeqIdInStores per-store maximum sequence id already persisted
   * @param reporter optional progress reporter, pinged periodically
   * @return the highest sequence id seen in the file (-1 if it held no entries)
   * @throws IOException on non-recoverable read failures, or when the reporter
   *         signals the replay should stop
   */
  private long replayRecoveredEdits(final Path edits,
      Map<byte[], Long> maxSeqIdInStores, final CancelableProgressable reporter)
    throws IOException {
    String msg = "Replaying edits from " + edits;
    LOG.info(msg);
    MonitoredTask status = TaskMonitor.get().createStatus(msg);
    FileSystem fs = this.fs.getFileSystem();

    status.setStatus("Opening logs");
    HLog.Reader reader = null;
    try {
      reader = HLogFactory.createReader(fs, edits, conf);
      long currentEditSeqId = -1;
      long firstSeqIdInLog = -1;
      long skippedEdits = 0;
      long editsCount = 0;
      long intervalEdits = 0;
      HLog.Entry entry;
      Store store = null;
      boolean reported_once = false;
      ServerNonceManager ng = this.rsServices == null ? null : this.rsServices.getNonceManager();

      try {
        // Report progress every "interval" edits...
        int interval = this.conf.getInt("hbase.hstore.report.interval.edits",
            2000);
        // ...but no more often than every "period" millis.
        int period = this.conf.getInt("hbase.hstore.report.period",
            this.conf.getInt(AssignmentManager.ASSIGNMENT_TIMEOUT,
                AssignmentManager.DEFAULT_ASSIGNMENT_TIMEOUT_DEFAULT) / 2);
        long lastReport = EnvironmentEdgeManager.currentTimeMillis();

        while ((entry = reader.next()) != null) {
          HLogKey key = entry.getKey();
          WALEdit val = entry.getEdit();

          if (ng != null) {
            // Keep the nonce manager aware of operations seen in the WAL so
            // retried clients are handled correctly after recovery.
            ng.reportOperationFromWal(key.getNonceGroup(), key.getNonce(), key.getWriteTime());
          }

          if (reporter != null) {
            intervalEdits += val.size();
            if (intervalEdits >= interval) {
              intervalEdits = 0;
              long cur = EnvironmentEdgeManager.currentTimeMillis();
              if (lastReport + period <= cur) {
                status.setStatus("Replaying edits..." +
                    " skipped=" + skippedEdits +
                    " edits=" + editsCount);
                // A failed progress call means whoever is watching gave up on us.
                if(!reporter.progress()) {
                  msg = "Progressable reporter failed, stopping replay";
                  LOG.warn(msg);
                  status.abort(msg);
                  throw new IOException(msg);
                }
                reported_once = true;
                lastReport = cur;
              }
            }
          }

          if (firstSeqIdInLog == -1) {
            firstSeqIdInLog = key.getLogSeqNum();
          }
          currentEditSeqId = key.getLogSeqNum();

          // Give coprocessors a chance to veto or observe each WAL entry.
          if (coprocessorHost != null) {
            status.setStatus("Running pre-WAL-restore hook in coprocessors");
            if (coprocessorHost.preWALRestore(this.getRegionInfo(), key, val)) {
              // Coprocessor asked us to skip this entry entirely.
              continue;
            }
          }
          boolean checkRowWithinBoundary = false;
          // Entries from a different encoded region name require per-row
          // boundary checks (e.g. edits written before a split/merge).
          if (!Bytes.equals(key.getEncodedRegionName(),
              this.getRegionInfo().getEncodedNameAsBytes())) {
            checkRowWithinBoundary = true;
          }

          boolean flush = false;
          for (KeyValue kv: val.getKeyValues()) {
            // WAL meta edits (e.g. compaction markers) are not data to restore.
            if (kv.matchingFamily(WALEdit.METAFAMILY) ||
                !Bytes.equals(key.getEncodedRegionName(),
                    this.getRegionInfo().getEncodedNameAsBytes())) {
              if (!checkRowWithinBoundary) {
                // A compaction marker for this region: finish the interrupted
                // compaction's bookkeeping instead of replaying data.
                CompactionDescriptor compaction = WALEdit.getCompaction(kv);
                if (compaction != null) {
                  completeCompactionMarker(compaction);
                }
              }
              skippedEdits++;
              continue;
            }
            // Cache the store lookup across consecutive cells of one family.
            if (store == null || !kv.matchingFamily(store.getFamily().getName())) {
              store = this.stores.get(kv.getFamily());
            }
            if (store == null) {
              // The family was dropped after these edits were written.
              LOG.warn("No family for " + kv);
              skippedEdits++;
              continue;
            }
            if (checkRowWithinBoundary && !rowIsInRange(this.getRegionInfo(),
                kv.getRowArray(), kv.getRowOffset(), kv.getRowLength())) {
              LOG.warn("Row of " + kv + " is not within region boundary");
              skippedEdits++;
              continue;
            }
            // Already persisted in this store: skip.
            if (key.getLogSeqNum() <= maxSeqIdInStores.get(store.getFamily()
                .getName())) {
              skippedEdits++;
              continue;
            }
            // restoreEdit returns true when the memstore grows past flush size.
            flush |= restoreEdit(store, kv);
            editsCount++;
          }
          if (flush) {
            internalFlushcache(null, currentEditSeqId, status);
          }

          if (coprocessorHost != null) {
            coprocessorHost.postWALRestore(this.getRegionInfo(), key, val);
          }
        }
      } catch (EOFException eof) {
        // Truncated file: the tail was re-split elsewhere, safe to continue.
        Path p = HLogUtil.moveAsideBadEditsFile(fs, edits);
        msg = "Encountered EOF. Most likely due to Master failure during " +
            "log spliting, so we have this data in another edit. " +
            "Continuing, but renaming " + edits + " as " + p;
        LOG.warn(msg, eof);
        status.abort(msg);
      } catch (IOException ioe) {
        // Only ParseException-caused IOExceptions are treated as corruption we
        // can survive; anything else is rethrown.
        if (ioe.getCause() instanceof ParseException) {
          Path p = HLogUtil.moveAsideBadEditsFile(fs, edits);
          msg = "File corruption encountered! " +
              "Continuing, but renaming " + edits + " as " + p;
          LOG.warn(msg, ioe);
          status.setStatus(msg);
        } else {
          status.abort(StringUtils.stringifyException(ioe));
          throw ioe;
        }
      }
      if (reporter != null && !reported_once) {
        // Ping at least once so the caller knows the file was processed.
        reporter.progress();
      }
      msg = "Applied " + editsCount + ", skipped " + skippedEdits +
        ", firstSequenceidInLog=" + firstSeqIdInLog +
        ", maxSequenceidInLog=" + currentEditSeqId + ", path=" + edits;
      status.markComplete(msg);
      LOG.debug(msg);
      return currentEditSeqId;
    } finally {
      status.cleanup();
      if (reader != null) {
        reader.close();
      }
    }
  }
3649
3650
3651
3652
3653
3654
3655
3656 void completeCompactionMarker(CompactionDescriptor compaction)
3657 throws IOException {
3658 Store store = this.getStore(compaction.getFamilyName().toByteArray());
3659 if (store == null) {
3660 LOG.warn("Found Compaction WAL edit for deleted family:" +
3661 Bytes.toString(compaction.getFamilyName().toByteArray()));
3662 return;
3663 }
3664 store.completeCompactionMarker(compaction);
3665 }
3666
3667
3668
3669
3670
3671
3672
3673 protected boolean restoreEdit(final Store s, final KeyValue kv) {
3674 long kvSize = s.add(kv);
3675 if (this.rsAccounting != null) {
3676 rsAccounting.addAndGetRegionReplayEditsSize(this.getRegionName(), kvSize);
3677 }
3678 return isFlushSize(this.addAndGetGlobalMemstoreSize(kvSize));
3679 }
3680
3681
3682
3683
3684
3685
3686
3687 private static boolean isZeroLengthThenDelete(final FileSystem fs, final Path p)
3688 throws IOException {
3689 FileStatus stat = fs.getFileStatus(p);
3690 if (stat.getLen() > 0) return false;
3691 LOG.warn("File " + p + " is zero-length, deleting.");
3692 fs.delete(p, false);
3693 return true;
3694 }
3695
3696 protected HStore instantiateHStore(final HColumnDescriptor family) throws IOException {
3697 return new HStore(this, family, this.conf);
3698 }
3699
3700
3701
3702
3703
3704
3705
3706
3707 public Store getStore(final byte[] column) {
3708 return this.stores.get(column);
3709 }
3710
  /**
   * Returns this region's live map of column family name to store.
   *
   * NOTE(review): this hands out the internal mutable map directly, so callers
   * could mutate region state through it - consider returning an unmodifiable
   * view if no caller relies on mutation.
   */
  public Map<byte[], Store> getStores() {
    return this.stores;
  }
3714
3715
3716
3717
3718
3719
3720
3721
3722 public List<String> getStoreFileList(final byte [][] columns)
3723 throws IllegalArgumentException {
3724 List<String> storeFileNames = new ArrayList<String>();
3725 synchronized(closeLock) {
3726 for(byte[] column : columns) {
3727 Store store = this.stores.get(column);
3728 if (store == null) {
3729 throw new IllegalArgumentException("No column family : " +
3730 new String(column) + " available");
3731 }
3732 for (StoreFile storeFile: store.getStorefiles()) {
3733 storeFileNames.add(storeFile.getPath().toString());
3734 }
3735 }
3736 }
3737 return storeFileNames;
3738 }
3739
3740
3741
3742
3743
  /**
   * Verifies that the given row falls inside this region's key range.
   *
   * @param row row key to check
   * @param op  name of the attempted operation, used in the error message
   * @throws WrongRegionException (an IOException) if the row is out of range
   */
  void checkRow(final byte [] row, String op) throws IOException {
    if (!rowIsInRange(getRegionInfo(), row)) {
      throw new WrongRegionException("Requested row out of range for " +
          op + " on HRegion " + this + ", startKey='" +
          Bytes.toStringBinary(getStartKey()) + "', getEndKey()='" +
          Bytes.toStringBinary(getEndKey()) + "', row='" +
          Bytes.toStringBinary(row) + "'");
    }
  }
3753
3754
3755
3756
3757
3758
3759
3760
3761
3762
  /**
   * Acquires a lock on the given row, bracketing the acquisition with
   * startRegionOperation/closeRegionOperation so the region cannot close out
   * from under the caller while the lock is being obtained.
   *
   * @param row         row to lock
   * @param waitForLock when false, return null immediately if the row is
   *                    already locked by another thread
   * @return the acquired RowLock, or null if waitForLock was false and the
   *         row was unavailable
   * @throws IOException e.g. on timeout waiting for the lock
   */
  public RowLock getRowLock(byte[] row, boolean waitForLock) throws IOException {
    startRegionOperation();
    try {
      return getRowLockInternal(row, waitForLock);
    } finally {
      closeRegionOperation();
    }
  }
3771
3772
3773
3774
3775
  /**
   * Core row-lock acquisition. Attempts to register a new RowLockContext for
   * the row in {@code lockedRows}. If another thread already holds the row,
   * either returns null ({@code waitForLock == false}) or waits on the
   * holder's latch up to {@code rowLockWaitDuration} milliseconds and retries.
   * Re-entrant acquisition by the owning thread reuses the existing context.
   *
   * @return a new RowLock on the registered context, or null if waitForLock
   *         was false and the row was held by another thread
   * @throws IOException on wait timeout, or InterruptedIOException if the
   *         waiting thread is interrupted
   */
  protected RowLock getRowLockInternal(byte[] row, boolean waitForLock) throws IOException {
    HashedBytes rowKey = new HashedBytes(row);
    RowLockContext rowLockContext = new RowLockContext(rowKey);

    // Loop until our context is installed or we give up / time out.
    while (true) {
      RowLockContext existingContext = lockedRows.putIfAbsent(rowKey, rowLockContext);
      if (existingContext == null) {
        // Row was unlocked; our fresh context is now registered.
        break;
      } else if (existingContext.ownedByCurrentThread()) {
        // Re-entrant acquisition: hand out another lock on the same context.
        rowLockContext = existingContext;
        break;
      } else {
        // Row is held by another thread.
        if (!waitForLock) {
          return null;
        }
        TraceScope traceScope = null;
        try {
          if (Trace.isTracing()) {
            traceScope = Trace.startSpan("HRegion.getRowLockInternal");
          }
          // Wait for the current holder's latch; on success, loop and retry
          // the putIfAbsent (another waiter may still beat us to it).
          if (!existingContext.latch.await(this.rowLockWaitDuration, TimeUnit.MILLISECONDS)) {
            if(traceScope != null) {
              traceScope.getSpan().addTimelineAnnotation("Failed to get row lock");
            }
            throw new IOException("Timed out waiting for lock for row: " + rowKey);
          }
          if (traceScope != null) traceScope.close();
          traceScope = null;
        } catch (InterruptedException ie) {
          // Translate to InterruptedIOException, preserving the cause.
          LOG.warn("Thread interrupted waiting for lock on row: " + rowKey);
          InterruptedIOException iie = new InterruptedIOException();
          iie.initCause(ie);
          throw iie;
        } finally {
          if (traceScope != null) traceScope.close();
        }
      }
    }

    // Context registered (new or re-entrant); return a lock handle on it.
    return rowLockContext.newLock();
  }
3822
3823
3824
3825
3826
3827
3828
  /**
   * Acquires a lock on the given row, waiting (bounded) if it is held.
   * Equivalent to {@code getRowLock(row, true)}.
   */
  public RowLock getRowLock(byte[] row) throws IOException {
    return getRowLock(row, true);
  }
3832
3833
3834
3835
3836 public void releaseRowLocks(List<RowLock> rowLocks) {
3837 if (rowLocks != null) {
3838 for (RowLock rowLock : rowLocks) {
3839 rowLock.release();
3840 }
3841 rowLocks.clear();
3842 }
3843 }
3844
3845
3846
3847
3848
3849
3850
3851 private static boolean hasMultipleColumnFamilies(
3852 List<Pair<byte[], String>> familyPaths) {
3853 boolean multipleFamilies = false;
3854 byte[] family = null;
3855 for (Pair<byte[], String> pair : familyPaths) {
3856 byte[] fam = pair.getFirst();
3857 if (family == null) {
3858 family = fam;
3859 } else if (!Bytes.equals(family, fam)) {
3860 multipleFamilies = true;
3861 break;
3862 }
3863 }
3864 return multipleFamilies;
3865 }
3866
3867
  /**
   * Bulk-loads the given family/HFile pairs with no load listener.
   * Delegates to {@link #bulkLoadHFiles(List, boolean, BulkLoadListener)}.
   */
  public boolean bulkLoadHFiles(List<Pair<byte[], String>> familyPaths,
      boolean assignSeqId) throws IOException {
    return bulkLoadHFiles(familyPaths, assignSeqId, null);
  }
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
  /**
   * Bulk-loads the given family/HFile pairs into the region in two phases:
   * first every file is validated against its store, then (only if all are
   * valid) each file is moved into place. Validation-phase problems fail the
   * whole load before anything moves; a move-phase failure, however, can
   * leave earlier families loaded (see comment in the second loop).
   *
   * @param familyPaths       pairs of column family name and HFile path
   * @param assignSeqId       if true, flush first and stamp the loaded files
   *                          with a sequence id
   * @param bulkLoadListener  optional callbacks around each file's load
   * @return true on success; false on a recoverable failure (e.g. files no
   *         longer fit this region after a split) — caller should retry
   * @throws IOException on validation or load errors
   */
  public boolean bulkLoadHFiles(List<Pair<byte[], String>> familyPaths, boolean assignSeqId,
      BulkLoadListener bulkLoadListener) throws IOException {
    Preconditions.checkNotNull(familyPaths);
    // Hold the bulk-load region lock for the whole operation; exclusivity
    // level depends on whether multiple families are involved.
    startBulkRegionOperation(hasMultipleColumnFamilies(familyPaths));
    try {
      this.writeRequestsCount.increment();

      // Phase 1: validate every HFile before moving anything, so a bad file
      // cannot leave the load half-applied.
      List<IOException> ioes = new ArrayList<IOException>();
      List<Pair<byte[], String>> failures = new ArrayList<Pair<byte[], String>>();
      for (Pair<byte[], String> p : familyPaths) {
        byte[] familyName = p.getFirst();
        String path = p.getSecond();

        Store store = getStore(familyName);
        if (store == null) {
          // Unknown family: not retryable.
          IOException ioe = new org.apache.hadoop.hbase.DoNotRetryIOException(
              "No such column family " + Bytes.toStringBinary(familyName));
          ioes.add(ioe);
        } else {
          try {
            store.assertBulkLoadHFileOk(new Path(path));
          } catch (WrongRegionException wre) {
            // Recoverable: file's key range no longer fits this region
            // (likely a split); report back instead of erroring out.
            failures.add(p);
          } catch (IOException ioe) {
            // Hard validation error.
            ioes.add(ioe);
          }
        }
      }

      // Any hard validation error fails the entire load.
      if (ioes.size() != 0) {
        IOException e = MultipleIOException.createIOException(ioes);
        LOG.error("There were one or more IO errors when checking if the bulk load is ok.", e);
        throw e;
      }

      // Recoverable mismatches: log the offending pairs, tell caller to retry.
      if (failures.size() != 0) {
        StringBuilder list = new StringBuilder();
        for (Pair<byte[], String> p : failures) {
          list.append("\n").append(Bytes.toString(p.getFirst())).append(" : ")
              .append(p.getSecond());
        }
        // problem when validating
        LOG.warn("There was a recoverable bulk load failure likely due to a" +
            " split. These (family, HFile) pairs were not loaded: " + list);
        return false;
      }

      long seqId = -1;
      // Obtain a sequence id for the loaded files if requested. A flush is
      // attempted first so the id is ordered after current memstore contents.
      if (assignSeqId) {
        FlushResult fs = this.flushcache();
        if (fs.isFlushSucceeded()) {
          seqId = fs.flushSequenceId;
        } else if (fs.result == FlushResult.Result.CANNOT_FLUSH_MEMSTORE_EMPTY) {
          // Nothing to flush; just take the next sequence id.
          seqId = this.sequenceId.incrementAndGet();
        } else {
          throw new IOException("Could not bulk load with an assigned sequential ID because the " +
              "flush didn't run. Reason for not flushing: " + fs.failureReason);
        }
      }

      // Phase 2: move each (already validated) file into its store.
      for (Pair<byte[], String> p : familyPaths) {
        byte[] familyName = p.getFirst();
        String path = p.getSecond();
        Store store = getStore(familyName);
        try {
          String finalPath = path;
          if(bulkLoadListener != null) {
            finalPath = bulkLoadListener.prepareBulkLoad(familyName, path);
          }
          store.bulkLoadHFile(finalPath, seqId);
          if(bulkLoadListener != null) {
            bulkLoadListener.doneBulkLoad(familyName, path);
          }
        } catch (IOException ioe) {
          // A failure here is an atomicity violation risk: earlier families in
          // this loop may already have loaded their files. Notify the listener
          // and rethrow so the client sees the partial failure.
          LOG.error("There was a partial failure due to IO when attempting to" +
              " load " + Bytes.toString(p.getFirst()) + " : "+ p.getSecond(), ioe);
          if(bulkLoadListener != null) {
            try {
              bulkLoadListener.failedBulkLoad(familyName, path);
            } catch (Exception ex) {
              // Listener failure must not mask the original IOException.
              LOG.error("Error while calling failedBulkLoad for family "+
                  Bytes.toString(familyName)+" with path "+path, ex);
            }
          }
          throw ioe;
        }
      }
      return true;
    } finally {
      closeBulkRegionOperation();
    }
  }
3991
3992 @Override
3993 public boolean equals(Object o) {
3994 return o instanceof HRegion && Bytes.equals(this.getRegionName(),
3995 ((HRegion) o).getRegionName());
3996 }
3997
  /** Hash of the region name; consistent with {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    return Bytes.hashCode(this.getRegionName());
  }
4002
  /** @return the region name as a string, suitable for logging. */
  @Override
  public String toString() {
    return this.getRegionNameAsString();
  }
4007
4008
4009
4010
  /**
   * RegionScannerImpl merges the per-store scanners of this region into a
   * single row-oriented scanner. Families the filter marks non-essential are
   * kept in a separate "joined" heap and only read once the filter has
   * accepted a row (lazy column-family loading).
   */
  class RegionScannerImpl implements RegionScanner {
    // Merged heap over the essential column-family scanners.
    KeyValueHeap storeHeap = null;
    // Heap of non-essential family scanners; consulted only after the filter
    // accepts a row.
    KeyValueHeap joinedHeap = null;
    // Row currently being drained from joinedHeap across next() calls, or
    // null when not mid-row on the joined side.
    protected KeyValue joinedContinuationRow = null;
    // Sentinel returned by populateResult when the batch limit was reached.
    private final KeyValue KV_LIMIT = new KeyValue();
    protected final byte[] stopRow;
    private final FilterWrapper filter;
    private int batch;
    // 0 for a normal scan (stop row exclusive), -1 for a Get-style scan
    // (stop row inclusive); used as the comparison bound in isStopRow().
    protected int isScan;
    // Set by close(); subsequent next() calls then fail with
    // UnknownScannerException.
    private boolean filterClosed = false;
    private long readPt;
    private long maxResultSize;
    protected HRegion region;

    @Override
    public HRegionInfo getRegionInfo() {
      return region.getRegionInfo();
    }

    RegionScannerImpl(Scan scan, List<KeyValueScanner> additionalScanners, HRegion region)
        throws IOException {

      this.region = region;
      this.maxResultSize = scan.getMaxResultSize();
      if (scan.hasFilter()) {
        this.filter = new FilterWrapper(scan.getFilter());
      } else {
        this.filter = null;
      }

      this.batch = scan.getBatch();
      // An empty stop row means "no stop row" for a scan, but a Get keeps its
      // (single) row as the stop row.
      if (Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW) && !scan.isGetScan()) {
        this.stopRow = null;
      } else {
        this.stopRow = scan.getStopRow();
      }
      // -1 makes isStopRow() treat the stop row as inclusive for a Get.
      this.isScan = scan.isGetScan() ? -1 : 0;

      // Record the MVCC read point under the scannerReadPoints lock so other
      // actors can see the oldest in-use read point.
      IsolationLevel isolationLevel = scan.getIsolationLevel();
      synchronized(scannerReadPoints) {
        this.readPt = getReadpoint(isolationLevel);
        scannerReadPoints.put(this, this.readPt);
      }

      // Partition the requested families into essential scanners (always
      // read) and joined scanners (read lazily once the filter wants a row).
      List<KeyValueScanner> scanners = new ArrayList<KeyValueScanner>(scan.getFamilyMap().size());
      List<KeyValueScanner> joinedScanners =
          new ArrayList<KeyValueScanner>(scan.getFamilyMap().size());
      if (additionalScanners != null) {
        scanners.addAll(additionalScanners);
      }

      for (Map.Entry<byte[], NavigableSet<byte[]>> entry :
          scan.getFamilyMap().entrySet()) {
        Store store = stores.get(entry.getKey());
        KeyValueScanner scanner;
        try {
          scanner = store.getScanner(scan, entry.getValue(), this.readPt);
        } catch (FileNotFoundException e) {
          // A store file vanished out from under us: abort the server rather
          // than risk returning wrong results.
          abortRegionServer(e.getMessage());
          throw new NotServingRegionException(region.getRegionNameAsString() + " is closing");
        }
        if (this.filter == null || !scan.doLoadColumnFamiliesOnDemand()
            || this.filter.isFamilyEssential(entry.getKey())) {
          scanners.add(scanner);
        } else {
          joinedScanners.add(scanner);
        }
      }
      initializeKVHeap(scanners, joinedScanners, region);
    }

    RegionScannerImpl(Scan scan, HRegion region) throws IOException {
      this(scan, null, region);
    }

    /** Builds the store heap, and the joined heap when lazy families exist. */
    protected void initializeKVHeap(List<KeyValueScanner> scanners,
        List<KeyValueScanner> joinedScanners, HRegion region)
        throws IOException {
      this.storeHeap = new KeyValueHeap(scanners, region.comparator);
      if (!joinedScanners.isEmpty()) {
        this.joinedHeap = new KeyValueHeap(joinedScanners, region.comparator);
      }
    }

    @Override
    public long getMaxResultSize() {
      return maxResultSize;
    }

    @Override
    public long getMvccReadPoint() {
      return this.readPt;
    }

    /**
     * Resets the scan filter (if any) so per-row filter state does not leak
     * into the next row.
     *
     * @throws IOException if the filter raises one on reset
     */
    protected void resetFilters() throws IOException {
      if (filter != null) {
        filter.reset();
      }
    }

    @Override
    public boolean next(List<Cell> outResults)
        throws IOException {
      // Apply the scan's batching limit by default.
      return next(outResults, batch);
    }

    @Override
    public synchronized boolean next(List<Cell> outResults, int limit) throws IOException {
      if (this.filterClosed) {
        throw new UnknownScannerException("Scanner was closed (timed out?) " +
            "after we renewed it. Could be caused by a very slow scanner " +
            "or a lengthy garbage collection");
      }
      startRegionOperation(Operation.SCAN);
      readRequestsCount.increment();
      try {
        boolean returnResult = nextRaw(outResults, limit);
        // Feed the per-region scan metrics with the byte size of the results.
        if (region != null && region.metricsRegion != null) {
          long totalSize = 0;
          for (Cell cell: outResults) {
            KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            totalSize += kv.getLength();
          }
          region.metricsRegion.updateScanNext(totalSize);
        }
        return returnResult;
      } finally {
        closeRegionOperation(Operation.SCAN);
      }
    }

    @Override
    public boolean nextRaw(List<Cell> outResults)
        throws IOException {
      return nextRaw(outResults, batch);
    }

    @Override
    public boolean nextRaw(List<Cell> outResults, int limit) throws IOException {
      if (storeHeap == null) {
        // Scanner has been closed.
        throw new UnknownScannerException("Scanner was closed");
      }
      boolean returnResult;
      if (outResults.isEmpty()) {
        // Usual case: fill the caller's (empty) list directly, avoiding a
        // copy. nextInternal requires an empty list.
        returnResult = nextInternal(outResults, limit);
      } else {
        List<Cell> tmpList = new ArrayList<Cell>();
        returnResult = nextInternal(tmpList, limit);
        outResults.addAll(tmpList);
      }
      resetFilters();
      if (isFilterDoneInternal()) {
        returnResult = false;
      }
      return returnResult;
    }

    /** Drains cells of the continuation row from the joined heap. */
    private void populateFromJoinedHeap(List<Cell> results, int limit)
        throws IOException {
      assert joinedContinuationRow != null;
      KeyValue kv = populateResult(results, this.joinedHeap, limit,
          joinedContinuationRow.getBuffer(), joinedContinuationRow.getRowOffset(),
          joinedContinuationRow.getRowLength());
      if (kv != KV_LIMIT) {
        // Row fully drained; clear the continuation marker.
        joinedContinuationRow = null;
      }
      // Results came from two independent heaps; re-sort so the combined row
      // is in KeyValue order.
      Collections.sort(results, comparator);
    }

    /**
     * Fetches cells of the current row from the given heap into results,
     * stopping at the next row or at the batch limit.
     *
     * @param results    list to fill
     * @param heap       heap to fetch from
     * @param limit      max cells to place in results; values <= 0 mean no
     *                   limit (only limit > 0 is enforced below)
     * @param currentRow buffer containing the row being fetched
     * @param offset     row offset within currentRow
     * @param length     row length within currentRow
     * @return KV_LIMIT if the limit was reached, otherwise the heap's next
     *         KeyValue (first cell of the following row, or null at the end)
     */
    private KeyValue populateResult(List<Cell> results, KeyValueHeap heap, int limit,
        byte[] currentRow, int offset, short length) throws IOException {
      KeyValue nextKv;
      try {
        do {
          heap.next(results, limit - results.size());
          if (limit > 0 && results.size() == limit) {
            return KV_LIMIT;
          }
          nextKv = heap.peek();
        } while (nextKv != null && nextKv.matchingRow(currentRow, offset, length));
      } catch (FileNotFoundException e) {
        // Store file disappeared mid-scan: abort the server.
        abortRegionServer(e.getMessage());
        throw new NotServingRegionException(region.getRegionNameAsString() + " is closing");
      }
      return nextKv;
    }

    /** @return true if the filter has ruled the whole scan is done. */
    @Override
    public synchronized boolean isFilterDone() throws IOException {
      return isFilterDoneInternal();
    }

    private boolean isFilterDoneInternal() throws IOException {
      return this.filter != null && this.filter.filterAllRemaining();
    }

    /**
     * Core row-assembly loop: peeks the current row from the store heap,
     * applies row-key and row filters, populates the results (essential
     * families first, then lazily the joined families), and advances to the
     * next row when the current one is filtered out or empty.
     *
     * @param results MUST be empty on entry; filled with the next row's cells
     * @param limit   per-call cell limit passed through to populateResult
     * @return true if more rows exist after this one, false if the scan is
     *         exhausted
     */
    private boolean nextInternal(List<Cell> results, int limit)
        throws IOException {
      if (!results.isEmpty()) {
        throw new IllegalArgumentException("First parameter should be an empty list");
      }
      RpcCallContext rpcCall = RpcServer.getCurrentCall();
      // The loop here is used only when at some row we have all expired
      // columns or the row is filtered out: we need to advance and try again,
      // rather than return an empty result to the client.
      while (true) {
        if (rpcCall != null) {
          // If the client has gone away, short-circuit: no one will read the
          // results, and scanning burns server resources.
          long afterTime = rpcCall.disconnectSince();
          if (afterTime >= 0) {
            throw new CallerDisconnectedException(
                "Aborting on region " + getRegionNameAsString() + ", call " +
                this + " after " + afterTime + " ms, since " +
                "caller disconnected");
          }
        }

        // Peek the next cell to learn what row we are on.
        KeyValue current = this.storeHeap.peek();

        byte[] currentRow = null;
        int offset = 0;
        short length = 0;
        if (current != null) {
          currentRow = current.getBuffer();
          offset = current.getRowOffset();
          length = current.getRowLength();
        }
        boolean stopRow = isStopRow(currentRow, offset, length);

        // Branch 1: not resuming a joined-heap row.
        if (joinedContinuationRow == null) {
          // First, check if we are at a stop row; if so, there are no more
          // results.
          if (stopRow) {
            if (filter != null && filter.hasFilterRow()) {
              filter.filterRowCells(results);
            }
            return false;
          }

          // Check if rowkey filter wants to exclude this row. If so, loop to
          // the next row. Techically, if we hit limits before on this row, we
          // don't need this call.
          if (filterRowKey(currentRow, offset, length)) {
            boolean moreRows = nextRow(currentRow, offset, length);
            if (!moreRows) return false;
            results.clear();
            continue;
          }

          KeyValue nextKv = populateResult(results, this.storeHeap, limit, currentRow, offset,
              length);
          // Batch limit reached mid-row: return to caller without row-filter
          // processing (row filters cannot work on partial rows).
          if (nextKv == KV_LIMIT) {
            if (this.filter != null && filter.hasFilterRow()) {
              throw new IncompatibleFilterException(
                "Filter whose hasFilterRow() returns true is incompatible with scan with limit!");
            }
            return true;
          }

          stopRow = nextKv == null ||
              isStopRow(nextKv.getBuffer(), nextKv.getRowOffset(), nextKv.getRowLength());
          // Save that the row was empty before filters applied to it.
          final boolean isEmptyRow = results.isEmpty();

          // Apply the whole-row filter, if any.
          FilterWrapper.FilterRowRetCode ret = FilterWrapper.FilterRowRetCode.NOT_CALLED;
          if (filter != null && filter.hasFilterRow()) {
            ret = filter.filterRowCellsWithRet(results);
          }

          if ((isEmptyRow || ret == FilterWrapper.FilterRowRetCode.EXCLUDE) || filterRow()) {
            results.clear();
            boolean moreRows = nextRow(currentRow, offset, length);
            if (!moreRows) return false;

            // This row was totally filtered out; if this is not the last
            // row, loop and fetch the next one.
            if (!stopRow) continue;
            return false;
          }

          // The row passed the filter: now consult the joined heap for the
          // non-essential families of the same row, if any may have data.
          if (this.joinedHeap != null) {
            KeyValue nextJoinedKv = joinedHeap.peek();
            // The joined heap may have data only if it is already positioned
            // on this row, or a seek lands on it.
            boolean mayHaveData =
                (nextJoinedKv != null && nextJoinedKv.matchingRow(currentRow, offset, length))
                || (this.joinedHeap.requestSeek(KeyValue.createFirstOnRow(currentRow, offset, length),
                  true, true)
                  && joinedHeap.peek() != null
                  && joinedHeap.peek().matchingRow(currentRow, offset, length));
            if (mayHaveData) {
              joinedContinuationRow = current;
              populateFromJoinedHeap(results, limit);
            }
          }
        } else {
          // Branch 2: resume draining a joined-heap row left over from the
          // previous call (it hit the limit mid-row).
          populateFromJoinedHeap(results, limit);
        }

        // Still mid-row on the joined side: return what we have; the next
        // call will continue from joinedContinuationRow.
        if (joinedContinuationRow != null) {
          return true;
        }

        // Nothing survived for this row (e.g. all cells expired): advance and
        // either loop again or finish.
        if (results.isEmpty()) {
          boolean moreRows = nextRow(currentRow, offset, length);
          if (!moreRows) return false;
          if (!stopRow) continue;
        }

        // We have results and the row is complete.
        return !stopRow;
      }
    }

    /**
     * Applies filterRow() only when hasFilterRow() is false — a compatibility
     * path for filters (e.g. written against an older API) that implement
     * filterRow() without declaring hasFilterRow(); otherwise the
     * filterRowCells path above has already run.
     */
    private boolean filterRow() throws IOException {
      // when hasFilterRow returns true, filter.filterRow() will be called
      // automatically inside filterRowCells(kvs) so we don't repeat it here.
      return filter != null && (!filter.hasFilterRow())
          && filter.filterRow();
    }

    private boolean filterRowKey(byte[] row, int offset, short length) throws IOException {
      return filter != null
          && filter.filterRowKey(row, offset, length);
    }

    /**
     * Skips the remainder of the current row in the store heap, resets the
     * filter, and gives the coprocessor host a chance to veto continuing.
     *
     * @return true if scanning should continue (no coprocessor objection)
     */
    protected boolean nextRow(byte [] currentRow, int offset, short length) throws IOException {
      assert this.joinedContinuationRow == null: "Trying to go to next row during joinedHeap read.";
      KeyValue next;
      // Drain remaining cells of the row into a throwaway sink.
      while ((next = this.storeHeap.peek()) != null &&
          next.matchingRow(currentRow, offset, length)) {
        this.storeHeap.next(MOCKED_LIST);
      }
      resetFilters();
      // Calling the hook in CP which allows it to do a fast forward
      return this.region.getCoprocessorHost() == null
          || this.region.getCoprocessorHost()
              .postScannerFilterRow(this, currentRow, offset, length);
    }

    /**
     * @return true when the scan is past its stop row (or the heap is
     *         exhausted). isScan == -1 makes the stop row inclusive for Gets.
     */
    protected boolean isStopRow(byte[] currentRow, int offset, short length) {
      return currentRow == null ||
          (stopRow != null &&
          comparator.compareRows(stopRow, 0, stopRow.length,
            currentRow, offset, length) <= isScan);
    }

    @Override
    public synchronized void close() {
      if (storeHeap != null) {
        storeHeap.close();
        storeHeap = null;
      }
      if (joinedHeap != null) {
        joinedHeap.close();
        joinedHeap = null;
      }
      // No need to synchronize here: this scanner's read point is no longer
      // in use once it is removed from the map.
      scannerReadPoints.remove(this);
      this.filterClosed = true;
    }

    KeyValueHeap getStoreHeapForTesting() {
      return storeHeap;
    }

    @Override
    public synchronized boolean reseek(byte[] row) throws IOException {
      if (row == null) {
        throw new IllegalArgumentException("Row cannot be null.");
      }
      boolean result = false;
      startRegionOperation();
      try {
        KeyValue kv = KeyValue.createFirstOnRow(row);
        // use request seek to make use of the lazy seek option. See HBASE-5520
        result = this.storeHeap.requestSeek(kv, true, true);
        if (this.joinedHeap != null) {
          result = this.joinedHeap.requestSeek(kv, true, true) || result;
        }
      } catch (FileNotFoundException e) {
        // Store file disappeared mid-seek: abort the server.
        abortRegionServer(e.getMessage());
        throw new NotServingRegionException(region.getRegionNameAsString() + " is closing");
      } finally {
        closeRegionOperation();
      }
      return result;
    }
  }
4462
4463
4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483
  /**
   * Reflection-based factory for HRegion instances. The concrete class is
   * read from configuration key {@code HConstants.REGION_IMPL} (defaulting to
   * HRegion) and must provide a constructor with the signature
   * (Path, HLog, FileSystem, Configuration, HRegionInfo, HTableDescriptor,
   * RegionServerServices).
   *
   * @throws IllegalStateException wrapping any instantiation failure
   */
  static HRegion newHRegion(Path tableDir, HLog log, FileSystem fs,
      Configuration conf, HRegionInfo regionInfo, final HTableDescriptor htd,
      RegionServerServices rsServices) {
    try {
      @SuppressWarnings("unchecked")
      Class<? extends HRegion> regionClass =
          (Class<? extends HRegion>) conf.getClass(HConstants.REGION_IMPL, HRegion.class);

      Constructor<? extends HRegion> c =
          regionClass.getConstructor(Path.class, HLog.class, FileSystem.class,
              Configuration.class, HRegionInfo.class, HTableDescriptor.class,
              RegionServerServices.class);

      return c.newInstance(tableDir, log, fs, conf, regionInfo, htd, rsServices);
    } catch (Throwable e) {
      // Today, AccessDeniedException is the only valid exception class and it
      // is an unchecked exception anyway; wrap everything else.
      throw new IllegalStateException("Could not instantiate a region instance.", e);
    }
  }
4503
4504
4505
4506
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
  /**
   * Convenience overload: creates an HRegion with no WAL supplied (one is
   * created internally).
   */
  public static HRegion createHRegion(final HRegionInfo info, final Path rootDir,
      final Configuration conf, final HTableDescriptor hTableDescriptor)
      throws IOException {
    return createHRegion(info, rootDir, conf, hTableDescriptor, null);
  }
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539 public static void closeHRegion(final HRegion r) throws IOException {
4540 if (r == null) return;
4541 r.close();
4542 if (r.getLog() == null) return;
4543 r.getLog().closeAndDelete();
4544 }
4545
4546
4547
4548
4549
4550
4551
4552
4553
4554
4555
4556
4557
4558
4559
4560
  /**
   * Convenience overload: creates an HRegion with {@code ignoreHLog} set to
   * false (a WAL is created if none is supplied).
   */
  public static HRegion createHRegion(final HRegionInfo info, final Path rootDir,
                                      final Configuration conf,
                                      final HTableDescriptor hTableDescriptor,
                                      final HLog hlog,
                                      final boolean initialize)
      throws IOException {
    return createHRegion(info, rootDir, conf, hTableDescriptor,
        hlog, initialize, false);
  }
4570
4571
4572
4573
4574
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
  /**
   * Convenience overload: derives the table directory from the root dir and
   * the region's table, then delegates to the full createHRegion variant.
   */
  public static HRegion createHRegion(final HRegionInfo info, final Path rootDir,
                                      final Configuration conf,
                                      final HTableDescriptor hTableDescriptor,
                                      final HLog hlog,
                                      final boolean initialize, final boolean ignoreHLog)
      throws IOException {
    Path tableDir = FSUtils.getTableDir(rootDir, info.getTable());
    return createHRegion(info, rootDir, tableDir, conf, hTableDescriptor, hlog, initialize, ignoreHLog);
  }
4596
4597
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
  /**
   * Creates a new region on the filesystem and (optionally) initializes it.
   *
   * @param info               region to create
   * @param rootDir            root directory (used for logging only here)
   * @param tableDir           table directory under which the region lives
   * @param conf               configuration
   * @param hTableDescriptor   table schema
   * @param hlog               WAL to use; if null and {@code ignoreHLog} is
   *                           false a new WAL is created under the region dir
   * @param initialize         if true, run region initialization and set its
   *                           sequence id
   * @param ignoreHLog         if true, do not create a WAL when none is given
   * @return the newly created (possibly uninitialized) region
   */
  public static HRegion createHRegion(final HRegionInfo info, final Path rootDir, final Path tableDir,
                                      final Configuration conf,
                                      final HTableDescriptor hTableDescriptor,
                                      final HLog hlog,
                                      final boolean initialize, final boolean ignoreHLog)
      throws IOException {
    LOG.info("creating HRegion " + info.getTable().getNameAsString()
        + " HTD == " + hTableDescriptor + " RootDir = " + rootDir +
        " Table name == " + info.getTable().getNameAsString());
    FileSystem fs = FileSystem.get(conf);
    // Lay down the region directory structure on the filesystem.
    HRegionFileSystem rfs = HRegionFileSystem.createRegionOnFileSystem(conf, fs, tableDir, info);
    HLog effectiveHLog = hlog;
    if (hlog == null && !ignoreHLog) {
      effectiveHLog = HLogFactory.createHLog(fs, rfs.getRegionDir(),
          HConstants.HREGION_LOGDIR_NAME, conf);
    }
    HRegion region = HRegion.newHRegion(tableDir,
        effectiveHLog, fs, conf, info, hTableDescriptor, null);
    if (initialize) {
      // Region sequence id is set from initialize() so edits written after
      // creation get correctly ordered ids.
      region.setSequenceId(region.initialize());
    }
    return region;
  }
4639
  /**
   * Convenience overload: creates and initializes an HRegion with the given
   * WAL ({@code initialize == true}).
   */
  public static HRegion createHRegion(final HRegionInfo info, final Path rootDir,
                                      final Configuration conf,
                                      final HTableDescriptor hTableDescriptor,
                                      final HLog hlog)
      throws IOException {
    return createHRegion(info, rootDir, conf, hTableDescriptor, hlog, true);
  }
4647
4648
4649
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659
4660
  /**
   * Convenience overload: opens a region with no RegionServerServices and no
   * progress reporter.
   */
  public static HRegion openHRegion(final HRegionInfo info,
      final HTableDescriptor htd, final HLog wal,
      final Configuration conf)
      throws IOException {
    return openHRegion(info, htd, wal, conf, null, null);
  }
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
  /**
   * Convenience overload: resolves the HBase root directory from the
   * configuration, then delegates.
   */
  public static HRegion openHRegion(final HRegionInfo info,
      final HTableDescriptor htd, final HLog wal, final Configuration conf,
      final RegionServerServices rsServices,
      final CancelableProgressable reporter)
      throws IOException {
    return openHRegion(FSUtils.getRootDir(conf), info, htd, wal, conf, rsServices, reporter);
  }
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
  /**
   * Convenience overload: opens a region under an explicit root directory
   * with no RegionServerServices and no progress reporter.
   */
  public static HRegion openHRegion(Path rootDir, final HRegionInfo info,
      final HTableDescriptor htd, final HLog wal, final Configuration conf)
      throws IOException {
    return openHRegion(rootDir, info, htd, wal, conf, null, null);
  }
4709
4710
4711
4712
4713
4714
4715
4716
4717
4718
4719
4720
4721
4722
4723
4724
4725 public static HRegion openHRegion(final Path rootDir, final HRegionInfo info,
4726 final HTableDescriptor htd, final HLog wal, final Configuration conf,
4727 final RegionServerServices rsServices,
4728 final CancelableProgressable reporter)
4729 throws IOException {
4730 FileSystem fs = null;
4731 if (rsServices != null) {
4732 fs = rsServices.getFileSystem();
4733 }
4734 if (fs == null) {
4735 fs = FileSystem.get(conf);
4736 }
4737 return openHRegion(conf, fs, rootDir, info, htd, wal, rsServices, reporter);
4738 }
4739
4740
4741
4742
4743
4744
4745
4746
4747
4748
4749
4750
4751
4752
4753
  /**
   * Convenience overload: opens a region on an explicit filesystem with no
   * RegionServerServices and no progress reporter.
   */
  public static HRegion openHRegion(final Configuration conf, final FileSystem fs,
      final Path rootDir, final HRegionInfo info, final HTableDescriptor htd, final HLog wal)
      throws IOException {
    return openHRegion(conf, fs, rootDir, info, htd, wal, null, null);
  }
4759
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
  /**
   * Convenience overload: derives the table directory from the root dir and
   * the region's table, then delegates.
   */
  public static HRegion openHRegion(final Configuration conf, final FileSystem fs,
      final Path rootDir, final HRegionInfo info, final HTableDescriptor htd, final HLog wal,
      final RegionServerServices rsServices, final CancelableProgressable reporter)
      throws IOException {
    Path tableDir = FSUtils.getTableDir(rootDir, info.getTable());
    return openHRegion(conf, fs, rootDir, tableDir, info, htd, wal, rsServices, reporter);
  }
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793
4794
4795
4796
4797
4798
4799
  /**
   * Constructs the HRegion object and runs its open sequence (initialization,
   * WAL replay etc. — see {@link #openHRegion(CancelableProgressable)}).
   *
   * @param info region to open; must not be null
   * @return the opened, initialized region
   * @throws NullPointerException if {@code info} is null
   */
  public static HRegion openHRegion(final Configuration conf, final FileSystem fs,
      final Path rootDir, final Path tableDir, final HRegionInfo info, final HTableDescriptor htd, final HLog wal,
      final RegionServerServices rsServices, final CancelableProgressable reporter)
      throws IOException {
    if (info == null) throw new NullPointerException("Passed region info is null");
    if (LOG.isDebugEnabled()) {
      LOG.debug("Opening region: " + info);
    }
    HRegion r = HRegion.newHRegion(tableDir, wal, fs, conf, info, htd, rsServices);
    return r.openHRegion(reporter);
  }
4811
4812
4813
4814
4815
4816
4817
4818
4819
  /**
   * Builds and opens a fresh HRegion over the same on-disk state as
   * {@code other} — a "reopen" that produces a new instance rather than
   * mutating the old one.
   *
   * @param other    region whose layout/WAL/descriptor to reuse
   * @param reporter progress reporter, may be null
   * @return the newly opened region instance
   */
  public static HRegion openHRegion(final HRegion other, final CancelableProgressable reporter)
      throws IOException {
    HRegionFileSystem regionFs = other.getRegionFileSystem();
    HRegion r = newHRegion(regionFs.getTableDir(), other.getLog(), regionFs.getFileSystem(),
        other.baseConf, other.getRegionInfo(), other.getTableDesc(), null);
    return r.openHRegion(reporter);
  }
4827
4828
4829
4830
4831
4832
4833
4834
  /**
   * Runs this region's open sequence: sanity-checks compression codecs,
   * encryption settings, and coprocessor/split-policy class loading, then
   * initializes the region and records the resulting open sequence id.
   *
   * @param reporter progress reporter passed to initialization, may be null
   * @return this region, opened
   * @throws IOException if any check or initialization fails
   */
  protected HRegion openHRegion(final CancelableProgressable reporter)
      throws IOException {
    // Refuse to open if any configured compression codec is unavailable.
    checkCompressionCodecs();
    // Refuse to open if encryption settings can't be validated.
    checkEncryption();
    // Refuse to open if split-policy or coprocessor classes can't load.
    checkClassLoading();
    this.openSeqNum = initialize(reporter);
    this.setSequenceId(openSeqNum);
    return this;
  }
4848
  /**
   * Verifies that the compression codecs configured for every column family
   * (both flush and compaction compression) are usable on this server.
   */
  private void checkCompressionCodecs() throws IOException {
    for (HColumnDescriptor fam: this.htableDescriptor.getColumnFamilies()) {
      CompressionTest.testCompression(fam.getCompression());
      CompressionTest.testCompression(fam.getCompactionCompression());
    }
  }
4855
  /**
   * Verifies that each column family's encryption type and key are valid for
   * this server's configuration.
   */
  private void checkEncryption() throws IOException {
    for (HColumnDescriptor fam: this.htableDescriptor.getColumnFamilies()) {
      EncryptionTest.testEncryption(conf, fam.getEncryptionType(), fam.getEncryptionKey());
    }
  }
4861
  /**
   * Verifies that the table's split-policy class and coprocessor classes can
   * be loaded before the region is opened.
   */
  private void checkClassLoading() throws IOException {
    RegionSplitPolicy.getSplitPolicyClass(this.htableDescriptor, conf);
    RegionCoprocessorHost.testTableCoprocessorAttrs(conf, this.htableDescriptor);
  }
4866
4867
4868
4869
4870
4871
  /**
   * Creates (but does not open) a daughter region object after a split,
   * committing the daughter's files on the filesystem first. Read/write
   * request counters are seeded with half of this parent's counts.
   *
   * @param hri daughter region info
   * @return the daughter HRegion instance (not yet initialized)
   */
  HRegion createDaughterRegionFromSplits(final HRegionInfo hri) throws IOException {
    // Move the files from the temporary .splits to the final /table/region dir
    fs.commitDaughterRegion(hri);

    // Create the daughter HRegion instance
    HRegion r = HRegion.newHRegion(this.fs.getTableDir(), this.getLog(), fs.getFileSystem(),
        this.getBaseConf(), hri, this.getTableDesc(), rsServices);
    r.readRequestsCount.set(this.getReadRequestsCount() / 2);
    r.writeRequestsCount.set(this.getWriteRequestsCount() / 2);
    return r;
  }
4883
4884
4885
4886
4887
4888
4889
4890
  /**
   * Creates (but does not open) the merged region object after merging this
   * region with {@code region_b}, committing the merged files on the
   * filesystem. Request counters are seeded with the sum of both parents'.
   *
   * @param mergedRegionInfo info of the resulting merged region
   * @param region_b         the other parent region
   * @return the merged HRegion instance (not yet initialized)
   */
  HRegion createMergedRegionFromMerges(final HRegionInfo mergedRegionInfo,
      final HRegion region_b) throws IOException {
    HRegion r = HRegion.newHRegion(this.fs.getTableDir(), this.getLog(),
        fs.getFileSystem(), this.getBaseConf(), mergedRegionInfo,
        this.getTableDesc(), this.rsServices);
    r.readRequestsCount.set(this.getReadRequestsCount()
        + region_b.getReadRequestsCount());
    r.writeRequestsCount.set(this.getWriteRequestsCount()

        + region_b.getWriteRequestsCount());
    // Move files of the merged region out of its temp dir into place.
    this.fs.commitMergedRegion(mergedRegionInfo);
    return r;
  }
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
  /**
   * Inserts a row for region {@code r} into the given meta region: writes the
   * serialized HRegionInfo and the meta version under the catalog family,
   * keyed by the region name.
   *
   * @param meta the meta region to write into
   * @param r    the region to record
   */
  public static void addRegionToMETA(final HRegion meta, final HRegion r) throws IOException {
    meta.checkResources();
    // The row key is the region name.
    byte[] row = r.getRegionName();
    final long now = EnvironmentEdgeManager.currentTimeMillis();
    final List<Cell> cells = new ArrayList<Cell>(2);
    cells.add(new KeyValue(row, HConstants.CATALOG_FAMILY,
        HConstants.REGIONINFO_QUALIFIER, now,
        r.getRegionInfo().toByteArray()));
    // Set into the root table the version of the meta table.
    cells.add(new KeyValue(row, HConstants.CATALOG_FAMILY,
        HConstants.META_VERSION_QUALIFIER, now,
        Bytes.toBytes(HConstants.META_VERSION)));
    meta.put(row, HConstants.CATALOG_FAMILY, cells);
  }
4931
4932
4933
4934
4935
4936
4937
4938
  /**
   * Computes the region directory as tabledir/name.
   *
   * @deprecated kept for backward compatibility; region layout is managed by
   *             HRegionFileSystem.
   */
  @Deprecated
  public static Path getRegionDir(final Path tabledir, final String name) {
    return new Path(tabledir, name);
  }
4943
4944
4945
4946
4947
4948
4949
4950
  /**
   * Computes the region directory as rootdir/table/encodedRegionName.
   *
   * @deprecated kept for backward compatibility; region layout is managed by
   *             HRegionFileSystem.
   */
  @Deprecated
  public static Path getRegionDir(final Path rootdir, final HRegionInfo info) {
    return new Path(
        FSUtils.getTableDir(rootdir, info.getTable()), info.getEncodedName());
  }
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965 public static boolean rowIsInRange(HRegionInfo info, final byte [] row) {
4966 return ((info.getStartKey().length == 0) ||
4967 (Bytes.compareTo(info.getStartKey(), row) <= 0)) &&
4968 ((info.getEndKey().length == 0) ||
4969 (Bytes.compareTo(info.getEndKey(), row) > 0));
4970 }
4971
4972 public static boolean rowIsInRange(HRegionInfo info, final byte [] row, final int offset,
4973 final short length) {
4974 return ((info.getStartKey().length == 0) ||
4975 (Bytes.compareTo(info.getStartKey(), 0, info.getStartKey().length,
4976 row, offset, length) <= 0)) &&
4977 ((info.getEndKey().length == 0) ||
4978 (Bytes.compareTo(info.getEndKey(), 0, info.getEndKey().length, row, offset, length) > 0));
4979 }
4980
4981
4982
4983
4984
4985
4986
4987
4988
  /**
   * Merges two adjacent regions, ordering them so that {@code a} is the one
   * with the lower start key (a null start key sorts first). The two regions
   * must abut: a's end key must equal b's start key.
   *
   * @param srcA one region to merge
   * @param srcB the other region to merge
   * @return the merged region
   * @throws IOException if both start keys are null or the regions are not
   *         adjacent
   */
  public static HRegion mergeAdjacent(final HRegion srcA, final HRegion srcB)
      throws IOException {
    HRegion a = srcA;
    HRegion b = srcB;

    // Make sure that srcA comes first; important for key-ordering, especially
    // if one of the regions has a null start key.
    if (srcA.getStartKey() == null) {
      if (srcB.getStartKey() == null) {
        throw new IOException("Cannot merge two regions with null start key");
      }
      // A's start key is null but B's isn't. Assume A comes before B.
    } else if ((srcB.getStartKey() == null) ||
        (Bytes.compareTo(srcA.getStartKey(), srcB.getStartKey()) > 0)) {
      a = srcB;
      b = srcA;
    }

    // Adjacency check: a must end exactly where b begins.
    if (!(Bytes.compareTo(a.getEndKey(), b.getStartKey()) == 0)) {
      throw new IOException("Cannot merge non-adjacent regions");
    }
    return merge(a, b);
  }
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021 public static HRegion merge(final HRegion a, final HRegion b) throws IOException {
5022 if (!a.getRegionInfo().getTable().equals(b.getRegionInfo().getTable())) {
5023 throw new IOException("Regions do not belong to the same table");
5024 }
5025
5026 FileSystem fs = a.getRegionFileSystem().getFileSystem();
5027
5028 a.flushcache();
5029 b.flushcache();
5030
5031
5032 a.compactStores(true);
5033 if (LOG.isDebugEnabled()) {
5034 LOG.debug("Files for region: " + a);
5035 a.getRegionFileSystem().logFileSystemState(LOG);
5036 }
5037 b.compactStores(true);
5038 if (LOG.isDebugEnabled()) {
5039 LOG.debug("Files for region: " + b);
5040 b.getRegionFileSystem().logFileSystemState(LOG);
5041 }
5042
5043 RegionMergeTransaction rmt = new RegionMergeTransaction(a, b, true);
5044 if (!rmt.prepare(null)) {
5045 throw new IOException("Unable to merge regions " + a + " and " + b);
5046 }
5047 HRegionInfo mergedRegionInfo = rmt.getMergedRegionInfo();
5048 LOG.info("starting merge of regions: " + a + " and " + b
5049 + " into new region " + mergedRegionInfo.getRegionNameAsString()
5050 + " with start key <"
5051 + Bytes.toStringBinary(mergedRegionInfo.getStartKey())
5052 + "> and end key <"
5053 + Bytes.toStringBinary(mergedRegionInfo.getEndKey()) + ">");
5054 HRegion dstRegion;
5055 try {
5056 dstRegion = rmt.execute(null, null);
5057 } catch (IOException ioe) {
5058 rmt.rollback(null, null);
5059 throw new IOException("Failed merging region " + a + " and " + b
5060 + ", and succssfully rolled back");
5061 }
5062 dstRegion.compactStores(true);
5063
5064 if (LOG.isDebugEnabled()) {
5065 LOG.debug("Files for new region");
5066 dstRegion.getRegionFileSystem().logFileSystemState(LOG);
5067 }
5068
5069 if (dstRegion.getRegionFileSystem().hasReferences(dstRegion.getTableDesc())) {
5070 throw new IOException("Merged region " + dstRegion
5071 + " still has references after the compaction, is compaction canceled?");
5072 }
5073
5074
5075 HFileArchiver.archiveRegion(a.getBaseConf(), fs, a.getRegionInfo());
5076
5077 HFileArchiver.archiveRegion(b.getBaseConf(), fs, b.getRegionInfo());
5078
5079 LOG.info("merge completed. New region is " + dstRegion);
5080 return dstRegion;
5081 }
5082
5083
5084
5085
5086
5087 boolean isMajorCompaction() throws IOException {
5088 for (Store store : this.stores.values()) {
5089 if (store.isMajorCompaction()) {
5090 return true;
5091 }
5092 }
5093 return false;
5094 }
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104 public Result get(final Get get) throws IOException {
5105 checkRow(get.getRow(), "Get");
5106
5107 if (get.hasFamilies()) {
5108 for (byte [] family: get.familySet()) {
5109 checkFamily(family);
5110 }
5111 } else {
5112 for (byte[] family: this.htableDescriptor.getFamiliesKeys()) {
5113 get.addFamily(family);
5114 }
5115 }
5116 List<Cell> results = get(get, true);
5117 return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null);
5118 }
5119
5120
5121
5122
5123
5124
5125 public List<Cell> get(Get get, boolean withCoprocessor)
5126 throws IOException {
5127
5128 List<Cell> results = new ArrayList<Cell>();
5129
5130
5131 if (withCoprocessor && (coprocessorHost != null)) {
5132 if (coprocessorHost.preGet(get, results)) {
5133 return results;
5134 }
5135 }
5136
5137 Scan scan = new Scan(get);
5138
5139 RegionScanner scanner = null;
5140 try {
5141 scanner = getScanner(scan);
5142 scanner.next(results);
5143 } finally {
5144 if (scanner != null)
5145 scanner.close();
5146 }
5147
5148
5149 if (withCoprocessor && (coprocessorHost != null)) {
5150 coprocessorHost.postGet(get, results);
5151 }
5152
5153
5154 if (this.metricsRegion != null) {
5155 long totalSize = 0l;
5156 if (results != null) {
5157 for (Cell kv:results) {
5158 totalSize += KeyValueUtil.ensureKeyValue(kv).getLength();
5159 }
5160 }
5161 this.metricsRegion.updateGet(totalSize);
5162 }
5163
5164 return results;
5165 }
5166
  /**
   * Atomically applies all mutations of a {@link RowMutations} to its single
   * row by delegating to the multi-row path with a one-element lock set.
   *
   * @param rm the mutations, all for the same row
   * @throws IOException if the underlying mutate fails
   */
  public void mutateRow(RowMutations rm) throws IOException {
    // Lock only the one row the RowMutations targets.
    mutateRowsWithLocks(rm.getMutations(), Collections.singleton(rm.getRow()));
  }
5171
5172
5173
5174
5175
  /**
   * Atomically applies the given mutations while holding locks on all of the
   * listed rows. Convenience overload that runs without nonces.
   *
   * @param mutations the mutations to apply atomically
   * @param rowsToLock rows whose locks must be held during the batch
   * @throws IOException if the underlying mutate fails
   */
  public void mutateRowsWithLocks(Collection<Mutation> mutations,
      Collection<byte[]> rowsToLock) throws IOException {
    mutateRowsWithLocks(mutations, rowsToLock, HConstants.NO_NONCE, HConstants.NO_NONCE);
  }
5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193 public void mutateRowsWithLocks(Collection<Mutation> mutations,
5194 Collection<byte[]> rowsToLock, long nonceGroup, long nonce) throws IOException {
5195 MultiRowMutationProcessor proc = new MultiRowMutationProcessor(mutations, rowsToLock);
5196 processRowsWithLocks(proc, -1, nonceGroup, nonce);
5197 }
5198
5199
5200
5201
5202 public ClientProtos.RegionLoadStats getLoadStatistics() {
5203 if (!regionStatsEnabled) {
5204 return null;
5205 }
5206 ClientProtos.RegionLoadStats.Builder stats = ClientProtos.RegionLoadStats.newBuilder();
5207 stats.setMemstoreLoad((int) (Math.min(100, (this.memstoreSize.get() * 100) / this
5208 .memstoreFlushSize)));
5209 stats.setHeapOccupancy((int)rsServices.getHeapMemoryManager().getHeapOccupancyPercent()*100);
5210 stats.setCompactionPressure((int)rsServices.getCompactionPressure()*100 > 100 ? 100 :
5211 (int)rsServices.getCompactionPressure()*100);
5212 return stats.build();
5213 }
5214
5215
5216
5217
5218
5219
5220
5221
  /**
   * Runs the given row processor with the region's default row-processor
   * timeout ({@code rowProcessorTimeout}).
   *
   * @param processor the processor to run
   * @param nonceGroup nonce group of the operation
   * @param nonce nonce of the operation
   * @throws IOException if processing fails or times out
   */
  public void processRowsWithLocks(RowProcessor<?,?> processor, long nonceGroup, long nonce)
      throws IOException {
    processRowsWithLocks(processor, rowProcessorTimeout, nonceGroup, nonce);
  }
5226
5227
5228
5229
5230
5231
5232
5233
5234
5235
  /**
   * Runs a {@link RowProcessor} atomically over its declared rows: validates
   * the rows, acquires row locks and the updates read lock, executes the
   * processor (optionally with a timeout), applies the produced mutations to
   * the memstore under an MVCC write entry, writes and syncs the WAL, and
   * rolls the memstore edits back if the WAL sync fails. Read-only
   * processors skip all locking and WAL work.
   *
   * The statement ordering here (lock acquisition, WAL append, lock release
   * before sync, rollback in the finally) is deliberate and fragile - do not
   * reorder without understanding the write path.
   *
   * @param processor the processor to run
   * @param timeout milliseconds allowed for processor.process(); negative
   *   means run inline with no timeout
   * @param nonceGroup nonce group for WAL-based duplicate detection
   * @param nonce nonce for WAL-based duplicate detection
   * @throws IOException if validation, processing, or the WAL write fails
   */
  public void processRowsWithLocks(RowProcessor<?,?> processor, long timeout,
      long nonceGroup, long nonce) throws IOException {

    // Validate every row the processor wants before taking any locks.
    for (byte[] row : processor.getRowsToLock()) {
      checkRow(row, "processRowsWithLocks");
    }
    if (!processor.readOnly()) {
      checkReadOnly();
    }
    checkResources();

    startRegionOperation();
    WALEdit walEdit = new WALEdit();

    // Pre-process outside of any locks; on failure close the region op.
    try {
      processor.preProcess(this, walEdit);
    } catch (IOException e) {
      closeRegionOperation();
      throw e;
    }

    // Read-only processors take no locks and produce no mutations.
    if (processor.readOnly()) {
      try {
        long now = EnvironmentEdgeManager.currentTimeMillis();
        doProcessRowWithTimeout(
            processor, now, this, null, null, timeout);
        processor.postProcess(this, walEdit, true);
      } catch (IOException e) {
        throw e;
      } finally {
        closeRegionOperation();
      }
      return;
    }

    MultiVersionConsistencyControl.WriteEntry writeEntry = null;
    boolean locked = false;
    boolean walSyncSuccessful = false;
    List<RowLock> acquiredRowLocks = null;
    long addedSize = 0;
    List<Mutation> mutations = new ArrayList<Mutation>();
    Collection<byte[]> rowsToLock = processor.getRowsToLock();
    try {
      // Acquire a lock for every row up front.
      acquiredRowLocks = new ArrayList<RowLock>(rowsToLock.size());
      for (byte[] row : rowsToLock) {
        acquiredRowLocks.add(getRowLock(row));
      }
      // Take the updates read lock; the multiplier scales the lock wait
      // with the number of rows involved (minimum 1).
      lock(this.updatesLock.readLock(), acquiredRowLocks.size() == 0 ? 1 : acquiredRowLocks.size());
      locked = true;

      long now = EnvironmentEdgeManager.currentTimeMillis();
      try {
        // Let the processor produce its mutations and WAL edits.
        doProcessRowWithTimeout(
            processor, now, this, mutations, walEdit, timeout);

        if (!mutations.isEmpty()) {
          // One MVCC write entry covers the whole batch.
          writeEntry = mvcc.beginMemstoreInsert();

          processor.preBatchMutate(this, walEdit);

          // Apply every cell to its store's memstore, tagged with the
          // batch's MVCC write number.
          for (Mutation m : mutations) {
            rewriteCellTags(m.getFamilyCellMap(), m);

            for (CellScanner cellScanner = m.cellScanner(); cellScanner.advance();) {
              KeyValue kv = KeyValueUtil.ensureKeyValue(cellScanner.current());
              kv.setMvccVersion(writeEntry.getWriteNumber());
              byte[] family = kv.getFamily();
              checkFamily(family);
              addedSize += stores.get(family).add(kv);
            }
          }

          long txid = 0;
          // Append (without sync) to the WAL while still holding the locks.
          if (!walEdit.isEmpty()) {
            txid = this.log.appendNoSync(this.getRegionInfo(),
                this.htableDescriptor.getTableName(), walEdit, processor.getClusterIds(), now,
                this.htableDescriptor, this.sequenceId, true, nonceGroup, nonce);
          }
          // Release locks BEFORE the (slow) WAL sync so other writers can
          // proceed; the memstore rollback below covers sync failure.
          if (locked) {
            this.updatesLock.readLock().unlock();
            locked = false;
          }

          releaseRowLocks(acquiredRowLocks);

          if (txid != 0) {
            syncOrDefer(txid, getEffectiveDurability(processor.useDurability()));
          }
          walSyncSuccessful = true;

          processor.postBatchMutate(this);
        }
      } finally {
        // If the WAL sync did not complete, undo the memstore inserts so the
        // memstore never exposes data the WAL does not have.
        if (!mutations.isEmpty() && !walSyncSuccessful) {
          LOG.warn("Wal sync failed. Roll back " + mutations.size() +
              " memstore keyvalues for row(s):" +
              processor.getRowsToLock().iterator().next() + "...");
          for (Mutation m : mutations) {
            for (CellScanner cellScanner = m.cellScanner(); cellScanner.advance();) {
              KeyValue kv = KeyValueUtil.ensureKeyValue(cellScanner.current());
              stores.get(kv.getFamily()).rollback(kv);
            }
          }
        }
        // Complete the MVCC transaction so readers advance past it.
        if (writeEntry != null) {
          mvcc.completeMemstoreInsert(writeEntry);
          writeEntry = null;
        }
        if (locked) {
          this.updatesLock.readLock().unlock();
          locked = false;
        }
        // Idempotent: no-op for locks already released on the happy path.
        releaseRowLocks(acquiredRowLocks);
      }

      processor.postProcess(this, walEdit, walSyncSuccessful);

    } catch (IOException e) {
      throw e;
    } finally {
      closeRegionOperation();
      // Trigger a flush if this batch pushed the memstore over its limit.
      if (!mutations.isEmpty() &&
          isFlushSize(this.addAndGetGlobalMemstoreSize(addedSize))) {
        requestFlush();
      }
    }
  }
5378
  /**
   * Invokes {@code processor.process(...)} either inline (when
   * {@code timeout < 0}) or on the shared row-processor executor bounded by
   * the given wall-clock timeout.
   *
   * @param processor the processor to run
   * @param now timestamp to hand to the processor
   * @param region the region being processed (this)
   * @param mutations output list the processor appends mutations to
   *   (null for read-only processors)
   * @param walEdit output WAL edit the processor appends to (null for
   *   read-only processors)
   * @param timeout milliseconds allowed; negative disables the executor path
   * @throws IOException if the processor throws, times out, or the executor
   *   fails for any other reason
   */
  private void doProcessRowWithTimeout(final RowProcessor<?,?> processor,
                                       final long now,
                                       final HRegion region,
                                       final List<Mutation> mutations,
                                       final WALEdit walEdit,
                                       final long timeout) throws IOException {
    // Fast path: no timeout requested, run on the calling thread.
    if (timeout < 0) {
      try {
        processor.process(now, region, mutations, walEdit);
      } catch (IOException e) {
        LOG.warn("RowProcessor:" + processor.getClass().getName() +
            " throws Exception on row(s):" +
            Bytes.toStringBinary(
                processor.getRowsToLock().iterator().next()) + "...", e);
        throw e;
      }
      return;
    }

    // Timed path: run the processor on the executor and bound the wait.
    FutureTask<Void> task =
      new FutureTask<Void>(new Callable<Void>() {
        @Override
        public Void call() throws IOException {
          try {
            processor.process(now, region, mutations, walEdit);
            return null;
          } catch (IOException e) {
            LOG.warn("RowProcessor:" + processor.getClass().getName() +
                " throws Exception on row(s):" +
                Bytes.toStringBinary(
                    processor.getRowsToLock().iterator().next()) + "...", e);
            throw e;
          }
        }
      });
    rowProcessorExecutor.execute(task);
    try {
      task.get(timeout, TimeUnit.MILLISECONDS);
    } catch (TimeoutException te) {
      // NOTE(review): the task is not cancelled here, so a timed-out
      // processor may keep running (and mutating 'mutations'/'walEdit')
      // after this method throws - confirm whether task.cancel(true) is
      // intentionally omitted.
      LOG.error("RowProcessor timeout:" + timeout + " ms on row(s):" +
          Bytes.toStringBinary(processor.getRowsToLock().iterator().next()) +
          "...");
      throw new IOException(te);
    } catch (Exception e) {
      throw new IOException(e);
    }
  }
5428
  /**
   * Performs an Append without nonce-based duplicate detection.
   *
   * @param append the append operation
   * @return the resulting cells, or null if the Append asked for no results
   * @throws IOException if the append fails
   */
  public Result append(Append append) throws IOException {
    return append(append, HConstants.NO_NONCE, HConstants.NO_NONCE);
  }
5432
5433
5434
5435
5436
5437
5438
5439
5440
5441
5442
  /**
   * Performs an Append atomically on a single row: under the row lock and
   * updates lock it reads the current cell values, concatenates the appended
   * bytes (merging tags and honoring any TTL on the Append), writes the new
   * cells to the WAL and memstore, and syncs the WAL after the locks are
   * released. The whole operation runs under one MVCC write entry.
   *
   * @param append the append operation
   * @param nonceGroup nonce group for WAL-based duplicate detection
   * @param nonce nonce for WAL-based duplicate detection
   * @return the resulting cells, or null when the Append asked for no results
   * @throws IOException if validation, the WAL write, or the sync fails
   */
  public Result append(Append append, long nonceGroup, long nonce)
      throws IOException {
    byte[] row = append.getRow();
    checkRow(row, "append");
    boolean flush = false;
    Durability durability = getEffectiveDurability(append.getDurability());
    boolean writeToWAL = durability != Durability.SKIP_WAL;
    WALEdit walEdits = null;
    List<Cell> allKVs = new ArrayList<Cell>(append.size());
    // Per-store staging for the new cells; applied to memstores after WAL append.
    Map<Store, List<Cell>> tempMemstore = new HashMap<Store, List<Cell>>();

    long size = 0;
    long txid = 0;

    checkReadOnly();
    checkResources();
    // Guards against region close while the append is in flight.
    startRegionOperation(Operation.APPEND);
    this.writeRequestsCount.increment();
    WriteEntry w = null;
    RowLock rowLock;
    try {
      rowLock = getRowLock(row);
      try {
        lock(this.updatesLock.readLock());
        try {
          // Wait for all earlier MVCC transactions to complete so the read
          // below sees every prior write on this row.
          mvcc.completeMemstoreInsert(mvcc.beginMemstoreInsert());
          if (this.coprocessorHost != null) {
            // A coprocessor may serve the append itself and short-circuit.
            Result r = this.coprocessorHost.preAppendAfterRowLock(append);
            if(r!= null) {
              return r;
            }
          }

          // MVCC write entry covering all cells produced by this append.
          w = mvcc.beginMemstoreInsert();
          long now = EnvironmentEdgeManager.currentTimeMillis();

          for (Map.Entry<byte[], List<Cell>> family : append.getFamilyCellMap().entrySet()) {

            Store store = stores.get(family.getKey());
            List<Cell> kvs = new ArrayList<Cell>(family.getValue().size());

            // Sort the input cells so they line up positionally with the Get
            // results fetched below (both in store-comparator order).
            Collections.sort(family.getValue(), store.getComparator());

            // Read the current value of every column being appended to.
            Get get = new Get(row);
            for (Cell cell : family.getValue()) {
              KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
              get.addColumn(family.getKey(), kv.getQualifier());
            }
            List<Cell> results = get(get, false);

            // Walk the input cells with 'idx' tracking the matching position
            // in 'results'; a match means an existing value to append to.
            int idx = 0;
            for (Cell cell : family.getValue()) {
              KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
              KeyValue newKv;
              KeyValue oldKv = null;
              if (idx < results.size()
                  && CellUtil.matchingQualifier(results.get(idx), kv)) {
                oldKv = KeyValueUtil.ensureKeyValue(results.get(idx));
                // Never move the cell's timestamp backwards.
                long ts = Math.max(now, oldKv.getTimestamp());

                // Merge tags: existing cell's tags first, then the new cell's.
                List<Tag> newTags = new ArrayList<Tag>();

                if (oldKv.getTagsLengthUnsigned() > 0) {
                  Iterator<Tag> i = CellUtil.tagsIterator(oldKv.getTagsArray(),
                      oldKv.getTagsOffset(), oldKv.getTagsLengthUnsigned());
                  while (i.hasNext()) {
                    newTags.add(i.next());
                  }
                }
                if (kv.getTagsLengthUnsigned() > 0) {
                  Iterator<Tag> i = CellUtil.tagsIterator(kv.getTagsArray(), kv.getTagsOffset(),
                      kv.getTagsLengthUnsigned());
                  while (i.hasNext()) {
                    newTags.add(i.next());
                  }
                }

                // Long.MAX_VALUE is the "no TTL" sentinel on the Append.
                if (append.getTTL() != Long.MAX_VALUE) {
                  newTags.add(new Tag(TagType.TTL_TAG_TYPE, Bytes.toBytes(append.getTTL())));
                }

                byte[] tagBytes = Tag.fromList(newTags);

                // Allocate a KeyValue sized for old value + appended value
                // plus the merged tags, then copy each component in.
                newKv = new KeyValue(row.length, kv.getFamilyLength(),
                    kv.getQualifierLength(), ts, KeyValue.Type.Put,
                    oldKv.getValueLength() + kv.getValueLength(),
                    tagBytes.length);

                // Row, family and qualifier come from the input cell.
                System.arraycopy(kv.getRowArray(), kv.getRowOffset(),
                    newKv.getRowArray(), newKv.getRowOffset(), kv.getRowLength());
                System.arraycopy(kv.getFamilyArray(), kv.getFamilyOffset(),
                    newKv.getFamilyArray(), newKv.getFamilyOffset(),
                    kv.getFamilyLength());
                System.arraycopy(kv.getQualifierArray(), kv.getQualifierOffset(),
                    newKv.getQualifierArray(), newKv.getQualifierOffset(),
                    kv.getQualifierLength());
                // Value = existing bytes followed by the appended bytes.
                System.arraycopy(oldKv.getValueArray(), oldKv.getValueOffset(),
                    newKv.getValueArray(), newKv.getValueOffset(),
                    oldKv.getValueLength());
                System.arraycopy(kv.getValueArray(), kv.getValueOffset(),
                    newKv.getValueArray(),
                    newKv.getValueOffset() + oldKv.getValueLength(),
                    kv.getValueLength());
                // Finally the merged tag bytes.
                System.arraycopy(tagBytes, 0, newKv.getTagsArray(), newKv.getTagsOffset(),
                    tagBytes.length);
                idx++;
              } else {
                // No existing value for this column: behaves like a Put at 'now'.
                kv.updateLatestStamp(Bytes.toBytes(now));

                // Rebuild the cell only when a TTL tag has to be attached.
                if (append.getTTL() != Long.MAX_VALUE) {
                  List<Tag> newTags = new ArrayList<Tag>(1);
                  newTags.add(new Tag(TagType.TTL_TAG_TYPE, Bytes.toBytes(append.getTTL())));

                  newKv = new KeyValue(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(),
                      kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(),
                      kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength(),
                      kv.getTimestamp(), KeyValue.Type.codeToType(kv.getTypeByte()),
                      kv.getValueArray(), kv.getValueOffset(), kv.getValueLength(),
                      newTags);
                } else {
                  newKv = kv;
                }
              }
              newKv.setMvccVersion(w.getWriteNumber());

              // Let coprocessors rewrite the cell before it hits the WAL.
              if (coprocessorHost != null) {
                newKv = KeyValueUtil.ensureKeyValue(coprocessorHost.postMutationBeforeWAL(
                    RegionObserver.MutationType.APPEND, append, oldKv, (Cell) newKv));
              }
              kvs.add(newKv);

              if (writeToWAL) {
                if (walEdits == null) {
                  walEdits = new WALEdit();
                }
                walEdits.add(newKv);
              }
            }

            tempMemstore.put(store, kvs);
          }

          // Append to the WAL (unsynced) while still holding the locks.
          if (writeToWAL) {
            txid = this.log.appendNoSync(this.getRegionInfo(),
                this.htableDescriptor.getTableName(), walEdits, new ArrayList<UUID>(),
                EnvironmentEdgeManager.currentTimeMillis(), this.htableDescriptor, this.sequenceId,
                true, nonceGroup, nonce);
          } else {
            // Track data written without WAL protection for metrics/alerts.
            recordMutationWithoutWal(append.getFamilyCellMap());
          }

          // Apply the staged cells to each store's memstore.
          for (Map.Entry<Store, List<Cell>> entry : tempMemstore.entrySet()) {
            Store store = entry.getKey();
            if (store.getFamily().getMaxVersions() == 1) {
              // Single-version family: upsert replaces the old cell in place.
              size += store.upsert(entry.getValue(), getSmallestReadPoint());
            } else {
              // Multi-version family: plain add keeps prior versions.
              for (Cell cell: entry.getValue()) {
                KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
                size += store.add(kv);
              }
            }
            allKVs.addAll(entry.getValue());
          }
          size = this.addAndGetGlobalMemstoreSize(size);
          flush = isFlushSize(size);
        } finally {
          this.updatesLock.readLock().unlock();
        }
      } finally {
        rowLock.release();
      }
      // Sync after releasing the row lock so other writers are not blocked
      // on the (slow) WAL sync.
      if (writeToWAL) {
        syncOrDefer(txid, durability);
      }
    } finally {
      if (w != null) {
        mvcc.completeMemstoreInsert(w);
      }
      closeRegionOperation(Operation.APPEND);
    }

    if (this.metricsRegion != null) {
      this.metricsRegion.updateAppend();
    }

    if (flush) {
      requestFlush();
    }

    return append.isReturnResults() ? Result.create(allKVs) : null;
  }
5674
  /**
   * Performs an Increment without nonce-based duplicate detection.
   *
   * @param increment the increment operation
   * @return the resulting cells (post-increment values)
   * @throws IOException if the increment fails
   */
  public Result increment(Increment increment) throws IOException {
    return increment(increment, HConstants.NO_NONCE, HConstants.NO_NONCE);
  }
5678
5679
5680
5681
5682
5683
5684
5685
5686
5687
  /**
   * Performs an Increment atomically on a single row: under the row lock and
   * updates lock it reads the current 64-bit values (restricted to the
   * Increment's time range), adds the requested amounts, writes the new cells
   * to the WAL and memstore, and syncs the WAL after the locks are released.
   * A zero delta is read back but not written ("no write back"). The whole
   * operation runs under one MVCC write entry.
   *
   * @param increment the increment operation
   * @param nonceGroup nonce group for WAL-based duplicate detection
   * @param nonce nonce for WAL-based duplicate detection
   * @return the post-increment cells for every requested column
   * @throws IOException if validation, the WAL write, or the sync fails;
   *   DoNotRetryIOException if a target value is not 8 bytes wide
   */
  public Result increment(Increment increment, long nonceGroup, long nonce)
      throws IOException {
    byte [] row = increment.getRow();
    checkRow(row, "increment");
    TimeRange tr = increment.getTimeRange();
    boolean flush = false;
    Durability durability = getEffectiveDurability(increment.getDurability());
    boolean writeToWAL = durability != Durability.SKIP_WAL;
    WALEdit walEdits = null;
    List<Cell> allKVs = new ArrayList<Cell>(increment.size());
    // Per-store staging for the new cells; applied after the WAL append.
    Map<Store, List<Cell>> tempMemstore = new HashMap<Store, List<Cell>>();

    long size = 0;
    long txid = 0;

    checkReadOnly();
    checkResources();
    // Guards against region close while the increment is in flight.
    startRegionOperation(Operation.INCREMENT);
    this.writeRequestsCount.increment();
    WriteEntry w = null;
    try {
      RowLock rowLock = getRowLock(row);
      try {
        lock(this.updatesLock.readLock());
        try {
          // Wait for all earlier MVCC transactions so the read below sees
          // every prior write on this row.
          mvcc.completeMemstoreInsert(mvcc.beginMemstoreInsert());
          if (this.coprocessorHost != null) {
            // A coprocessor may serve the increment itself and short-circuit.
            Result r = this.coprocessorHost.preIncrementAfterRowLock(increment);
            if (r != null) {
              return r;
            }
          }

          // MVCC write entry covering all cells produced by this increment.
          w = mvcc.beginMemstoreInsert();
          long now = EnvironmentEdgeManager.currentTimeMillis();

          for (Map.Entry<byte [], List<Cell>> family:
              increment.getFamilyCellMap().entrySet()) {

            Store store = stores.get(family.getKey());
            List<Cell> kvs = new ArrayList<Cell>(family.getValue().size());

            // Sort input cells so they line up positionally with the Get
            // results fetched below (both in store-comparator order).
            Collections.sort(family.getValue(), store.getComparator());

            // Read current values of every column being incremented,
            // honoring the Increment's time range.
            Get get = new Get(row);
            for (Cell cell: family.getValue()) {
              KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
              get.addColumn(family.getKey(), kv.getQualifier());
            }
            get.setTimeRange(tr.getMin(), tr.getMax());
            List<Cell> results = get(get, false);

            // Walk the input cells with 'idx' tracking the matching position
            // in 'results'; duplicate qualifiers in the input share one result.
            int idx = 0;
            List<Cell> edits = family.getValue();
            for (int i = 0; i < edits.size(); i++) {
              Cell cell = edits.get(i);
              long amount = Bytes.toLong(CellUtil.cloneValue(cell));
              // Zero delta: return the current value but write nothing.
              boolean noWriteBack = (amount == 0);
              List<Tag> newTags = new ArrayList<Tag>();

              // Carry over tags supplied on the input cell.
              if (cell.getTagsLengthUnsigned() > 0) {
                Iterator<Tag> itr = CellUtil.tagsIterator(cell.getTagsArray(),
                    cell.getTagsOffset(), cell.getTagsLengthUnsigned());
                while (itr.hasNext()) {
                  newTags.add(itr.next());
                }
              }

              Cell c = null;
              long ts = now;
              if (idx < results.size() && CellUtil.matchingQualifier(results.get(idx), cell)) {
                c = results.get(idx);
                // Never move the cell's timestamp backwards.
                ts = Math.max(now, c.getTimestamp());
                if(c.getValueLength() == Bytes.SIZEOF_LONG) {
                  amount += Bytes.toLong(c.getValueArray(), c.getValueOffset(), Bytes.SIZEOF_LONG);
                } else {
                  // Existing value is not a long; refuse rather than corrupt.
                  throw new org.apache.hadoop.hbase.DoNotRetryIOException(
                      "Attempted to increment field that isn't 64 bits wide");
                }
                // Carry over tags from the existing cell.
                // NOTE(review): this uses getTagsLength() while the input
                // cell above uses getTagsLengthUnsigned() - confirm whether
                // the signed variant is intentional here.
                if (c.getTagsLength() > 0) {
                  Iterator<Tag> itr = CellUtil.tagsIterator(c.getTagsArray(),
                      c.getTagsOffset(), c.getTagsLength());
                  while (itr.hasNext()) {
                    newTags.add(itr.next());
                  }
                }
                // Advance only when the next edit targets a different
                // qualifier, so repeated increments of one column reuse the
                // same base value.
                if (i < ( edits.size() - 1) && !CellUtil.matchingQualifier(cell, edits.get(i + 1)))
                  idx++;
              }

              // Build the post-increment cell.
              byte[] q = CellUtil.cloneQualifier(cell);
              byte[] val = Bytes.toBytes(amount);

              // Long.MAX_VALUE is the "no TTL" sentinel on the Increment.
              if (increment.getTTL() != Long.MAX_VALUE) {
                newTags.add(new Tag(TagType.TTL_TAG_TYPE, Bytes.toBytes(increment.getTTL())));
              }

              KeyValue newKv = new KeyValue(row, 0, row.length,
                  family.getKey(), 0, family.getKey().length,
                  q, 0, q.length,
                  ts,
                  KeyValue.Type.Put,
                  val, 0, val.length,
                  newTags);

              newKv.setMvccVersion(w.getWriteNumber());

              // Let coprocessors rewrite the cell before it hits the WAL.
              if (coprocessorHost != null) {
                newKv = KeyValueUtil.ensureKeyValue(coprocessorHost.postMutationBeforeWAL(
                    RegionObserver.MutationType.INCREMENT, increment, c, (Cell) newKv));
              }
              allKVs.add(newKv);

              if (!noWriteBack) {
                kvs.add(newKv);

                if (writeToWAL) {
                  if (walEdits == null) {
                    walEdits = new WALEdit();
                  }
                  walEdits.add(newKv);
                }
              }
            }

            if (!kvs.isEmpty()) {
              tempMemstore.put(store, kvs);
            }
          }

          // Append to the WAL (unsynced) while still holding the locks.
          if (walEdits != null && !walEdits.isEmpty()) {
            if (writeToWAL) {
              txid = this.log.appendNoSync(this.getRegionInfo(),
                  this.htableDescriptor.getTableName(), walEdits, new ArrayList<UUID>(),
                  EnvironmentEdgeManager.currentTimeMillis(), this.htableDescriptor, this.sequenceId,
                  true, nonceGroup, nonce);
            } else {
              // Track data written without WAL protection for metrics/alerts.
              recordMutationWithoutWal(increment.getFamilyCellMap());
            }
          }

          // Apply the staged cells to each store's memstore.
          if (!tempMemstore.isEmpty()) {
            for (Map.Entry<Store, List<Cell>> entry : tempMemstore.entrySet()) {
              Store store = entry.getKey();
              if (store.getFamily().getMaxVersions() == 1) {
                // Single-version family: upsert replaces the old cell.
                size += store.upsert(entry.getValue(), getSmallestReadPoint());
              } else {
                // Multi-version family: plain add keeps prior versions.
                for (Cell cell : entry.getValue()) {
                  KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
                  size += store.add(kv);
                }
              }
            }
            size = this.addAndGetGlobalMemstoreSize(size);
            flush = isFlushSize(size);
          }
        } finally {
          this.updatesLock.readLock().unlock();
        }
      } finally {
        rowLock.release();
      }
      // Sync after releasing the row lock so other writers are not blocked.
      if (writeToWAL && (walEdits != null) && !walEdits.isEmpty()) {
        syncOrDefer(txid, durability);
      }
    } finally {
      if (w != null) {
        mvcc.completeMemstoreInsert(w);
      }
      closeRegionOperation(Operation.INCREMENT);
      if (this.metricsRegion != null) {
        this.metricsRegion.updateIncrement();
      }
    }

    if (flush) {
      requestFlush();
    }

    return Result.create(allKVs);
  }
5894
5895
5896
5897
5898
5899 private void checkFamily(final byte [] family)
5900 throws NoSuchColumnFamilyException {
5901 if (!this.htableDescriptor.hasFamily(family)) {
5902 throw new NoSuchColumnFamilyException("Column family " +
5903 Bytes.toString(family) + " does not exist in region " + this
5904 + " in table " + this.htableDescriptor);
5905 }
5906 }
5907
  // Estimated shallow heap footprint of an HRegion instance, used by
  // heapSize(): object header + one array + 42 references + 2 ints +
  // 12 longs + 5 booleans. NOTE(review): these counts are maintained by
  // hand - confirm they match the current field list whenever fields are
  // added or removed.
  public static final long FIXED_OVERHEAD = ClassSize.align(
      ClassSize.OBJECT +
      ClassSize.ARRAY +
      42 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT +
      (12 * Bytes.SIZEOF_LONG) +
      5 * Bytes.SIZEOF_BOOLEAN);
5914
5915
5916
5917
5918
5919
5920
5921
5922
5923
5924
  // Estimated deep heap footprint of an HRegion excluding its stores'
  // contents: the shallow size plus the fixed-size helper objects the
  // region owns (atomics, concurrent maps, locks, MVCC state, etc.).
  // NOTE(review): like FIXED_OVERHEAD, maintained by hand - keep in sync
  // with the fields actually allocated by this class.
  public static final long DEEP_OVERHEAD = FIXED_OVERHEAD +
      ClassSize.OBJECT + // closeLock
      (2 * ClassSize.ATOMIC_BOOLEAN) +
      (3 * ClassSize.ATOMIC_LONG) +
      (2 * ClassSize.CONCURRENT_HASHMAP) +
      WriteState.HEAP_SIZE +
      ClassSize.CONCURRENT_SKIPLISTMAP + ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY +
      (2 * ClassSize.REENTRANT_LOCK) +
      ClassSize.ARRAYLIST +
      MultiVersionConsistencyControl.FIXED_SIZE
      + ClassSize.TREEMAP
      + 2 * ClassSize.ATOMIC_INTEGER
      ;
5938
5939 @Override
5940 public long heapSize() {
5941 long heapSize = DEEP_OVERHEAD;
5942 for (Store store : this.stores.values()) {
5943 heapSize += store.heapSize();
5944 }
5945
5946 return heapSize;
5947 }
5948
5949
5950
5951
5952
5953 private static void printUsageAndExit(final String message) {
5954 if (message != null && message.length() > 0) System.out.println(message);
5955 System.out.println("Usage: HRegion CATLALOG_TABLE_DIR [major_compact]");
5956 System.out.println("Options:");
5957 System.out.println(" major_compact Pass this option to major compact " +
5958 "passed region.");
5959 System.out.println("Default outputs scan of passed region.");
5960 System.exit(1);
5961 }
5962
5963
5964
5965
5966
5967
5968
5969
5970
5971
5972
5973
5974
5975
5976
5977
5978
5979 public boolean registerService(Service instance) {
5980
5981
5982
5983 Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
5984 if (coprocessorServiceHandlers.containsKey(serviceDesc.getFullName())) {
5985 LOG.error("Coprocessor service " + serviceDesc.getFullName() +
5986 " already registered, rejecting request from " + instance
5987 );
5988 return false;
5989 }
5990
5991 coprocessorServiceHandlers.put(serviceDesc.getFullName(), instance);
5992 if (LOG.isDebugEnabled()) {
5993 LOG.debug("Registered coprocessor service: region="+
5994 Bytes.toStringBinary(getRegionName())+" service="+serviceDesc.getFullName());
5995 }
5996 return true;
5997 }
5998
5999
6000
6001
6002
6003
6004
6005
6006
6007
6008
6009
6010
6011
6012
  /**
   * Executes a coprocessor endpoint call against a Service previously added
   * via {@link #registerService(Service)}: looks up the service and method,
   * decodes the request, runs the pre/post endpoint coprocessor hooks,
   * invokes the method, and surfaces any error the endpoint recorded on the
   * RpcController as an IOException.
   *
   * @param controller RPC controller the endpoint may record errors on
   * @param call the serialized service name, method name and request payload
   * @return the built response message
   * @throws IOException if the service/method is unknown, decoding fails,
   *   or the endpoint reported an error via the controller
   */
  public Message execService(RpcController controller, CoprocessorServiceCall call)
      throws IOException {
    String serviceName = call.getServiceName();
    String methodName = call.getMethodName();
    if (!coprocessorServiceHandlers.containsKey(serviceName)) {
      throw new UnknownProtocolException(null,
          "No registered coprocessor service found for name "+serviceName+
          " in region "+Bytes.toStringBinary(getRegionName()));
    }

    Service service = coprocessorServiceHandlers.get(serviceName);
    Descriptors.ServiceDescriptor serviceDesc = service.getDescriptorForType();
    Descriptors.MethodDescriptor methodDesc = serviceDesc.findMethodByName(methodName);
    if (methodDesc == null) {
      throw new UnknownProtocolException(service.getClass(),
          "Unknown method "+methodName+" called on service "+serviceName+
          " in region "+Bytes.toStringBinary(getRegionName()));
    }

    // Decode the request payload against the method's request prototype.
    Message.Builder builder = service.getRequestPrototype(methodDesc).newBuilderForType();
    ProtobufUtil.mergeFrom(builder, call.getRequest());
    Message request = builder.build();

    // Allow coprocessors to inspect/replace the request before invocation.
    if (coprocessorHost != null) {
      request = coprocessorHost.preEndpointInvocation(service, methodName, request);
    }

    // The callback merges the endpoint's reply into responseBuilder.
    // NOTE(review): the code below assumes callMethod invokes the callback
    // synchronously before returning - confirm no asynchronous Service
    // implementation reaches this path.
    final Message.Builder responseBuilder =
        service.getResponsePrototype(methodDesc).newBuilderForType();
    service.callMethod(methodDesc, controller, request, new RpcCallback<Message>() {
      @Override
      public void run(Message message) {
        if (message != null) {
          responseBuilder.mergeFrom(message);
        }
      }
    });

    // Allow coprocessors to inspect/augment the response.
    if (coprocessorHost != null) {
      coprocessorHost.postEndpointInvocation(service, methodName, request, responseBuilder);
    }

    // Errors the endpoint set on the controller become thrown exceptions.
    IOException exception = ResponseConverter.getControllerException(controller);
    if (exception != null) {
      throw exception;
    }

    return responseBuilder.build();
  }
6062
6063
6064
6065
6066
6067
6068
6069
6070
6071
6072
6073 private static void processTable(final FileSystem fs, final Path p,
6074 final HLog log, final Configuration c,
6075 final boolean majorCompact)
6076 throws IOException {
6077 HRegion region = null;
6078 FSTableDescriptors fst = new FSTableDescriptors(c);
6079
6080 if (FSUtils.getTableName(p).equals(TableName.META_TABLE_NAME)) {
6081 region = HRegion.newHRegion(p, log, fs, c,
6082 HRegionInfo.FIRST_META_REGIONINFO, fst.get(TableName.META_TABLE_NAME), null);
6083 } else {
6084 throw new IOException("Not a known catalog table: " + p.toString());
6085 }
6086 try {
6087 region.initialize();
6088 if (majorCompact) {
6089 region.compactStores(true);
6090 } else {
6091
6092 Scan scan = new Scan();
6093
6094 RegionScanner scanner = region.getScanner(scan);
6095 try {
6096 List<Cell> kvs = new ArrayList<Cell>();
6097 boolean done;
6098 do {
6099 kvs.clear();
6100 done = scanner.next(kvs);
6101 if (kvs.size() > 0) LOG.info(kvs);
6102 } while (done);
6103 } finally {
6104 scanner.close();
6105 }
6106 }
6107 } finally {
6108 region.close();
6109 }
6110 }
6111
  // Whether a split has been explicitly requested via forceSplit().
  boolean shouldForceSplit() {
    return this.splitRequest;
  }
6115
  // The split point supplied with the last forceSplit(), or null if none.
  byte[] getExplicitSplitPoint() {
    return this.explicitSplitPoint;
  }
6119
  /**
   * Requests that this region be split, optionally at an explicit split point.
   *
   * @param sp the requested split point; when null only the request flag is
   *   set and the split policy chooses the point
   */
  void forceSplit(byte[] sp) {
    // Mark the request; the split point is remembered only when supplied.
    this.splitRequest = true;
    if (sp != null) {
      this.explicitSplitPoint = sp;
    }
  }
6128
  // Clears any pending split request and its explicit split point.
  void clearSplit() {
    this.splitRequest = false;
    this.explicitSplitPoint = null;
  }
6133
6134
6135
6136
  /**
   * Hook invoked before a split begins. Intentionally a no-op here;
   * subclasses may override to do pre-split work.
   */
  protected void prepareToSplit() {
    // No default preparation required.
  }
6140
6141
6142
6143
6144
6145
6146
6147 public byte[] checkSplit() {
6148
6149 if (this.getRegionInfo().isMetaTable() ||
6150 TableName.NAMESPACE_TABLE_NAME.equals(this.getRegionInfo().getTable())) {
6151 if (shouldForceSplit()) {
6152 LOG.warn("Cannot split meta region in HBase 0.20 and above");
6153 }
6154 return null;
6155 }
6156
6157
6158 if (this.isRecovering()) {
6159 LOG.info("Cannot split region " + this.getRegionInfo().getEncodedName() + " in recovery.");
6160 return null;
6161 }
6162
6163 if (!splitPolicy.shouldSplit()) {
6164 return null;
6165 }
6166
6167 byte[] ret = splitPolicy.getSplitPoint();
6168
6169 if (ret != null) {
6170 try {
6171 checkRow(ret, "calculated split");
6172 } catch (IOException e) {
6173 LOG.error("Ignoring invalid split", e);
6174 return null;
6175 }
6176 }
6177 return ret;
6178 }
6179
6180
6181
6182
6183 public int getCompactPriority() {
6184 int count = Integer.MAX_VALUE;
6185 for (Store store : stores.values()) {
6186 count = Math.min(count, store.getCompactPriority());
6187 }
6188 return count;
6189 }
6190
6191
6192
6193
6194
6195
6196 public boolean needsCompaction() {
6197 for (Store store : stores.values()) {
6198 if(store.needsCompaction()) {
6199 return true;
6200 }
6201 }
6202 return false;
6203 }
6204
6205
  // The coprocessor host attached to this region, or null when none is set.
  public RegionCoprocessorHost getCoprocessorHost() {
    return coprocessorHost;
  }
6209
6210
  // Attaches (or replaces) the coprocessor host for this region.
  public void setCoprocessorHost(final RegionCoprocessorHost coprocessorHost) {
    this.coprocessorHost = coprocessorHost;
  }
6214
6215 public void abortRegionServer(String msg) throws IOException {
6216 RegionServerServices rs = getRegionServerServices();
6217 if (rs instanceof HRegionServer) {
6218 ((HRegionServer)rs).abort(msg);
6219 }
6220 }
6221
6222
6223
6224
6225
6226
6227
6228
  /**
   * Begins a generic region operation (no specific type), acquiring the
   * close-guard; must be paired with {@link #closeRegionOperation()}.
   *
   * @throws IOException if the region is closing/closed or a hook fails
   */
  public void startRegionOperation() throws IOException {
    startRegionOperation(Operation.ANY);
  }
6232
6233
6234
6235
6236
  /**
   * Begins a region operation of the given type: rejects operations that are
   * not allowed while the region is recovering, and (except for split/merge/
   * compact, which manage region availability themselves) takes the region's
   * close-guard read lock, verifying the region is neither closing nor
   * closed. Must be paired with {@link #closeRegionOperation(Operation)}.
   *
   * @param op the type of operation being started
   * @throws IOException if the region is recovering (and the op is not an
   *   allowed replay write), is closing/closed, or a coprocessor hook fails
   */
  protected void startRegionOperation(Operation op) throws IOException {
    switch (op) {
    case INCREMENT:
    case APPEND:
    case GET:
    case SCAN:
    case SPLIT_REGION:
    case MERGE_REGION:
    case PUT:
    case DELETE:
    case BATCH_MUTATE:
    case COMPACT_REGION:
      // While recovering: reads are always rejected; writes too unless
      // writes-in-recovery are allowed (PUT/DELETE/BATCH_MUTATE only).
      if (isRecovering() && (this.disallowWritesInRecovering ||
          (op != Operation.PUT && op != Operation.DELETE && op != Operation.BATCH_MUTATE))) {
        throw new RegionInRecoveryException(this.getRegionNameAsString() +
            " is recovering; cannot take reads");
      }
      break;
    default:
      break;
    }
    if (op == Operation.MERGE_REGION || op == Operation.SPLIT_REGION
        || op == Operation.COMPACT_REGION) {
      // Split/merge/compact coordinate their own close handling and must
      // not hold the close-guard read lock.
      return;
    }
    if (this.closing.get()) {
      throw new NotServingRegionException(getRegionNameAsString() + " is closing");
    }
    // Hold the close-guard read lock for the operation's duration; it is
    // released in closeRegionOperation().
    lock(lock.readLock());
    if (this.closed.get()) {
      // Don't leak the lock when the region closed under us.
      lock.readLock().unlock();
      throw new NotServingRegionException(getRegionNameAsString() + " is closed");
    }
    try {
      if (coprocessorHost != null) {
        coprocessorHost.postStartRegionOperation(op);
      }
    } catch (Exception e) {
      // Release the lock if the coprocessor hook fails, then propagate.
      lock.readLock().unlock();
      throw new IOException(e);
    }
  }
6282
6283
6284
6285
6286
6287
  /**
   * Release the close-guard lock taken by {@link #startRegionOperation()}.
   * Equivalent to {@code closeRegionOperation(Operation.ANY)}.
   */
  public void closeRegionOperation() throws IOException {
    closeRegionOperation(Operation.ANY);
  }
6291
6292
6293
6294
6295
6296
6297
  /**
   * Release the close-guard read lock acquired in startRegionOperation(), then
   * run the post-close coprocessor hook.
   * @param operation the operation being closed out
   * @throws IOException if the coprocessor hook fails
   */
  public void closeRegionOperation(Operation operation) throws IOException {
    lock.readLock().unlock();
    if (coprocessorHost != null) {
      coprocessorHost.postCloseRegionOperation(operation);
    }
  }
6304
6305
6306
6307
6308
6309
6310
6311
6312
6313
6314 private void startBulkRegionOperation(boolean writeLockNeeded)
6315 throws NotServingRegionException, RegionTooBusyException, InterruptedIOException {
6316 if (this.closing.get()) {
6317 throw new NotServingRegionException(getRegionNameAsString() + " is closing");
6318 }
6319 if (writeLockNeeded) lock(lock.writeLock());
6320 else lock(lock.readLock());
6321 if (this.closed.get()) {
6322 if (writeLockNeeded) lock.writeLock().unlock();
6323 else lock.readLock().unlock();
6324 throw new NotServingRegionException(getRegionNameAsString() + " is closed");
6325 }
6326 }
6327
6328
6329
6330
6331
6332 private void closeBulkRegionOperation(){
6333 if (lock.writeLock().isHeldByCurrentThread()) lock.writeLock().unlock();
6334 else lock.readLock().unlock();
6335 }
6336
6337
6338
6339
6340
  /**
   * Update the counters tracking mutations applied without a WAL write and the
   * amount of in-memory data that could be lost on a crash.
   * @param familyMap map of family to the cells applied without WAL
   */
  private void recordMutationWithoutWal(final Map<byte [], List<Cell>> familyMap) {
    numMutationsWithoutWAL.increment();
    if (numMutationsWithoutWAL.get() <= 1) {
      // Log only around the first occurrence; the unsynchronized check may let
      // a couple of extra log lines through, which is acceptable.
      LOG.info("writing data to region " + this +
        " with WAL disabled. Data may be lost in the event of a crash.");
    }

    long mutationSize = 0;
    for (List<Cell> cells: familyMap.values()) {
      // Index-based iteration deliberately avoids iterator allocation; callers
      // are expected to pass RandomAccess lists.
      assert cells instanceof RandomAccess;
      int listSize = cells.size();
      for (int i=0; i < listSize; i++) {
        Cell cell = cells.get(i);
        // Approximate footprint of the cell: key length plus value length.
        mutationSize += KeyValueUtil.keyLength(cell) + cell.getValueLength();
      }
    }

    dataInMemoryWithoutWAL.add(mutationSize);
  }
6361
  /**
   * Acquire the given lock with the default busy-wait multiplier of 1.
   * @throws RegionTooBusyException if the lock cannot be acquired in time
   * @throws InterruptedIOException if interrupted while waiting
   */
  private void lock(final Lock lock)
      throws RegionTooBusyException, InterruptedIOException {
    lock(lock, 1);
  }
6366
6367
6368
6369
6370
6371
6372 private void lock(final Lock lock, final int multiplier)
6373 throws RegionTooBusyException, InterruptedIOException {
6374 try {
6375 final long waitTime = Math.min(maxBusyWaitDuration,
6376 busyWaitDuration * Math.min(multiplier, maxBusyWaitMultiplier));
6377 if (!lock.tryLock(waitTime, TimeUnit.MILLISECONDS)) {
6378 throw new RegionTooBusyException(
6379 "failed to get a lock in " + waitTime + " ms. " +
6380 "regionName=" + (this.getRegionInfo() == null ? "unknown" :
6381 this.getRegionInfo().getRegionNameAsString()) +
6382 ", server=" + (this.getRegionServerServices() == null ? "unknown" :
6383 this.getRegionServerServices().getServerName()));
6384 }
6385 } catch (InterruptedException ie) {
6386 LOG.info("Interrupted while waiting for a lock");
6387 InterruptedIOException iie = new InterruptedIOException();
6388 iie.initCause(ie);
6389 throw iie;
6390 }
6391 }
6392
6393
6394
6395
6396
6397
6398
  /**
   * Sync the WAL up to the given transaction id, or skip/defer the sync,
   * depending on the requested durability and this region's default.
   * @param txid the WAL transaction id to sync up to
   * @param durability the durability requested by the mutation
   * @throws IOException if the WAL sync fails
   */
  private void syncOrDefer(long txid, Durability durability) throws IOException {
    if (this.getRegionInfo().isMetaRegion()) {
      // Edits to hbase:meta are always synced immediately, regardless of the
      // requested durability.
      this.log.sync(txid);
    } else {
      switch(durability) {
      case USE_DEFAULT:
        // Defer to this region's configured default durability.
        if (shouldSyncLog()) {
          this.log.sync(txid);
        }
        break;
      case SKIP_WAL:
        // Nothing to sync; the edit was never appended to the WAL.
        break;
      case ASYNC_WAL:
        // Defer the sync; the WAL syncs on its own schedule.
        break;
      case SYNC_WAL:
      case FSYNC_WAL:
        // NOTE(review): FSYNC_WAL appears to be treated the same as SYNC_WAL
        // here (a plain sync call) — confirm whether true fsync is supported.
        this.log.sync(txid);
        break;
      }
    }
  }
6424
6425
6426
6427
6428 private boolean shouldSyncLog() {
6429 return durability.ordinal() > Durability.ASYNC_WAL.ordinal();
6430 }
6431
6432
6433
6434
  /**
   * A write-discarding List: add/addAll silently drop their arguments, size()
   * is always 0, and get() throws. Presumably used where a List sink is
   * required but the added Cells must not be retained — verify against callers.
   */
  private static final List<Cell> MOCKED_LIST = new AbstractList<Cell>() {

    @Override
    public void add(int index, Cell element) {
      // no-op: discard the element
    }

    @Override
    public boolean addAll(int index, Collection<? extends Cell> c) {
      return false;
    }

    @Override
    public KeyValue get(int index) {
      // reads are not supported on this mock
      throw new UnsupportedOperationException();
    }

    @Override
    public int size() {
      return 0;
    }
  };
6457
6458
6459
6460
6461
6462
6463
6464
6465
6466
6467
6468 public static void main(String[] args) throws IOException {
6469 if (args.length < 1) {
6470 printUsageAndExit(null);
6471 }
6472 boolean majorCompact = false;
6473 if (args.length > 1) {
6474 if (!args[1].toLowerCase().startsWith("major")) {
6475 printUsageAndExit("ERROR: Unrecognized option <" + args[1] + ">");
6476 }
6477 majorCompact = true;
6478 }
6479 final Path tableDir = new Path(args[0]);
6480 final Configuration c = HBaseConfiguration.create();
6481 final FileSystem fs = FileSystem.get(c);
6482 final Path logdir = new Path(c.get("hbase.tmp.dir"));
6483 final String logname = "hlog" + FSUtils.getTableName(tableDir) + System.currentTimeMillis();
6484
6485 final HLog log = HLogFactory.createHLog(fs, logdir, logname, c);
6486 try {
6487 processTable(fs, tableDir, log, c, majorCompact);
6488 } finally {
6489 log.close();
6490
6491 BlockCache bc = new CacheConfig(c).getBlockCache();
6492 if (bc != null) bc.shutdown();
6493 }
6494 }
6495
6496
6497
6498
  /** @return the sequence number assigned to this region when it was opened. */
  public long getOpenSeqNum() {
    return this.openSeqNum;
  }
6502
6503
6504
6505
6506
  /**
   * @return the map of store family to its max sequence id, used during log
   *   replay. NOTE: returns the internal mutable map, not a copy.
   */
  public Map<byte[], Long> getMaxStoreSeqIdForLogReplay() {
    return this.maxSeqIdInStores;
  }
6510
6511
6512
6513
6514 public CompactionState getCompactionState() {
6515 boolean hasMajor = majorInProgress.get() > 0, hasMinor = minorInProgress.get() > 0;
6516 return (hasMajor ? (hasMinor ? CompactionState.MAJOR_AND_MINOR : CompactionState.MAJOR)
6517 : (hasMinor ? CompactionState.MINOR : CompactionState.NONE));
6518 }
6519
6520 public void reportCompactionRequestStart(boolean isMajor){
6521 (isMajor ? majorInProgress : minorInProgress).incrementAndGet();
6522 }
6523
6524 public void reportCompactionRequestEnd(boolean isMajor, int numFiles, long filesSizeCompacted){
6525 int newValue = (isMajor ? majorInProgress : minorInProgress).decrementAndGet();
6526
6527
6528 compactionsFinished.incrementAndGet();
6529 compactionNumFilesCompacted.addAndGet(numFiles);
6530 compactionNumBytesCompacted.addAndGet(filesSizeCompacted);
6531
6532 assert newValue >= 0;
6533 }
6534
6535
6536
6537
  /**
   * @return the live sequence-id counter for this region. NOTE: callers share
   *   this mutable AtomicLong instance.
   */
  public AtomicLong getSequenceId() {
    return this.sequenceId;
  }
6541
6542
6543
6544
6545
  /**
   * Set this region's sequence-id counter to the given value.
   * @param value the new sequence id
   */
  private void setSequenceId(long value) {
    this.sequenceId.set(value);
  }
6549
6550
6551
6552
6553
6554
  /**
   * Listener that lets callers of a bulk-load operation hook into the
   * per-family lifecycle: prepare, success, and failure.
   */
  public interface BulkLoadListener {

    /**
     * Called before an HFile is loaded for the given family.
     * @param family the column family being loaded to
     * @param srcPath the source path of the HFile
     * @return the final path to use for the actual load
     * @throws IOException if preparation fails
     */
    String prepareBulkLoad(byte[] family, String srcPath) throws IOException;

    /**
     * Called after the HFile for the given family was loaded successfully.
     * @param family the column family that was loaded to
     * @param srcPath the source path of the HFile
     * @throws IOException if post-processing fails
     */
    void doneBulkLoad(byte[] family, String srcPath) throws IOException;

    /**
     * Called after a failed attempt to load the HFile for the given family.
     * @param family the column family the load was targeting
     * @param srcPath the source path of the HFile
     * @throws IOException if failure handling itself fails
     */
    void failedBulkLoad(byte[] family, String srcPath) throws IOException;
  }
6582
  /**
   * Per-row lock bookkeeping: records the owning thread, a reentrancy count of
   * outstanding RowLocks, and a latch that other threads wait on until the row
   * is fully unlocked.
   */
  @VisibleForTesting class RowLockContext {
    private final HashedBytes row;
    // Counted down once, when the last outstanding lock on this row is released.
    private final CountDownLatch latch = new CountDownLatch(1);
    // The thread that owns this row lock; only it may release.
    private final Thread thread;
    // Number of RowLocks handed out and not yet released (reentrancy count).
    private int lockCount = 0;

    RowLockContext(HashedBytes row) {
      this.row = row;
      this.thread = Thread.currentThread();
    }

    boolean ownedByCurrentThread() {
      return thread == Thread.currentThread();
    }

    /** Hand out one more lock on this row, bumping the reentrancy count. */
    RowLock newLock() {
      lockCount++;
      return new RowLock(this);
    }

    /**
     * Release one lock on this row; when the count reaches zero the row entry
     * is removed and waiters are woken.
     * @throws IllegalArgumentException if called from a non-owning thread
     */
    void releaseLock() {
      if (!ownedByCurrentThread()) {
        throw new IllegalArgumentException("Lock held by thread: " + thread
          + " cannot be released by different thread: " + Thread.currentThread());
      }
      lockCount--;
      if (lockCount == 0) {
        // No remaining locks by the owning thread: unlock the row and let
        // waiting threads proceed.
        RowLockContext existingContext = lockedRows.remove(row);
        if (existingContext != this) {
          throw new RuntimeException(
              "Internal row lock state inconsistent, should not happen, row: " + row);
        }
        latch.countDown();
      }
    }
  }
6620
6621
6622
6623
6624
6625
6626 public static class RowLock {
6627 @VisibleForTesting final RowLockContext context;
6628 private boolean released = false;
6629
6630 @VisibleForTesting RowLock(RowLockContext context) {
6631 this.context = context;
6632 }
6633
6634
6635
6636
6637
6638
6639 public void release() {
6640 if (!released) {
6641 context.releaseLock();
6642 released = true;
6643 }
6644 }
6645 }
6646
6647
6648
6649
6650
6651
  /**
   * Acquire the read half of the updates lock; pair with
   * {@link #updatesUnlock()}.
   * @throws RegionTooBusyException if the lock cannot be acquired in time
   * @throws InterruptedIOException if interrupted while waiting
   */
  public void updatesLock() throws RegionTooBusyException, InterruptedIOException {
    lock(updatesLock.readLock());
  }
6655
6656
6657
6658
6659
  /**
   * Release the read half of the updates lock taken by {@link #updatesLock()}.
   */
  public void updatesUnlock() throws InterruptedIOException {
    updatesLock.readLock().unlock();
  }
6663 }