1   /**
2    * Copyright The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.client;
21  
22  import org.apache.commons.logging.Log;
23  import org.apache.commons.logging.LogFactory;
24  import org.apache.hadoop.classification.InterfaceAudience;
25  import org.apache.hadoop.classification.InterfaceStability;
26  import org.apache.hadoop.conf.Configuration;
27  import org.apache.hadoop.hbase.HRegionInfo;
28  import org.apache.hadoop.hbase.HRegionLocation;
29  import org.apache.hadoop.hbase.exceptions.ZooKeeperConnectionException;
30  import org.apache.hadoop.hbase.util.Bytes;
31  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
32  
33  import java.io.IOException;
34  import java.util.AbstractMap.SimpleEntry;
35  import java.util.ArrayList;
36  import java.util.HashMap;
37  import java.util.HashSet;
38  import java.util.List;
39  import java.util.Map;
40  import java.util.Set;
41  import java.util.concurrent.ConcurrentHashMap;
42  import java.util.concurrent.ConcurrentSkipListMap;
43  import java.util.concurrent.LinkedBlockingQueue;
44  import java.util.concurrent.atomic.AtomicInteger;
45  import java.util.concurrent.atomic.AtomicLong;
46  
47  /**
48   * HTableMultiplexer provides a thread-safe, non-blocking PUT API across all the tables.
49   * Each put is sharded into a buffer queue based on its destination region server,
50   * so each region server buffer queue only holds puts that share the same destination.
51   * Each queue has a flush worker thread that flushes the buffered puts to its region server.
52   * If a queue is full, the HTableMultiplexer starts dropping Put requests for that
53   * particular queue.
54   *
55   * All puts are retried a configurable number of times before being dropped, and the
56   * HTableMultiplexer can report the number of buffered requests and the number of
57   * failed (dropped) requests, both in total and on a per region server basis.
58   *
59   * This class is thread safe.
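 *
 * A minimal usage sketch (exception handling omitted; the table name, column family and
 * values below are purely illustrative):
 * <pre>
 *   Configuration conf = HBaseConfiguration.create();
 *   // Buffer at most 1000 puts per region server before requests start being dropped.
 *   HTableMultiplexer multiplexer = new HTableMultiplexer(conf, 1000);
 *
 *   Put put = new Put(Bytes.toBytes("row-1"));
 *   put.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("value"));
 *   boolean queued = multiplexer.put(Bytes.toBytes("usertable"), put);
 *   if (!queued) {
 *     // The buffer queue for the destination region server is full.
 *   }
 *
 *   // Periodically inspect buffered/dropped counters and flush latencies.
 *   HTableMultiplexerStatus status = multiplexer.getHTableMultiplexerStatus();
 * </pre>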
60   */
61  @InterfaceAudience.Public
62  @InterfaceStability.Evolving
63  public class HTableMultiplexer {
64    private static final Log LOG = LogFactory.getLog(HTableMultiplexer.class.getName());
65    private static int poolID = 0;
66    
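  /**
   * Configuration key for how often (in milliseconds) each flush worker drains its buffer
   * queue; defaults to 100. For example: {@code conf.setLong(TABLE_MULTIPLEXER_FLUSH_FREQ_MS, 500)}.
   */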
67    static final String TABLE_MULTIPLEXER_FLUSH_FREQ_MS = "hbase.tablemultiplexer.flush.frequency.ms";
68  
69    private Map<byte[], HTable> tableNameToHTableMap;
70  
71    /** The map from each region server to its corresponding buffer queue */
72    private Map<HRegionLocation, LinkedBlockingQueue<PutStatus>>
73      serverToBufferQueueMap;
74  
75    /** The map from each region server to its flush worker */
76    private Map<HRegionLocation, HTableFlushWorker> serverToFlushWorkerMap;
77  
78    private Configuration conf;
79    private int retryNum;
80    private int perRegionServerBufferQueueSize;
81    
82    /**
83     * @param conf The HBase configuration
84     * @param perRegionServerBufferQueueSize determines the max number of buffered Put ops
85     *         for each region server before requests are dropped
86     * @throws ZooKeeperConnectionException if the connection to ZooKeeper fails
87     */
88    public HTableMultiplexer(Configuration conf,
89        int perRegionServerBufferQueueSize) throws ZooKeeperConnectionException {
90      this.conf = conf;
91      this.serverToBufferQueueMap = new ConcurrentHashMap<HRegionLocation,
92        LinkedBlockingQueue<PutStatus>>();
93      this.serverToFlushWorkerMap = new ConcurrentHashMap<HRegionLocation, HTableFlushWorker>();
94      this.tableNameToHTableMap = new ConcurrentSkipListMap<byte[], HTable>(Bytes.BYTES_COMPARATOR);
95      this.retryNum = conf.getInt("hbase.client.retries.number", 10);
96      this.perRegionServerBufferQueueSize = perRegionServerBufferQueueSize;
97    }
98  
99    /**
100   * The put request will be buffered by its corresponding buffer queue. Returns false if the
101   * queue is already full.
102   * @param table the name of the destination table
103   * @param put the put request to buffer
104   * @return true if the request was accepted by its corresponding buffer queue
105   * @throws IOException if the HTable for the destination table cannot be obtained
106   */
107   public boolean put(final byte[] table, final Put put) throws IOException {
108     return put(table, put, this.retryNum);
109   }
110 
111   /**
112    * The put requests will be buffered by their corresponding buffer queues.
113    * Returns the list of puts which could not be queued.
114    * @param table the name of the destination table
115    * @param puts the put requests to buffer
116    * @return the list of puts which could not be queued, or null if all puts were queued
117    * @throws IOException if the HTable for the destination table cannot be obtained
118    */
119   public List<Put> put(final byte[] table, final List<Put> puts)
120       throws IOException {
121     if (puts == null)
122       return null;
123     
124     List<Put> failedPuts = null;
125     boolean result;
126     for (Put put : puts) {
127       result = put(table, put, this.retryNum);
128       if (!result) {
129         
130         // Create the failed puts list if necessary
131         if (failedPuts == null) {
132           failedPuts = new ArrayList<Put>();
133         }
134         // Add the put to the failed puts list
135         failedPuts.add(put);
136       }
137     }
138     return failedPuts;
139   }
140 
141   /**
142    * The put request will be buffered by its corresponding buffer queue and will be
143    * retried up to the given number of times before being dropped.
144    * Returns false if the queue is already full.
145    * @param table the name of the destination table
146    * @param put the put request to buffer
147    * @param retry the number of attempts left before the put is dropped
148    * @return true if the request was accepted by its corresponding buffer queue
149    * @throws IOException if the HTable for the destination table cannot be obtained
150    */
151   public boolean put(final byte[] table, final Put put, int retry)
152       throws IOException {
153     if (retry <= 0) {
154       return false;
155     }
156 
157     LinkedBlockingQueue<PutStatus> queue;
158     HTable htable = getHTable(table);
159     try {
160       htable.validatePut(put);
161       HRegionLocation loc = htable.getRegionLocation(put.getRow(), false);
162       if (loc != null) {
163         // Add the put pair into its corresponding queue.
164         queue = addNewRegionServer(loc, htable);
165         // Generate a MultiPutStatus obj and offer it into the queue
166         PutStatus s = new PutStatus(loc.getRegionInfo(), put, retry);
167         
168         return queue.offer(s);
169       }
170     } catch (Exception e) {
171     LOG.debug("Cannot process the put " + put, e);
172     }
173     return false;
174   }
175 
176   /**
177    * @return the current HTableMultiplexerStatus
178    */
179   public HTableMultiplexerStatus getHTableMultiplexerStatus() {
180     return new HTableMultiplexerStatus(serverToFlushWorkerMap);
181   }
182 
183 
184   private HTable getHTable(final byte[] table) throws IOException {
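    // Lazily create and cache one HTable per table name; re-check inside the
    // synchronized block so concurrent callers do not create duplicate instances.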
185     HTable htable = this.tableNameToHTableMap.get(table);
186     if (htable == null) {
187       synchronized (this.tableNameToHTableMap) {
188         htable = this.tableNameToHTableMap.get(table);
189         if (htable == null)  {
190           htable = new HTable(conf, table);
191           this.tableNameToHTableMap.put(table, htable);
192         }
193       }
194     }
195     return htable;
196   }
197 
198   private synchronized LinkedBlockingQueue<PutStatus> addNewRegionServer(
199       HRegionLocation addr, HTable htable) {
200     LinkedBlockingQueue<PutStatus> queue =
201       serverToBufferQueueMap.get(addr);
202     if (queue == null) {
203       // Create a queue for the new region server
204       queue = new LinkedBlockingQueue<PutStatus>(perRegionServerBufferQueueSize);
205       serverToBufferQueueMap.put(addr, queue);
206 
207       // Create the flush worker
208       HTableFlushWorker worker = new HTableFlushWorker(conf, addr,
209           this, queue, htable);
210       this.serverToFlushWorkerMap.put(addr, worker);
211 
212       // Launch a daemon thread to flush the puts
213       // from the queue to its corresponding region server.
214       String name = "HTableFlushWorker-" + addr.getHostnamePort() + "-"
215           + (poolID++);
216       Thread t = new Thread(worker, name);
217       t.setDaemon(true);
218       t.start();
219     }
220     return queue;
221   }
222 
223   /**
224    * HTableMultiplexerStatus keeps track of the current status of the HTableMultiplexer.
225    * It reports the number of buffered requests and the number of failed (dropped) requests,
226    * both in total and on a per region server basis.
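   * <p>
   * A small polling sketch, assuming an existing HTableMultiplexer instance named
   * {@code multiplexer}:
   * <pre>
   *   HTableMultiplexerStatus status = multiplexer.getHTableMultiplexerStatus();
   *   long buffered = status.getTotalBufferedCounter();
   *   long dropped = status.getTotalFailedCounter();
   *   Map&lt;String, Long&gt; droppedPerServer = status.getFailedCounterForEachRegionServer();
   * </pre>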
227    */
228   static class HTableMultiplexerStatus {
229     private long totalFailedPutCounter;
230     private long totalBufferedPutCounter;
231     private long maxLatency;
232     private long overallAverageLatency;
233     private Map<String, Long> serverToFailedCounterMap;
234     private Map<String, Long> serverToBufferedCounterMap;
235     private Map<String, Long> serverToAverageLatencyMap;
236     private Map<String, Long> serverToMaxLatencyMap;
237 
238     public HTableMultiplexerStatus(
239         Map<HRegionLocation, HTableFlushWorker> serverToFlushWorkerMap) {
240       this.totalBufferedPutCounter = 0;
241       this.totalFailedPutCounter = 0;
242       this.maxLatency = 0;
243       this.overallAverageLatency = 0;
244       this.serverToBufferedCounterMap = new HashMap<String, Long>();
245       this.serverToFailedCounterMap = new HashMap<String, Long>();
246       this.serverToAverageLatencyMap = new HashMap<String, Long>();
247       this.serverToMaxLatencyMap = new HashMap<String, Long>();
248       this.initialize(serverToFlushWorkerMap);
249     }
250 
251     private void initialize(
252         Map<HRegionLocation, HTableFlushWorker> serverToFlushWorkerMap) {
253       if (serverToFlushWorkerMap == null) {
254         return;
255       }
256 
257       long averageCalcSum = 0;
258       int averageCalcCount = 0;
259       for (Map.Entry<HRegionLocation, HTableFlushWorker> entry : serverToFlushWorkerMap
260           .entrySet()) {
261         HRegionLocation addr = entry.getKey();
262         HTableFlushWorker worker = entry.getValue();
263 
264         long bufferedCounter = worker.getTotalBufferedCount();
265         long failedCounter = worker.getTotalFailedCount();
266         long serverMaxLatency = worker.getMaxLatency();
267         AtomicAverageCounter averageCounter = worker.getAverageLatencyCounter();
268         // Get sum and count pieces separately to compute overall average
269         SimpleEntry<Long, Integer> averageComponents = averageCounter
270             .getComponents();
271         long serverAvgLatency = averageCounter.getAndReset();
272 
273         this.totalBufferedPutCounter += bufferedCounter;
274         this.totalFailedPutCounter += failedCounter;
275         if (serverMaxLatency > this.maxLatency) {
276           this.maxLatency = serverMaxLatency;
277         }
278         averageCalcSum += averageComponents.getKey();
279         averageCalcCount += averageComponents.getValue();
280 
281         this.serverToBufferedCounterMap.put(addr.getHostnamePort(),
282             bufferedCounter);
283         this.serverToFailedCounterMap
284             .put(addr.getHostnamePort(),
285             failedCounter);
286         this.serverToAverageLatencyMap.put(addr.getHostnamePort(),
287             serverAvgLatency);
288         this.serverToMaxLatencyMap
289             .put(addr.getHostnamePort(),
290             serverMaxLatency);
291       }
292       this.overallAverageLatency = averageCalcCount != 0 ? averageCalcSum
293           / averageCalcCount : 0;
294     }
295 
296     public long getTotalBufferedCounter() {
297       return this.totalBufferedPutCounter;
298     }
299 
300     public long getTotalFailedCounter() {
301       return this.totalFailedPutCounter;
302     }
303 
304     public long getMaxLatency() {
305       return this.maxLatency;
306     }
307 
308     public long getOverallAverageLatency() {
309       return this.overallAverageLatency;
310     }
311 
312     public Map<String, Long> getBufferedCounterForEachRegionServer() {
313       return this.serverToBufferedCounterMap;
314     }
315 
316     public Map<String, Long> getFailedCounterForEachRegionServer() {
317       return this.serverToFailedCounterMap;
318     }
319 
320     public Map<String, Long> getMaxLatencyForEachRegionServer() {
321       return this.serverToMaxLatencyMap;
322     }
323 
324     public Map<String, Long> getAverageLatencyForEachRegionServer() {
325       return this.serverToAverageLatencyMap;
326     }
327   }
328   
329   private static class PutStatus {
330     private final HRegionInfo regionInfo;
331     private final Put put;
332     private final int retryCount;
333     public PutStatus(final HRegionInfo regionInfo, final Put put,
334         final int retryCount) {
335       this.regionInfo = regionInfo;
336       this.put = put;
337       this.retryCount = retryCount;
338     }
339 
340     public HRegionInfo getRegionInfo() {
341       return regionInfo;
342     }
343     public Put getPut() {
344       return put;
345     }
346     public int getRetryCount() {
347       return retryCount;
348     }
349   }
350 
351   /**
352    * Helper that tracks a running average of the values added since the last reset.
353    */
354   private static class AtomicAverageCounter {
355     private long sum;
356     private int count;
357 
358     public AtomicAverageCounter() {
359       this.sum = 0L;
360       this.count = 0;
361     }
362 
363     public synchronized long getAndReset() {
364       long result = this.get();
365       this.reset();
366       return result;
367     }
368 
369     public synchronized long get() {
370       if (this.count == 0) {
371         return 0;
372       }
373       return this.sum / this.count;
374     }
375 
376     public synchronized SimpleEntry<Long, Integer> getComponents() {
377       return new SimpleEntry<Long, Integer>(sum, count);
378     }
379 
380     public synchronized void reset() {
381       this.sum = 0L;
382       this.count = 0;
383     }
384 
385     public synchronized void add(long value) {
386       this.sum += value;
387       this.count++;
388     }
389   }
390 
391   private static class HTableFlushWorker implements Runnable {
392     private HRegionLocation addr;
393     private Configuration conf;
394     private LinkedBlockingQueue<PutStatus> queue;
395     private HTableMultiplexer htableMultiplexer;
396     private AtomicLong totalFailedPutCount;
397     private AtomicInteger currentProcessingPutCount;
398     private AtomicAverageCounter averageLatency;
399     private AtomicLong maxLatency;
400     private HTable htable; // Used to issue the multi (batch) calls to the region server
401     
402     public HTableFlushWorker(Configuration conf, HRegionLocation addr,
403         HTableMultiplexer htableMultiplexer,
404         LinkedBlockingQueue<PutStatus> queue, HTable htable) {
405       this.addr = addr;
406       this.conf = conf;
407       this.htableMultiplexer = htableMultiplexer;
408       this.queue = queue;
409       this.totalFailedPutCount = new AtomicLong(0);
410       this.currentProcessingPutCount = new AtomicInteger(0);
411       this.averageLatency = new AtomicAverageCounter();
412       this.maxLatency = new AtomicLong(0);
413       this.htable = htable;
414     }
415 
416     public long getTotalFailedCount() {
417       return totalFailedPutCount.get();
418     }
419 
420     public long getTotalBufferedCount() {
421       return queue.size() + currentProcessingPutCount.get();
422     }
423 
424     public AtomicAverageCounter getAverageLatencyCounter() {
425       return this.averageLatency;
426     }
427 
428     public long getMaxLatency() {
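      // Reading the max latency also resets it, so each call reports the max since the last read.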
429       return this.maxLatency.getAndSet(0);
430     }
431 
432     private boolean resubmitFailedPut(PutStatus failedPutStatus,
433         HRegionLocation oldLoc) throws IOException {
434       Put failedPut = failedPutStatus.getPut();
435       // The put failed, so get the name of the table it was destined for.
436       byte[] tableName = failedPutStatus.getRegionInfo().getTableName();
437       // Decrease the retry count
438       int retryCount = failedPutStatus.getRetryCount() - 1;
439       
440       if (retryCount <= 0) {
441         // Out of retries; report failure so the caller can count this put as dropped.
442         return false;
443       } else {
444         // Retry one more time
445         return this.htableMultiplexer.put(tableName, failedPut, retryCount);
446       }
447     }
448 
449     @Override
450     public void run() {
451       List<PutStatus> processingList = new ArrayList<PutStatus>();
452       /*
453        * The frequency in milliseconds at which the current thread processes the
454        * corresponding buffer queue.
455        */
456       long frequency = conf.getLong(TABLE_MULTIPLEXER_FLUSH_FREQ_MS, 100);
457       
458       // initial delay
459       try {
460         Thread.sleep(frequency);
461       } catch (InterruptedException e) {
462       } // Ignore interrupts during the initial delay
463 
464       long start, elapsed;
465       int failedCount = 0;
466       while (true) {
467         try {
468           start = elapsed = EnvironmentEdgeManager.currentTimeMillis();
469 
470           // Clear the processingList and reset failedCount
471           processingList.clear();
472           failedCount = 0;
473           
474           // drain all the queued puts into the tmp list
475           queue.drainTo(processingList);
476           currentProcessingPutCount.set(processingList.size());
477 
478           if (processingList.size() > 0) {
479             ArrayList<Put> list = new ArrayList<Put>(processingList.size());
480             for (PutStatus putStatus: processingList) {
481               list.add(putStatus.getPut());
482             }
483             
484             // Process this multiput request
485             List<Put> failed = null;
486             Object[] results = new Object[list.size()];
487             try {
488               htable.batch(list, results);
489             } catch (IOException e) {
490               LOG.debug("Caught an IOException when flushing puts to region server "
491                   + addr.getHostnamePort(), e);
492             } finally {
493               // mutate list so that it is empty for complete success, or
494               // contains only failed records
495               // results are returned in the same order as the requests in list
496               // walk the list backwards, so we can remove from list without
497               // impacting the indexes of earlier members
498               for (int i = results.length - 1; i >= 0; i--) {
499                 if (results[i] instanceof Result) {
500                   // successful Puts are removed from the list here.
501                   list.remove(i);
502                 }
503               }
504               failed = list;
505             }
506 
507             if (failed != null) {
508               if (failed.size() == processingList.size()) {
509                 // All the puts for this region server are failed. Going to retry it later
510                 for (PutStatus putStatus: processingList) {
511                   if (!resubmitFailedPut(putStatus, this.addr)) {
512                     failedCount++;
513                   }
514                 }
515               } else {
516                 Set<Put> failedPutSet = new HashSet<Put>(failed);
517                 for (PutStatus putStatus: processingList) {
518                   if (failedPutSet.contains(putStatus.getPut())
519                       && !resubmitFailedPut(putStatus, this.addr)) {
520                     failedCount++;
521                   }
522                 }
523               }
524             }
525             // Update the totalFailedCount
526             this.totalFailedPutCount.addAndGet(failedCount);
527             
528             elapsed = EnvironmentEdgeManager.currentTimeMillis() - start;
529             // Update latency counters
530             averageLatency.add(elapsed);
531             if (elapsed > maxLatency.get()) {
532               maxLatency.set(elapsed);
533             }
534             
535             // Log some basic info
536             if (LOG.isDebugEnabled()) {
537               LOG.debug("Processed " + currentProcessingPutCount
538                   + " put requests for " + addr.getHostnamePort() + " and "
539                   + failedCount + " failed" + ", latency for this send: "
540                   + elapsed);
541             }
542 
543             // Reset the current processing put count
544             currentProcessingPutCount.set(0);
545           }
546 
547           // Sleep until the next flush round is due
548           if (elapsed == start) {
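            // Nothing was flushed this round, so elapsed still holds the start timestamp;
            // recompute the actual elapsed time before deciding how long to sleep.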
549             elapsed = EnvironmentEdgeManager.currentTimeMillis() - start;
550           }
551           if (elapsed < frequency) {
552             Thread.sleep(frequency - elapsed);
553           }
554         } catch (Exception e) {
555           // Log all the exceptions and move on
556           LOG.debug("Caught an exception when flushing puts to region server "
557               + addr.getHostnamePort(), e);
559         }
560       }
561     }
562   }
563 }