View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.chaos.monkies;
20  
21  import java.util.ArrayList;
22  import java.util.Collection;
23  import java.util.List;
24  
25  import org.apache.commons.lang.math.RandomUtils;
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.IntegrationTestingUtility;
29  import org.apache.hadoop.hbase.chaos.policies.Policy;
30  import org.apache.hadoop.hbase.util.Pair;
31  
32  /**
33   * A utility to injects faults in a running cluster.
34   * <p>
35   * ChaosMonkey defines Action's and Policy's. Actions are sequences of events, like
36   *  - Select a random server to kill
37   *  - Sleep for 5 sec
38   *  - Start the server on the same host
39   * Actions can also be complex events, like rolling restart of all of the servers.
40   * <p>
41   * Policies on the other hand are responsible for executing the actions based on a strategy.
42   * The default policy is to execute a random action every minute based on predefined action
43   * weights. ChaosMonkey executes predefined named policies until it is stopped. More than one
44   * policy can be active at any time.
45   * <p>
46   * Chaos monkey can be run from the command line, or can be invoked from integration tests.
47   * See {@link org.apache.hadoop.hbase.IntegrationTestIngest} or other integration tests that use
48   * chaos monkey for code examples.
49   * <p>
50   * ChaosMonkey class is indeed inspired by the Netflix's same-named tool:
51   * http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html
52   */
53  public class PolicyBasedChaosMonkey extends ChaosMonkey {
54  
55    private static final Log LOG = LogFactory.getLog(PolicyBasedChaosMonkey.class);
56    private static final long ONE_SEC = 1000;
57    private static final long FIVE_SEC = 5 * ONE_SEC;
58    private static final long ONE_MIN = 60 * ONE_SEC;
59  
60    public static final long TIMEOUT = ONE_MIN;
61  
62    final IntegrationTestingUtility util;
63  
64    /**
65     * Construct a new ChaosMonkey
66     * @param util the HBaseIntegrationTestingUtility already configured
67     * @param policies custom policies to use
68     */
69    public PolicyBasedChaosMonkey(IntegrationTestingUtility util, Policy... policies) {
70      this.util = util;
71      this.policies = policies;
72    }
73  
74    public PolicyBasedChaosMonkey(IntegrationTestingUtility util, Collection<Policy> policies) {
75      this.util = util;
76      this.policies = policies.toArray(new Policy[policies.size()]);
77    }
78  
79  
80    /** Selects a random item from the given items */
81    public static <T> T selectRandomItem(T[] items) {
82      return items[RandomUtils.nextInt(items.length)];
83    }
84  
85    /** Selects a random item from the given items with weights*/
86    public static <T> T selectWeightedRandomItem(List<Pair<T, Integer>> items) {
87      int totalWeight = 0;
88      for (Pair<T, Integer> pair : items) {
89        totalWeight += pair.getSecond();
90      }
91  
92      int cutoff = RandomUtils.nextInt(totalWeight);
93      int cummulative = 0;
94      T item = null;
95  
96      //warn: O(n)
97      for (int i=0; i<items.size(); i++) {
98        int curWeight = items.get(i).getSecond();
99        if ( cutoff < cummulative + curWeight) {
100         item = items.get(i).getFirst();
101         break;
102       }
103       cummulative += curWeight;
104     }
105 
106     return item;
107   }
108 
109   /** Selects and returns ceil(ratio * items.length) random items from the given array */
110   public static <T> List<T> selectRandomItems(T[] items, float ratio) {
111     int remaining = (int)Math.ceil(items.length * ratio);
112 
113     List<T> selectedItems = new ArrayList<T>(remaining);
114 
115     for (int i=0; i<items.length && remaining > 0; i++) {
116       if (RandomUtils.nextFloat() < ((float)remaining/(items.length-i))) {
117         selectedItems.add(items[i]);
118         remaining--;
119       }
120     }
121 
122     return selectedItems;
123   }
124 
125   private Policy[] policies;
126   private Thread[] monkeyThreads;
127 
128   @Override
129   public void start() throws Exception {
130     monkeyThreads = new Thread[policies.length];
131 
132     for (int i=0; i<policies.length; i++) {
133       policies[i].init(new Policy.PolicyContext(this.util));
134       Thread monkeyThread = new Thread(policies[i]);
135       monkeyThread.start();
136       monkeyThreads[i] = monkeyThread;
137     }
138   }
139 
140   @Override
141   public void stop(String why) {
142     for (Policy policy : policies) {
143       policy.stop(why);
144     }
145   }
146 
147   @Override
148   public boolean isStopped() {
149     return policies[0].isStopped();
150   }
151 
152   /**
153    * Wait for ChaosMonkey to stop.
154    * @throws InterruptedException
155    */
156   @Override
157   public void waitForStop() throws InterruptedException {
158     for (Thread monkeyThread : monkeyThreads) {
159       monkeyThread.join();
160     }
161   }
162 }