1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.util;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Arrays;
24  import java.util.Collection;
25  import java.util.HashSet;
26  import java.util.LinkedList;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.Queue;
30  import java.util.Random;
31  import java.util.Set;
32  
33  import org.apache.commons.cli.CommandLine;
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.hbase.ClusterStatus;
38  import org.apache.hadoop.hbase.HBaseCluster;
39  import org.apache.hadoop.hbase.HBaseConfiguration;
40  import org.apache.hadoop.hbase.HRegionInfo;
41  import org.apache.hadoop.hbase.HServerLoad;
42  import org.apache.hadoop.hbase.IntegrationTestingUtility;
43  import org.apache.hadoop.hbase.IntegrationTestDataIngestWithChaosMonkey;
44  import org.apache.hadoop.hbase.ServerName;
45  import org.apache.hadoop.hbase.Stoppable;
46  import org.apache.hadoop.hbase.client.HBaseAdmin;
47  import org.apache.hadoop.util.StringUtils;
48  import org.apache.hadoop.util.ToolRunner;
49  
50  import com.google.common.collect.Lists;
51  import com.google.common.collect.Maps;
52  import com.google.protobuf.ServiceException;
53  
54  /**
55   * A utility to injects faults in a running cluster.
56   * <p>
57   * ChaosMonkey defines Action's and Policy's. Actions are sequences of events, like
58   *  - Select a random server to kill
59   *  - Sleep for 5 sec
60   *  - Start the server on the same host
61   * Actions can also be complex events, like rolling restart of all of the servers.
62   * <p>
63   * Policies on the other hand are responsible for executing the actions based on a strategy.
64   * The default policy is to execute a random action every minute based on predefined action
65   * weights. ChaosMonkey executes predefined named policies until it is stopped. More than one
66   * policy can be active at any time.
67   * <p>
68   * Chaos monkey can be run from the command line, or can be invoked from integration tests.
69   * See {@link IntegrationTestDataIngestWithChaosMonkey} or other integration tests that use
70   * chaos monkey for code examples.
71   * <p>
72   * ChaosMonkey class is indeed inspired by the Netflix's same-named tool:
73   * http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html
74   */
75  public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
76  
77    private static final Log LOG = LogFactory.getLog(ChaosMonkey.class);
78  
79    private static final long ONE_SEC = 1000;
80    private static final long FIVE_SEC = 5 * ONE_SEC;
81    private static final long ONE_MIN = 60 * ONE_SEC;
82    private static final long TIMEOUT = ONE_MIN;
83  
84    final IntegrationTestingUtility util;
85  
86    /**
87     * Construct a new ChaosMonkey
88     * @param util the HBaseIntegrationTestingUtility already configured
89     * @param policies names of pre-defined policies to use
90     */
91    public ChaosMonkey(IntegrationTestingUtility util, String... policies) {
92      this.util = util;
93      setPoliciesByName(policies);
94    }
95  
96    /**
97     * Construct a new ChaosMonkey
98     * @param util the HBaseIntegrationTestingUtility already configured
99     * @param policies custom policies to use
100    */
101   public ChaosMonkey(IntegrationTestingUtility util, Policy... policies) {
102     this.util = util;
103     this.policies = policies;
104   }
105 
106   private void setPoliciesByName(String... policies) {
107     this.policies = new Policy[policies.length];
108     for (int i=0; i < policies.length; i++) {
109       this.policies[i] = NAMED_POLICIES.get(policies[i]);
110     }
111   }
112 
113   /**
114    * Context for Action's
115    */
116   private static class ActionContext {
117     private IntegrationTestingUtility util;
118 
119     ActionContext(IntegrationTestingUtility util) {
120       this.util = util;
121     }
122 
123     IntegrationTestingUtility getHaseIntegrationTestingUtility() {
124       return util;
125     }
126 
127     HBaseCluster getHBaseCluster() {
128       return util.getHBaseClusterInterface();
129     }
130   }
131 
132   /**
133    * A (possibly mischievous) action that the ChaosMonkey can perform.
134    */
135   public static class Action {
136     // TODO: interesting question - should actions be implemented inside
137     //       ChaosMonkey, or outside? If they are inside (initial), the class becomes
138     //       huge and all-encompassing; if they are outside ChaosMonkey becomes just
139     //       a random task scheduler. For now, keep inside.
140 
141     protected ActionContext context;
142     protected HBaseCluster cluster;
143     protected ClusterStatus initialStatus;
144     protected ServerName[] initialServers;
145 
146     void init(ActionContext context) throws Exception {
147       this.context = context;
148       cluster = context.getHBaseCluster();
149       initialStatus = cluster.getInitialClusterStatus();
150       Collection<ServerName> regionServers = initialStatus.getServers();
151       initialServers = regionServers.toArray(new ServerName[regionServers.size()]);
152     }
153 
154     void perform() throws Exception { };
155 
156     // TODO: perhaps these methods should be elsewhere?
157     /** Returns current region servers */
158     protected ServerName[] getCurrentServers() throws IOException {
159       Collection<ServerName> regionServers = cluster.getClusterStatus().getServers();
160       return regionServers.toArray(new ServerName[regionServers.size()]);
161     }
162 
163     protected void killMaster(ServerName server) throws IOException {
164       LOG.info("Killing master:" + server);
165       cluster.killMaster(server);
166       cluster.waitForMasterToStop(server, TIMEOUT);
167       LOG.info("Killed master server:" + server);
168     }
169 
170     protected void startMaster(ServerName server) throws IOException {
171       LOG.info("Starting master:" + server.getHostname());
172       cluster.startMaster(server.getHostname());
173       cluster.waitForActiveAndReadyMaster(TIMEOUT);
174       LOG.info("Started master: " + server);
175     }
176 
177     protected void killRs(ServerName server) throws IOException {
178       LOG.info("Killing region server:" + server);
179       cluster.killRegionServer(server);
180       cluster.waitForRegionServerToStop(server, TIMEOUT);
181       LOG.info("Killed region server:" + server + ". Reported num of rs:"
182           + cluster.getClusterStatus().getServersSize());
183     }
184 
185     protected void startRs(ServerName server) throws IOException {
186       LOG.info("Starting region server:" + server.getHostname());
187       cluster.startRegionServer(server.getHostname());
188       cluster.waitForRegionServerToStart(server.getHostname(), TIMEOUT);
189       LOG.info("Started region server:" + server + ". Reported num of rs:"
190           + cluster.getClusterStatus().getServersSize());
191     }
192   }
193 
194   private static class RestartActionBase extends Action {
195     long sleepTime; // how long should we sleep
196 
197     public RestartActionBase(long sleepTime) {
198       this.sleepTime = sleepTime;
199     }
200 
201     void sleep(long sleepTime) {
202       LOG.info("Sleeping for:" + sleepTime);
203       Threads.sleep(sleepTime);
204     }
205 
206     void restartMaster(ServerName server, long sleepTime) throws IOException {
207       killMaster(server);
208       sleep(sleepTime);
209       startMaster(server);
210     }
211 
212     void restartRs(ServerName server, long sleepTime) throws IOException {
213       killRs(server);
214       sleep(sleepTime);
215       startRs(server);
216     }
217   }
218 
219   public static class RestartActiveMaster extends RestartActionBase {
220     public RestartActiveMaster(long sleepTime) {
221       super(sleepTime);
222     }
223     @Override
224     void perform() throws Exception {
225       LOG.info("Performing action: Restart active master");
226 
227       ServerName master = cluster.getClusterStatus().getMaster();
228       restartMaster(master, sleepTime);
229     }
230   }
231 
232   public static class RestartRandomRs extends RestartActionBase {
233     public RestartRandomRs(long sleepTime) {
234       super(sleepTime);
235     }
236 
237     @Override
238     void perform() throws Exception {
239       LOG.info("Performing action: Restart random region server");
240       ServerName server = selectRandomItem(getCurrentServers());
241 
242       restartRs(server, sleepTime);
243     }
244   }
245 
246   public static class RestartRsHoldingMeta extends RestartRandomRs {
247     public RestartRsHoldingMeta(long sleepTime) {
248       super(sleepTime);
249     }
250     @Override
251     void perform() throws Exception {
252       LOG.info("Performing action: Restart region server holding META");
253       ServerName server = cluster.getServerHoldingMeta();
254       if (server == null) {
255         LOG.warn("No server is holding .META. right now.");
256         return;
257       }
258       restartRs(server, sleepTime);
259     }
260   }
261 
262   public static class RestartRsHoldingRoot extends RestartRandomRs {
263     public RestartRsHoldingRoot(long sleepTime) {
264       super(sleepTime);
265     }
266     @Override
267     void perform() throws Exception {
268       LOG.info("Performing action: Restart region server holding ROOT");
269       ServerName server = cluster.getServerHoldingMeta();
270       if (server == null) {
271         LOG.warn("No server is holding -ROOT- right now.");
272         return;
273       }
274       restartRs(server, sleepTime);
275     }
276   }
277 
278   /**
279    * Restarts a ratio of the running regionservers at the same time
280    */
281   public static class BatchRestartRs extends RestartActionBase {
282     float ratio; //ratio of regionservers to restart
283 
284     public BatchRestartRs(long sleepTime, float ratio) {
285       super(sleepTime);
286       this.ratio = ratio;
287     }
288 
289     @Override
290     void perform() throws Exception {
291       LOG.info(String.format("Performing action: Batch restarting %d%% of region servers",
292           (int)(ratio * 100)));
293       List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
294 
295       for (ServerName server : selectedServers) {
296         LOG.info("Killing region server:" + server);
297         cluster.killRegionServer(server);
298       }
299 
300       for (ServerName server : selectedServers) {
301         cluster.waitForRegionServerToStop(server, TIMEOUT);
302       }
303 
304       LOG.info("Killed " + selectedServers.size() + " region servers. Reported num of rs:"
305           + cluster.getClusterStatus().getServersSize());
306 
307       sleep(sleepTime);
308 
309       for (ServerName server : selectedServers) {
310         LOG.info("Starting region server:" + server.getHostname());
311         cluster.startRegionServer(server.getHostname());
312 
313       }
314       for (ServerName server : selectedServers) {
315         cluster.waitForRegionServerToStart(server.getHostname(), TIMEOUT);
316       }
317       LOG.info("Started " + selectedServers.size() +" region servers. Reported num of rs:"
318           + cluster.getClusterStatus().getServersSize());
319     }
320   }
321 
322   /**
323    * Restarts a ratio of the regionservers in a rolling fashion. At each step, either kills a
324    * server, or starts one, sleeping randomly (0-sleepTime) in between steps.
325    */
326   public static class RollingBatchRestartRs extends BatchRestartRs {
327     public RollingBatchRestartRs(long sleepTime, float ratio) {
328       super(sleepTime, ratio);
329     }
330 
331     @Override
332     void perform() throws Exception {
333       LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers",
334           (int)(ratio * 100)));
335       Random random = new Random();
336       List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
337 
338       Queue<ServerName> serversToBeKilled = new LinkedList<ServerName>(selectedServers);
339       Queue<ServerName> deadServers = new LinkedList<ServerName>();
340 
341       //
342       while (!serversToBeKilled.isEmpty() || !deadServers.isEmpty()) {
343         boolean action = true; //action true = kill server, false = start server
344 
345         if (serversToBeKilled.isEmpty() || deadServers.isEmpty()) {
346           action = deadServers.isEmpty();
347         } else {
348           action = random.nextBoolean();
349         }
350 
351         if (action) {
352           ServerName server = serversToBeKilled.remove();
353           killRs(server);
354           deadServers.add(server);
355         } else {
356           ServerName server = deadServers.remove();
357           startRs(server);
358         }
359 
360         sleep(random.nextInt((int)sleepTime));
361       }
362     }
363   }
364 
365   public static class UnbalanceRegionsAction extends Action {
366     private double fractionOfRegions;
367     private double fractionOfServers;
368     private Random random = new Random();
369 
370     /**
371      * Unbalances the regions on the cluster by choosing "target" servers, and moving
372      * some regions from each of the non-target servers to random target servers.
373      * @param fractionOfRegions Fraction of regions to move from each server.
374      * @param fractionOfServers Fraction of servers to be chosen as targets.
375      */
376     public UnbalanceRegionsAction(double fractionOfRegions, double fractionOfServers) {
377       this.fractionOfRegions = fractionOfRegions;
378       this.fractionOfServers = fractionOfServers;
379     }
380 
381     @Override
382     void perform() throws Exception {
383       LOG.info("Unbalancing regions");
384       ClusterStatus status = this.cluster.getClusterStatus();
385       List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
386       int targetServerCount = (int)Math.ceil(fractionOfServers * victimServers.size());
387       List<byte[]> targetServers = new ArrayList<byte[]>(targetServerCount);
388       for (int i = 0; i < targetServerCount; ++i) {
389         int victimIx = random.nextInt(victimServers.size());
390         String serverName = victimServers.remove(victimIx).getServerName();
391         targetServers.add(Bytes.toBytes(serverName));
392       }
393 
394       List<byte[]> victimRegions = new LinkedList<byte[]>();
395       for (ServerName server : victimServers) {
396         HServerLoad serverLoad = status.getLoad(server);
397         // Ugh.
398         List<byte[]> regions = new LinkedList<byte[]>(serverLoad.getRegionsLoad().keySet());
399         int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size());
400         LOG.debug("Removing " + victimRegionCount + " regions from " + server.getServerName());
401         for (int i = 0; i < victimRegionCount; ++i) {
402           int victimIx = random.nextInt(regions.size());
403           String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx));
404           victimRegions.add(Bytes.toBytes(regionId));
405         }
406       }
407 
408       LOG.info("Moving " + victimRegions.size() + " regions from " + victimServers.size()
409           + " servers to " + targetServers.size() + " different servers");
410       HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin();
411       for (byte[] victimRegion : victimRegions) {
412         int targetIx = random.nextInt(targetServers.size());
413         admin.move(victimRegion, targetServers.get(targetIx));
414       }
415     }
416   }
417 
418   public static class ForceBalancerAction extends Action {
419     @Override
420     void perform() throws Exception {
421       LOG.info("Balancing regions");
422       HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin();
423       boolean result = admin.balancer();
424       if (!result) {
425         LOG.error("Balancer didn't succeed");
426       }
427     }
428   }
429 
430   /**
431    * A context for a Policy
432    */
433   private static class PolicyContext extends ActionContext {
434     PolicyContext(IntegrationTestingUtility util) {
435       super(util);
436     }
437   }
438 
439   /**
440    * A policy to introduce chaos to the cluster
441    */
442   public static abstract class Policy extends StoppableImplementation implements Runnable {
443     PolicyContext context;
444     public void init(PolicyContext context) throws Exception {
445       this.context = context;
446     }
447   }
448 
449   /** A policy that runs multiple other policies one after the other */
450   public static class CompositeSequentialPolicy extends Policy {
451     private List<Policy> policies;
452     public CompositeSequentialPolicy(Policy... policies) {
453       this.policies = Arrays.asList(policies);
454     }
455 
456     @Override
457     public void stop(String why) {
458       super.stop(why);
459       for (Policy p : policies) {
460         p.stop(why);
461       }
462     }
463 
464     @Override
465     public void run() {
466       for (Policy p : policies) {
467         p.run();
468       }
469     }
470 
471     @Override
472     public void init(PolicyContext context) throws Exception {
473       super.init(context);
474       for (Policy p : policies) {
475         p.init(context);
476       }
477     }
478   }
479 
480   /** A policy which does stuff every time interval. */
481   public static abstract class PeriodicPolicy extends Policy {
482     private long periodMs;
483 
484     public PeriodicPolicy(long periodMs) {
485       this.periodMs = periodMs;
486     }
487 
488     @Override
489     public void run() {
490       // Add some jitter.
491       int jitter = new Random().nextInt((int)periodMs);
492       LOG.info("Sleeping for " + jitter + " to add jitter");
493       Threads.sleep(jitter);
494 
495       while (!isStopped()) {
496         long start = System.currentTimeMillis();
497         runOneIteration();
498 
499         if (isStopped()) return;
500         long sleepTime = periodMs - (System.currentTimeMillis() - start);
501         if (sleepTime > 0) {
502           LOG.info("Sleeping for: " + sleepTime);
503           Threads.sleep(sleepTime);
504         }
505       }
506     }
507 
508     protected abstract void runOneIteration();
509 
510     @Override
511     public void init(PolicyContext context) throws Exception {
512       super.init(context);
513       LOG.info("Using ChaosMonkey Policy: " + this.getClass() + ", period: " + periodMs);
514     }
515   }
516 
517 
518   /** A policy which performs a sequence of actions deterministically. */
519   public static class DoActionsOncePolicy extends PeriodicPolicy {
520     private List<Action> actions;
521 
522     public DoActionsOncePolicy(long periodMs, List<Action> actions) {
523       super(periodMs);
524       this.actions = new ArrayList<ChaosMonkey.Action>(actions);
525     }
526 
527     public DoActionsOncePolicy(long periodMs, Action... actions) {
528       this(periodMs, Arrays.asList(actions));
529     }
530 
531     @Override
532     protected void runOneIteration() {
533       if (actions.isEmpty()) {
534         this.stop("done");
535         return;
536       }
537       Action action = actions.remove(0);
538 
539       try {
540         action.perform();
541       } catch (Exception ex) {
542         LOG.warn("Exception occured during performing action: "
543             + StringUtils.stringifyException(ex));
544       }
545     }
546 
547     @Override
548     public void init(PolicyContext context) throws Exception {
549       super.init(context);
550       for (Action action : actions) {
551         action.init(this.context);
552       }
553     }
554   }
555 
556   /**
557    * A policy, which picks a random action according to the given weights,
558    * and performs it every configurable period.
559    */
560   public static class PeriodicRandomActionPolicy extends PeriodicPolicy {
561     private List<Pair<Action, Integer>> actions;
562 
563     public PeriodicRandomActionPolicy(long periodMs, List<Pair<Action, Integer>> actions) {
564       super(periodMs);
565       this.actions = actions;
566     }
567 
568     public PeriodicRandomActionPolicy(long periodMs, Pair<Action, Integer>... actions) {
569       // We don't expect it to be modified.
570       this(periodMs, Arrays.asList(actions));
571     }
572 
573     public PeriodicRandomActionPolicy(long periodMs, Action... actions) {
574       super(periodMs);
575       this.actions = new ArrayList<Pair<Action, Integer>>(actions.length);
576       for (Action action : actions) {
577         this.actions.add(new Pair<Action, Integer>(action, 1));
578       }
579     }
580 
581     @Override
582     protected void runOneIteration() {
583       Action action = selectWeightedRandomItem(actions);
584       try {
585         action.perform();
586       } catch (Exception ex) {
587         LOG.warn("Exception occured during performing action: "
588             + StringUtils.stringifyException(ex));
589       }
590     }
591 
592     @Override
593     public void init(PolicyContext context) throws Exception {
594       super.init(context);
595       for (Pair<Action, Integer> action : actions) {
596         action.getFirst().init(this.context);
597       }
598     }
599   }
600 
601   /** Selects a random item from the given items */
602   static <T> T selectRandomItem(T[] items) {
603     Random random = new Random();
604     return items[random.nextInt(items.length)];
605   }
606 
607   /** Selects a random item from the given items with weights*/
608   static <T> T selectWeightedRandomItem(List<Pair<T, Integer>> items) {
609     Random random = new Random();
610     int totalWeight = 0;
611     for (Pair<T, Integer> pair : items) {
612       totalWeight += pair.getSecond();
613     }
614 
615     int cutoff = random.nextInt(totalWeight);
616     int cummulative = 0;
617     T item = null;
618 
619     //warn: O(n)
620     for (int i=0; i<items.size(); i++) {
621       int curWeight = items.get(i).getSecond();
622       if ( cutoff < cummulative + curWeight) {
623         item = items.get(i).getFirst();
624         break;
625       }
626       cummulative += curWeight;
627     }
628 
629     return item;
630   }
631 
632   /** Selects and returns ceil(ratio * items.length) random items from the given array */
633   static <T> List<T> selectRandomItems(T[] items, float ratio) {
634     Random random = new Random();
635     int remaining = (int)Math.ceil(items.length * ratio);
636 
637     List<T> selectedItems = new ArrayList<T>(remaining);
638 
639     for (int i=0; i<items.length && remaining > 0; i++) {
640       if (random.nextFloat() < ((float)remaining/(items.length-i))) {
641         selectedItems.add(items[i]);
642         remaining--;
643       }
644     }
645 
646     return selectedItems;
647   }
648 
649   /**
650    * All actions that deal with RS's with the following weights (relative probabilities):
651    *  - Restart active master (sleep 5 sec)                    : 2
652    *  - Restart random regionserver (sleep 5 sec)              : 2
653    *  - Restart random regionserver (sleep 60 sec)             : 2
654    *  - Restart META regionserver (sleep 5 sec)                : 1
655    *  - Restart ROOT regionserver (sleep 5 sec)                : 1
656    *  - Batch restart of 50% of regionservers (sleep 5 sec)    : 2
657    *  - Rolling restart of 100% of regionservers (sleep 5 sec) : 2
658    */
659   @SuppressWarnings("unchecked")
660   private static final List<Pair<Action, Integer>> ALL_ACTIONS = Lists.newArrayList(
661       new Pair<Action,Integer>(new RestartActiveMaster(FIVE_SEC), 2),
662       new Pair<Action,Integer>(new RestartRandomRs(FIVE_SEC), 2),
663       new Pair<Action,Integer>(new RestartRandomRs(ONE_MIN), 2),
664       new Pair<Action,Integer>(new RestartRsHoldingMeta(FIVE_SEC), 1),
665       new Pair<Action,Integer>(new RestartRsHoldingRoot(FIVE_SEC), 1),
666       new Pair<Action,Integer>(new BatchRestartRs(FIVE_SEC, 0.5f), 2),
667       new Pair<Action,Integer>(new RollingBatchRestartRs(FIVE_SEC, 1.0f), 2)
668   );
669 
670   public static final String EVERY_MINUTE_RANDOM_ACTION_POLICY = "EVERY_MINUTE_RANDOM_ACTION_POLICY";
671 
672   private Policy[] policies;
673   private Thread[] monkeyThreads;
674 
675   public void start() throws Exception {
676     monkeyThreads = new Thread[policies.length];
677 
678     for (int i=0; i<policies.length; i++) {
679       policies[i].init(new PolicyContext(this.util));
680       Thread monkeyThread = new Thread(policies[i]);
681       monkeyThread.start();
682       monkeyThreads[i] = monkeyThread;
683     }
684   }
685 
686   @Override
687   public void stop(String why) {
688     for (Policy policy : policies) {
689       policy.stop(why);
690     }
691   }
692 
693   @Override
694   public boolean isStopped() {
695     return policies[0].isStopped();
696   }
697 
698   /**
699    * Wait for ChaosMonkey to stop.
700    * @throws InterruptedException
701    */
702   public void waitForStop() throws InterruptedException {
703     for (Thread monkeyThread : monkeyThreads) {
704       monkeyThread.join();
705     }
706   }
707 
708   private static final Map<String, Policy> NAMED_POLICIES = Maps.newHashMap();
709   static {
710     NAMED_POLICIES.put(EVERY_MINUTE_RANDOM_ACTION_POLICY,
711         new PeriodicRandomActionPolicy(ONE_MIN, ALL_ACTIONS));
712   }
713 
714   @Override
715   protected void addOptions() {
716     addOptWithArg("policy", "a named policy defined in ChaosMonkey.java. Possible values: "
717         + NAMED_POLICIES.keySet());
718     //we can add more options, and make policies more configurable
719   }
720 
721   @Override
722   protected void processOptions(CommandLine cmd) {
723     String[] policies = cmd.getOptionValues("policy");
724     if (policies != null) {
725       setPoliciesByName(policies);
726     }
727   }
728 
729   @Override
730   protected int doWork() throws Exception {
731     start();
732     waitForStop();
733     return 0;
734   }
735 
736   public static void main(String[] args) throws Exception {
737     Configuration conf = HBaseConfiguration.create();
738     IntegrationTestingUtility.setUseDistributedCluster(conf);
739     IntegrationTestingUtility util = new IntegrationTestingUtility(conf);
740     util.initializeCluster(1);
741 
742     ChaosMonkey monkey = new ChaosMonkey(util, EVERY_MINUTE_RANDOM_ACTION_POLICY);
743     int ret = ToolRunner.run(conf, monkey, args);
744     System.exit(ret);
745   }
746 
747 }