/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapreduce.tools;

import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TIPStatus;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.JobPriority;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskCompletionEvent;
import org.apache.hadoop.mapreduce.TaskReport;
import org.apache.hadoop.mapreduce.TaskTrackerInfo;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.jobhistory.HistoryViewer;
import org.apache.hadoop.mapreduce.v2.LogParams;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.yarn.logaggregation.LogDumper;

/**
 * Interprets the map reduce cli options.
 *
 * <p>The first argument selects the command (for example {@code -status} or
 * {@code -kill}); the remaining arguments are validated per command before
 * a {@link Cluster} connection is opened and the request is issued.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class CLI extends Configured implements Tool {
  private static final Log LOG = LogFactory.getLog(CLI.class);

  /** Cluster handle; created in {@link #run(String[])} and closed there. */
  protected Cluster cluster;

  public CLI() {
  }

  public CLI(Configuration conf) {
    setConf(conf);
  }

  /**
   * Parses the command line, executes the selected command and returns an
   * exit code.
   *
   * @param argv the command followed by its arguments
   * @return 0 when the command completed successfully; -1 when the arguments
   *         are invalid (usage is printed) or the requested job/task could
   *         not be found or acted upon; for {@code -logs}, the value returned
   *         by the log dumper
   * @throws Exception any failure raised while talking to the cluster, other
   *         than a remote {@link AccessControlException}, whose message is
   *         printed instead
   */
  @Override
  public int run(String[] argv) throws Exception {
    int exitCode = -1;
    if (argv.length < 1) {
      displayUsage("");
      return exitCode;
    }
    // process arguments
    String cmd = argv[0];
    String submitJobFile = null;
    String jobid = null;
    String taskid = null;
    String historyFile = null;
    String counterGroupName = null;
    String counterName = null;
    JobPriority jp = null;
    String taskType = null;
    String taskState = null;
    int fromEvent = 0;
    int nEvents = 0;
    boolean getStatus = false;
    boolean getCounter = false;
    boolean killJob = false;
    boolean listEvents = false;
    boolean viewHistory = false;
    boolean viewAllHistory = false;
    boolean listJobs = false;
    boolean listAllJobs = false;
    boolean listActiveTrackers = false;
    boolean listBlacklistedTrackers = false;
    boolean displayTasks = false;
    boolean killTask = false;
    boolean failTask = false;
    boolean setJobPriority = false;
    boolean logs = false;

    if ("-submit".equals(cmd)) {
      if (argv.length != 2) {
        displayUsage(cmd);
        return exitCode;
      }
      submitJobFile = argv[1];
    } else if ("-status".equals(cmd)) {
      if (argv.length != 2) {
        displayUsage(cmd);
        return exitCode;
      }
      jobid = argv[1];
      getStatus = true;
    } else if ("-counter".equals(cmd)) {
      if (argv.length != 4) {
        displayUsage(cmd);
        return exitCode;
      }
      getCounter = true;
      jobid = argv[1];
      counterGroupName = argv[2];
      counterName = argv[3];
    } else if ("-kill".equals(cmd)) {
      if (argv.length != 2) {
        displayUsage(cmd);
        return exitCode;
      }
      jobid = argv[1];
      killJob = true;
    } else if ("-set-priority".equals(cmd)) {
      if (argv.length != 3) {
        displayUsage(cmd);
        return exitCode;
      }
      jobid = argv[1];
      try {
        jp = JobPriority.valueOf(argv[2]);
      } catch (IllegalArgumentException iae) {
        LOG.info(iae);
        displayUsage(cmd);
        return exitCode;
      }
      setJobPriority = true;
    } else if ("-events".equals(cmd)) {
      if (argv.length != 4) {
        displayUsage(cmd);
        return exitCode;
      }
      jobid = argv[1];
      try {
        fromEvent = Integer.parseInt(argv[2]);
        nEvents = Integer.parseInt(argv[3]);
      } catch (NumberFormatException nfe) {
        // Treat a non-numeric event index/count like any other bad argument
        // (same handling as an invalid priority) instead of letting the
        // exception escape to the caller.
        LOG.info(nfe);
        displayUsage(cmd);
        return exitCode;
      }
      listEvents = true;
    } else if ("-history".equals(cmd)) {
      // Accepted forms: "-history <file>" and "-history all <file>".
      boolean allRequested = argv.length == 3 && "all".equals(argv[1]);
      if (argv.length != 2 && !allRequested) {
        displayUsage(cmd);
        return exitCode;
      }
      viewHistory = true;
      if (allRequested) {
        viewAllHistory = true;
        historyFile = argv[2];
      } else {
        historyFile = argv[1];
      }
    } else if ("-list".equals(cmd)) {
      // Accepted forms: "-list" and "-list all".
      boolean allRequested = argv.length == 2 && "all".equals(argv[1]);
      if (argv.length != 1 && !allRequested) {
        displayUsage(cmd);
        return exitCode;
      }
      if (allRequested) {
        listAllJobs = true;
      } else {
        listJobs = true;
      }
    } else if ("-kill-task".equals(cmd)) {
      if (argv.length != 2) {
        displayUsage(cmd);
        return exitCode;
      }
      killTask = true;
      taskid = argv[1];
    } else if ("-fail-task".equals(cmd)) {
      if (argv.length != 2) {
        displayUsage(cmd);
        return exitCode;
      }
      failTask = true;
      taskid = argv[1];
    } else if ("-list-active-trackers".equals(cmd)) {
      if (argv.length != 1) {
        displayUsage(cmd);
        return exitCode;
      }
      listActiveTrackers = true;
    } else if ("-list-blacklisted-trackers".equals(cmd)) {
      if (argv.length != 1) {
        displayUsage(cmd);
        return exitCode;
      }
      listBlacklistedTrackers = true;
    } else if ("-list-attempt-ids".equals(cmd)) {
      if (argv.length != 4) {
        displayUsage(cmd);
        return exitCode;
      }
      jobid = argv[1];
      taskType = argv[2];
      taskState = argv[3];
      displayTasks = true;
    } else if ("-logs".equals(cmd)) {
      if (argv.length == 2 || argv.length == 3) {
        logs = true;
        jobid = argv[1];
        // The task attempt id is optional; it stays null when omitted.
        if (argv.length == 3) {
          taskid = argv[2];
        } else {
          taskid = null;
        }
      } else {
        displayUsage(cmd);
        return exitCode;
      }
    } else {
      displayUsage(cmd);
      return exitCode;
    }

    // initialize cluster
    cluster = new Cluster(getConf());

    // Submit the request
    try {
      if (submitJobFile != null) {
        Job job = Job.getInstance(new JobConf(submitJobFile));
        job.submit();
        System.out.println("Created job " + job.getJobID());
        exitCode = 0;
      } else if (getStatus) {
        Job job = cluster.getJob(JobID.forName(jobid));
        if (job == null) {
          System.out.println("Could not find job " + jobid);
        } else {
          Counters counters = job.getCounters();
          System.out.println();
          System.out.println(job);
          if (counters != null) {
            System.out.println(counters);
          } else {
            System.out.println("Counters not available. Job is retired.");
          }
          exitCode = 0;
        }
      } else if (getCounter) {
        Job job = cluster.getJob(JobID.forName(jobid));
        if (job == null) {
          System.out.println("Could not find job " + jobid);
        } else {
          Counters counters = job.getCounters();
          if (counters == null) {
            System.out.println("Counters not available for retired job " +
                jobid);
            exitCode = -1;
          } else {
            System.out.println(getCounter(counters,
                counterGroupName, counterName));
            exitCode = 0;
          }
        }
      } else if (killJob) {
        Job job = cluster.getJob(JobID.forName(jobid));
        if (job == null) {
          System.out.println("Could not find job " + jobid);
        } else {
          job.killJob();
          System.out.println("Killed job " + jobid);
          exitCode = 0;
        }
      } else if (setJobPriority) {
        Job job = cluster.getJob(JobID.forName(jobid));
        if (job == null) {
          System.out.println("Could not find job " + jobid);
        } else {
          job.setPriority(jp);
          System.out.println("Changed job priority.");
          exitCode = 0;
        }
      } else if (viewHistory) {
        viewHistory(historyFile, viewAllHistory);
        exitCode = 0;
      } else if (listEvents) {
        Job job = cluster.getJob(JobID.forName(jobid));
        // An unknown job yields null; report it like the other commands do
        // rather than failing with a NullPointerException.
        if (job == null) {
          System.out.println("Could not find job " + jobid);
        } else {
          listEvents(job, fromEvent, nEvents);
          exitCode = 0;
        }
      } else if (listJobs) {
        listJobs(cluster);
        exitCode = 0;
      } else if (listAllJobs) {
        listAllJobs(cluster);
        exitCode = 0;
      } else if (listActiveTrackers) {
        listActiveTrackers(cluster);
        exitCode = 0;
      } else if (listBlacklistedTrackers) {
        listBlacklistedTrackers(cluster);
        exitCode = 0;
      } else if (displayTasks) {
        Job job = cluster.getJob(JobID.forName(jobid));
        // Same null guard as above: the job may not exist.
        if (job == null) {
          System.out.println("Could not find job " + jobid);
        } else {
          displayTasks(job, taskType, taskState);
          exitCode = 0;
        }
      } else if (killTask) {
        TaskAttemptID taskID = TaskAttemptID.forName(taskid);
        Job job = cluster.getJob(taskID.getJobID());
        if (job == null) {
          // jobid is not set for this command; report the job id that was
          // derived from the task attempt id.
          System.out.println("Could not find job " + taskID.getJobID());
        } else if (job.killTask(taskID)) {
          System.out.println("Killed task " + taskid);
          exitCode = 0;
        } else {
          System.out.println("Could not kill task " + taskid);
          exitCode = -1;
        }
      } else if (failTask) {
        TaskAttemptID taskID = TaskAttemptID.forName(taskid);
        Job job = cluster.getJob(taskID.getJobID());
        if (job == null) {
          // jobid is not set for this command; report the job id that was
          // derived from the task attempt id.
          System.out.println("Could not find job " + taskID.getJobID());
        } else if (job.failTask(taskID)) {
          System.out.println("Killed task " + taskID + " by failing it");
          exitCode = 0;
        } else {
          System.out.println("Could not fail task " + taskid);
          exitCode = -1;
        }
      } else if (logs) {
        try {
          JobID jobID = JobID.forName(jobid);
          TaskAttemptID taskAttemptID = TaskAttemptID.forName(taskid);
          LogParams logParams = cluster.getLogParams(jobID, taskAttemptID);
          LogDumper logDumper = new LogDumper();
          logDumper.setConf(getConf());
          exitCode = logDumper.dumpAContainersLogs(
              logParams.getApplicationId(), logParams.getContainerId(),
              logParams.getNodeId(), logParams.getOwner());
        } catch (IOException e) {
          // Remote failures are handled by the outer catch; any other I/O
          // problem is only reported and leaves the exit code untouched.
          if (e instanceof RemoteException) {
            throw e;
          }
          System.out.println(e.getMessage());
        }
      }
    } catch (RemoteException re) {
      IOException unwrappedException = re.unwrapRemoteException();
      if (unwrappedException instanceof AccessControlException) {
        System.out.println(unwrappedException.getMessage());
      } else {
        throw re;
      }
    } finally {
      cluster.close();
    }
    return exitCode;
  }

  /**
   * @return the names of all {@link JobPriority} values, separated by a
   *         single space
   */
  private String getJobPriorityNames() {
    StringBuilder sb = new StringBuilder();
    for (JobPriority p : JobPriority.values()) {
      if (sb.length() > 0) {
        sb.append(" ");
      }
      sb.append(p.name());
    }
    return sb.toString();
  }

  /**
   * @return the names of all {@link TaskType} values, separated by a
   *         single space
   */
  private String getTaskTypes() {
    StringBuilder sb = new StringBuilder();
    for (TaskType t : TaskType.values()) {
      if (sb.length() > 0) {
        sb.append(" ");
      }
      sb.append(t.name());
    }
    return sb.toString();
  }

  /**
   * Display usage of the command-line tool on standard error.
   *
   * <p>For a recognized command only that command's usage is printed. For
   * any other value the full command summary is printed, followed by the
   * generic command usage on standard output. This method only prints; the
   * caller decides what to return.
   *
   * @param cmd the command whose usage should be shown
   */
  private void displayUsage(String cmd) {
    String prefix = "Usage: CLI ";
    String jobPriorityValues = getJobPriorityNames();
    String taskTypes = getTaskTypes();
    String taskStates = "running, completed";
    if ("-submit".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + " <job-file>]");
    } else if ("-status".equals(cmd) || "-kill".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + " <job-id>]");
    } else if ("-counter".equals(cmd)) {
      System.err.println(prefix + "[" + cmd +
          " <job-id> <group-name> <counter-name>]");
    } else if ("-events".equals(cmd)) {
      System.err.println(prefix + "[" + cmd +
          " <job-id> <from-event-#> <#-of-events>]. Event #s start from 1.");
    } else if ("-history".equals(cmd)) {
      // "all" is accepted by the argument parser, so advertise it.
      System.err.println(prefix + "[" + cmd + " [all] <jobHistoryFile>]");
    } else if ("-list".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + " [all]]");
    } else if ("-kill-task".equals(cmd) || "-fail-task".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + " <task-attempt-id>]");
    } else if ("-set-priority".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + " <job-id> <priority>]. " +
          "Valid values for priorities are: "
          + jobPriorityValues);
    } else if ("-list-active-trackers".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + "]");
    } else if ("-list-blacklisted-trackers".equals(cmd)) {
      System.err.println(prefix + "[" + cmd + "]");
    } else if ("-list-attempt-ids".equals(cmd)) {
      System.err.println(prefix + "[" + cmd +
          " <job-id> <task-type> <task-state>]. " +
          "Valid values for <task-type> are " + taskTypes + ". " +
          "Valid values for <task-state> are " + taskStates);
    } else if ("-logs".equals(cmd)) {
      System.err.println(prefix + "[" + cmd +
          " <job-id> <task-attempt-id>]. " +
          " <task-attempt-id> is optional to get task attempt logs.");
    } else {
      // print() rather than printf(): these are plain strings, not format
      // strings, so nothing in them must be interpreted as a conversion.
      System.err.print(prefix + "<command> <args>\n");
      System.err.print("\t[-submit <job-file>]\n");
      System.err.print("\t[-status <job-id>]\n");
      System.err.print("\t[-counter <job-id> <group-name> <counter-name>]\n");
      System.err.print("\t[-kill <job-id>]\n");
      System.err.print("\t[-set-priority <job-id> <priority>]. " +
          "Valid values for priorities are: " + jobPriorityValues + "\n");
      System.err.print("\t[-events <job-id> <from-event-#> <#-of-events>]\n");
      System.err.print("\t[-history [all] <jobHistoryFile>]\n");
      System.err.print("\t[-list [all]]\n");
      System.err.print("\t[-list-active-trackers]\n");
      System.err.print("\t[-list-blacklisted-trackers]\n");
      System.err.println("\t[-list-attempt-ids <job-id> <task-type> " +
          "<task-state>]. " +
          "Valid values for <task-type> are " + taskTypes + ". " +
          "Valid values for <task-state> are " + taskStates);
      System.err.print("\t[-kill-task <task-attempt-id>]\n");
      System.err.print("\t[-fail-task <task-attempt-id>]\n");
      System.err.print("\t[-logs <job-id> <task-attempt-id>]\n\n");
      ToolRunner.printGenericCommandUsage(System.out);
    }
  }

  /**
   * Print the contents of a job history file.
   *
   * @param historyFile the history file to view
   * @param all the "all" flag handed to the {@link HistoryViewer}
   * @throws IOException if the viewer fails
   */
  private void viewHistory(String historyFile, boolean all)
      throws IOException {
    HistoryViewer historyViewer = new HistoryViewer(historyFile,
        getConf(), all);
    historyViewer.print();
  }

  /**
   * Look up the value of a single counter.
   *
   * @param counters the counters to search
   * @param counterGroupName the name of the counter group
   * @param counterName the name of the counter within the group
   * @return the value of the counter
   * @throws IOException declared for overriding implementations
   */
  protected long getCounter(Counters counters, String counterGroupName,
      String counterName) throws IOException {
    return counters.findCounter(counterGroupName, counterName).getValue();
  }

  /**
   * List the task completion events for the given job.
   *
   * @param job the job whose events to list; must not be null
   * @param fromEventId the event id to start from
   * @param numEvents the number of events to request
   * @throws IOException
   * @throws InterruptedException
   */
  private void listEvents(Job job, int fromEventId, int numEvents)
      throws IOException, InterruptedException {
    TaskCompletionEvent[] events = job.
        getTaskCompletionEvents(fromEventId, numEvents);
    System.out.println("Task completion events for " + job.getJobID());
    System.out.println("Number of events (from " + fromEventId + ") are: "
        + events.length);
    for (TaskCompletionEvent event : events) {
      System.out.println(event.getStatus() + " " +
          event.getTaskAttemptId() + " " +
          getTaskLogURL(event.getTaskAttemptId(),
              event.getTaskTrackerHttp()));
    }
  }

  /**
   * Build the plain-text task log URL for a task attempt.
   *
   * @param taskId the task attempt id
   * @param baseUrl the base URL to which the task log path is appended
   * @return {@code baseUrl} followed by the task log path and query string
   */
  protected static String getTaskLogURL(TaskAttemptID taskId, String baseUrl) {
    return (baseUrl + "/tasklog?plaintext=true&attemptid=" + taskId);
  }

  /**
   * Dump a list of the jobs that are not yet complete.
   *
   * @param cluster the cluster to query
   * @throws IOException
   * @throws InterruptedException
   */
  private void listJobs(Cluster cluster)
      throws IOException, InterruptedException {
    List<JobStatus> runningJobs = new ArrayList<JobStatus>();
    for (JobStatus job : cluster.getAllJobStatuses()) {
      if (!job.isJobComplete()) {
        runningJobs.add(job);
      }
    }
    displayJobList(runningJobs.toArray(new JobStatus[0]));
  }

  /**
   * Dump a list of all jobs submitted.
   *
   * @param cluster the cluster to query
   * @throws IOException
   * @throws InterruptedException
   */
  private void listAllJobs(Cluster cluster)
      throws IOException, InterruptedException {
    displayJobList(cluster.getAllJobStatuses());
  }

  /**
   * Display the list of active trackers, one name per line.
   *
   * @param cluster the cluster to query
   */
  private void listActiveTrackers(Cluster cluster)
      throws IOException, InterruptedException {
    TaskTrackerInfo[] trackers = cluster.getActiveTaskTrackers();
    for (TaskTrackerInfo tracker : trackers) {
      System.out.println(tracker.getTaskTrackerName());
    }
  }

  /**
   * Display the list of blacklisted trackers with the reason for each.
   * The header line is printed only when there is at least one tracker.
   *
   * @param cluster the cluster to query
   */
  private void listBlacklistedTrackers(Cluster cluster)
      throws IOException, InterruptedException {
    TaskTrackerInfo[] trackers = cluster.getBlackListedTaskTrackers();
    if (trackers.length > 0) {
      System.out.println("BlackListedNode \t Reason");
    }
    for (TaskTrackerInfo tracker : trackers) {
      System.out.println(tracker.getTaskTrackerName() + "\t" +
          tracker.getReasonForBlacklist());
    }
  }

  /**
   * Print the attempt ids of a task report: the successful attempt for a
   * complete task, or every running attempt for a running task. Reports in
   * any other status print nothing.
   *
   * @param report the task report to print
   */
  private void printTaskAttempts(TaskReport report) {
    if (report.getCurrentStatus() == TIPStatus.COMPLETE) {
      System.out.println(report.getSuccessfulTaskAttemptId());
    } else if (report.getCurrentStatus() == TIPStatus.RUNNING) {
      for (TaskAttemptID t : report.getRunningTaskAttemptIds()) {
        System.out.println(t);
      }
    }
  }

  /**
   * Display the information about a job's tasks, of a particular type and
   * in a particular state.
   *
   * @param job the job
   * @param type the type of the task; must be the name of a
   *             {@link TaskType} constant
   * @param state the state of the task
   *              (pending/running/completed/failed/killed)
   */
  protected void displayTasks(Job job, String type, String state)
      throws IOException, InterruptedException {
    TaskReport[] reports = job.getTaskReports(TaskType.valueOf(type));
    for (TaskReport report : reports) {
      TIPStatus status = report.getCurrentStatus();
      if ((state.equals("pending") && status == TIPStatus.PENDING) ||
          (state.equals("running") && status == TIPStatus.RUNNING) ||
          (state.equals("completed") && status == TIPStatus.COMPLETE) ||
          (state.equals("failed") && status == TIPStatus.FAILED) ||
          (state.equals("killed") && status == TIPStatus.KILLED)) {
        printTaskAttempts(report);
      }
    }
  }

  /**
   * Print the given jobs as a table on standard output.
   *
   * @param jobs the job statuses to display
   */
  public void displayJobList(JobStatus[] jobs)
      throws IOException, InterruptedException {
    displayJobList(jobs, new PrintWriter(System.out));
  }

  @Private
  public static String headerPattern = "%23s\t%10s\t%14s\t%12s\t%12s\t%10s\t%15s\t%15s\t%8s\t%8s\t%10s\t%10s\n";
  @Private
  public static String dataPattern   = "%23s\t%10s\t%14d\t%12s\t%12s\t%10s\t%15s\t%15s\t%8s\t%8s\t%10s\t%10s\n";
  private static final String memPattern   = "%dM";
  private static final String UNAVAILABLE  = "N/A";

  /**
   * Print the given jobs as a table on the supplied writer and flush it.
   * Slot and memory figures that are negative are shown as "N/A".
   *
   * @param jobs the job statuses to display
   * @param writer the destination; flushed but not closed
   */
  @Private
  public void displayJobList(JobStatus[] jobs, PrintWriter writer) {
    writer.println("Total jobs:" + jobs.length);
    writer.printf(headerPattern, "JobId", "State", "StartTime", "UserName",
        "Queue", "Priority", "UsedContainers",
        "RsvdContainers", "UsedMem", "RsvdMem", "NeededMem", "AM info");
    for (JobStatus job : jobs) {
      int numUsedSlots = job.getNumUsedSlots();
      int numReservedSlots = job.getNumReservedSlots();
      int usedMem = job.getUsedMem();
      int rsvdMem = job.getReservedMem();
      int neededMem = job.getNeededMem();
      writer.printf(dataPattern,
          job.getJobID().toString(), job.getState(), job.getStartTime(),
          job.getUsername(), job.getQueue(),
          job.getPriority().name(),
          numUsedSlots < 0 ? UNAVAILABLE : numUsedSlots,
          numReservedSlots < 0 ? UNAVAILABLE : numReservedSlots,
          usedMem < 0 ? UNAVAILABLE : String.format(memPattern, usedMem),
          rsvdMem < 0 ? UNAVAILABLE : String.format(memPattern, rsvdMem),
          neededMem < 0 ? UNAVAILABLE : String.format(memPattern, neededMem),
          job.getSchedulingInfo());
    }
    writer.flush();
  }

  /**
   * Runs the tool through {@link ToolRunner} and terminates with the
   * resulting exit code.
   */
  public static void main(String[] argv) throws Exception {
    int res = ToolRunner.run(new CLI(), argv);
    ExitUtil.terminate(res);
  }
}