/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.applications.distributedshell;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Vector;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ClientRMProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse;
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.QueueACL;
import org.apache.hadoop.yarn.api.records.QueueInfo;
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;


/**
 * Client for Distributed Shell application submission to YARN.
 *
 * <p> The distributed shell client allows an application master to be launched that in turn would run
 * the provided shell command on a set of containers. </p>
 *
 * <p>This client is meant to act as an example on how to write yarn-based applications. </p>
 *
 * <p> To submit an application, a client first needs to connect to the <code>ResourceManager</code>
 * aka ApplicationsManager or ASM via the {@link ClientRMProtocol}. The {@link ClientRMProtocol}
 * provides a way for the client to get access to cluster information and to request for a
 * new {@link ApplicationId}. </p>
 *
 * <p> For the actual job submission, the client first has to create an {@link ApplicationSubmissionContext}.
 * The {@link ApplicationSubmissionContext} defines the application details such as {@link ApplicationId}
 * and application name, user submitting the application, the priority assigned to the application and the queue
 * to which this application needs to be assigned. In addition to this, the {@link ApplicationSubmissionContext}
 * also defines the {@link ContainerLaunchContext} which describes the <code>Container</code> with which
 * the {@link ApplicationMaster} is launched. </p>
 *
 * <p> The {@link ContainerLaunchContext} in this scenario defines the resources to be allocated for the
 * {@link ApplicationMaster}'s container, the local resources (jars, configuration files) to be made available
 * and the environment to be set for the {@link ApplicationMaster} and the commands to be executed to run the
 * {@link ApplicationMaster}. </p>
 *
 * <p> Using the {@link ApplicationSubmissionContext}, the client submits the application to the
 * <code>ResourceManager</code> and then monitors the application by requesting the <code>ResourceManager</code>
 * for an {@link ApplicationReport} at regular time intervals. In case of the application taking too long, the client
 * kills the application by submitting a {@link KillApplicationRequest} to the <code>ResourceManager</code>. </p>
 *
 */
@InterfaceAudience.Public
@InterfaceStability.Unstable
public class Client {

  private static final Log LOG = LogFactory.getLog(Client.class);

  // Configuration
  private Configuration conf;

  // RPC to communicate to RM
  private YarnRPC rpc;

  // Handle to talk to the Resource Manager/Applications Manager
  private ClientRMProtocol applicationsManager;

  // Application master specific info to register a new Application with RM/ASM
  private String appName = "";
  // App master priority
  private int amPriority = 0;
  // Queue for App master
  private String amQueue = "";
  // User to run app master as
  private String amUser = "";
  // Amt. of memory resource to request for to run the App Master
  private int amMemory = 10;

  // Application master jar file
  private String appMasterJar = "";
  // Main class to invoke application master
  private String appMasterMainClass = "";

  // Shell command to be executed
  private String shellCommand = "";
  // Location of shell script
  private String shellScriptPath = "";
  // Args to be passed to the shell command
  private String shellArgs = "";
  // Env variables to be setup for the shell command
  private Map<String, String> shellEnv = new HashMap<String, String>();
  // Shell Command Container priority
  private int shellCmdPriority = 0;

  // Amt of memory to request for container in which shell script will be executed
  private int containerMemory = 10;
  // No. of containers in which the shell script needs to be executed
  private int numContainers = 1;

  // log4j.properties file
  // if available, add to local resources and set into classpath
  private String log4jPropFile = "";

  // Start time for client
  private final long clientStartTime = System.currentTimeMillis();
  // Timeout threshold for client, in milliseconds. Kill app after time interval expires.
  private long clientTimeout = 600000;

  // Debug flag
  boolean debugFlag = false;

  /**
   * Command line entry point. Exits with status 0 when the application
   * completed successfully (or only usage was requested), 1 when the client
   * threw an error, and 2 when the application did not complete successfully.
   *
   * @param args Command line arguments
   */
  public static void main(String[] args) {
    boolean result = false;
    try {
      Client client = new Client();
      LOG.info("Initializing Client");
      boolean doRun = client.init(args);
      if (!doRun) {
        System.exit(0);
      }
      result = client.run();
    } catch (Throwable t) {
      LOG.fatal("Error running CLient", t);
      System.exit(1);
    }
    if (result) {
      LOG.info("Application completed successfully");
      System.exit(0);
    }
    LOG.error("Application failed to complete successfully");
    System.exit(2);
  }

  /**
   * Creates a client that uses the given configuration and sets up the
   * {@link YarnRPC} instance from it.
   *
   * @param conf Configuration to use for RPC and filesystem access
   * @throws Exception declared for callers; not thrown by the visible body
   */
  public Client(Configuration conf) throws Exception {
    // Set up the configuration and RPC
    this.conf = conf;
    rpc = YarnRPC.create(conf);
  }

  /**
   * Creates a client backed by a freshly constructed {@link Configuration}.
   *
   * @throws Exception declared for callers; see {@link #Client(Configuration)}
   */
  public Client() throws Exception {
    this(new Configuration());
  }

  /**
   * Helper function to print out usage
   * @param opts Command line options understood by the client
   */
  private void printUsage(Options opts) {
    new HelpFormatter().printHelp("Client", opts);
  }

  /**
   * Parse command line options and populate the client's settings.
   *
   * @param args Command line arguments
   * @return Whether the init was successful to run the client;
   *         false when only usage was requested via -help
   * @throws ParseException if the arguments cannot be parsed
   * @throws IllegalArgumentException if no arguments are given, a required
   *         option (jar, shell_command) is missing, or a numeric option is
   *         malformed or out of range
   */
  public boolean init(String[] args) throws ParseException {

    Options opts = new Options();
    opts.addOption("appname", true, "Application Name. Default value - DistributedShell");
    opts.addOption("priority", true, "Application Priority. Default 0");
    opts.addOption("queue", true, "RM Queue in which this application is to be submitted");
    opts.addOption("user", true, "User to run the application as");
    opts.addOption("timeout", true, "Application timeout in milliseconds");
    opts.addOption("master_memory", true, "Amount of memory in MB to be requested to run the application master");
    opts.addOption("jar", true, "Jar file containing the application master");
    opts.addOption("class", true, "Main class to be run for the Application Master.");
    opts.addOption("shell_command", true, "Shell command to be executed by the Application Master");
    opts.addOption("shell_script", true, "Location of the shell script to be executed");
    opts.addOption("shell_args", true, "Command line args for the shell script");
    opts.addOption("shell_env", true, "Environment for shell script. Specified as env_key=env_val pairs");
    opts.addOption("shell_cmd_priority", true, "Priority for the shell command containers");
    opts.addOption("container_memory", true, "Amount of memory in MB to be requested to run the shell command");
    opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed");
    opts.addOption("log_properties", true, "log4j.properties file");
    opts.addOption("debug", false, "Dump out debug information");
    opts.addOption("help", false, "Print usage");
    CommandLine cliParser = new GnuParser().parse(opts, args);

    if (args.length == 0) {
      printUsage(opts);
      throw new IllegalArgumentException("No args specified for client to initialize");
    }

    if (cliParser.hasOption("help")) {
      printUsage(opts);
      return false;
    }

    if (cliParser.hasOption("debug")) {
      debugFlag = true;
    }

    appName = cliParser.getOptionValue("appname", "DistributedShell");
    amPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0"));
    amQueue = cliParser.getOptionValue("queue", "");
    amUser = cliParser.getOptionValue("user", "");
    amMemory = Integer.parseInt(cliParser.getOptionValue("master_memory", "10"));

    if (amMemory < 0) {
      throw new IllegalArgumentException("Invalid memory specified for application master, exiting."
          + " Specified memory=" + amMemory);
    }

    if (!cliParser.hasOption("jar")) {
      throw new IllegalArgumentException("No jar file specified for application master");
    }

    appMasterJar = cliParser.getOptionValue("jar");
    appMasterMainClass = cliParser.getOptionValue("class",
        "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster");

    if (!cliParser.hasOption("shell_command")) {
      throw new IllegalArgumentException("No shell command specified to be executed by application master");
    }
    shellCommand = cliParser.getOptionValue("shell_command");

    if (cliParser.hasOption("shell_script")) {
      shellScriptPath = cliParser.getOptionValue("shell_script");
    }
    if (cliParser.hasOption("shell_args")) {
      shellArgs = cliParser.getOptionValue("shell_args");
    }
    if (cliParser.hasOption("shell_env")) {
      String[] envs = cliParser.getOptionValues("shell_env");
      for (String env : envs) {
        env = env.trim();
        int index = env.indexOf('=');
        if (index == -1) {
          // No '=' present: treat the whole token as a key with an empty value
          shellEnv.put(env, "");
          continue;
        }
        String key = env.substring(0, index);
        String val = "";
        if (index < (env.length() - 1)) {
          val = env.substring(index + 1);
        }
        shellEnv.put(key, val);
      }
    }
    shellCmdPriority = Integer.parseInt(cliParser.getOptionValue("shell_cmd_priority", "0"));

    containerMemory = Integer.parseInt(cliParser.getOptionValue("container_memory", "10"));
    numContainers = Integer.parseInt(cliParser.getOptionValue("num_containers", "1"));

    if (containerMemory < 0 || numContainers < 1) {
      throw new IllegalArgumentException("Invalid no. of containers or container memory specified, exiting."
          + " Specified containerMemory=" + containerMemory
          + ", numContainer=" + numContainers);
    }

    // clientTimeout is a long; parse it as one so timeouts beyond
    // Integer.MAX_VALUE milliseconds are accepted rather than rejected
    clientTimeout = Long.parseLong(cliParser.getOptionValue("timeout", "600000"));

    log4jPropFile = cliParser.getOptionValue("log_properties", "");

    return true;
  }

  /**
   * Main run function for the client. Connects to the ResourceManager, logs
   * cluster/queue information, uploads the application master jar (and the
   * optional log4j properties file and shell script), submits the
   * application and then monitors it until it ends or the client timeout
   * expires.
   *
   * @return true if application completed successfully
   * @throws IOException on RPC or filesystem failures
   */
  public boolean run() throws IOException {
    LOG.info("Starting Client");

    // Connect to ResourceManager
    connectToASM();
    assert(applicationsManager != null);

    // Use ClientRMProtocol handle to general cluster information
    GetClusterMetricsRequest clusterMetricsReq = Records.newRecord(GetClusterMetricsRequest.class);
    GetClusterMetricsResponse clusterMetricsResp = applicationsManager.getClusterMetrics(clusterMetricsReq);
    LOG.info("Got Cluster metric info from ASM"
        + ", numNodeManagers=" + clusterMetricsResp.getClusterMetrics().getNumNodeManagers());

    GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class);
    GetClusterNodesResponse clusterNodesResp = applicationsManager.getClusterNodes(clusterNodesReq);
    LOG.info("Got Cluster node info from ASM");
    for (NodeReport node : clusterNodesResp.getNodeReports()) {
      LOG.info("Got node report from ASM for"
          + ", nodeId=" + node.getNodeId()
          + ", nodeAddress=" + node.getHttpAddress()
          + ", nodeRackName=" + node.getRackName()
          + ", nodeNumContainers=" + node.getNumContainers()
          + ", nodeHealthStatus=" + node.getNodeHealthStatus());
    }

    // NOTE(review): no queue name is set on this request before it is sent;
    // confirm against the RM whether the target queue (amQueue) should be
    // set here.
    GetQueueInfoRequest queueInfoReq = Records.newRecord(GetQueueInfoRequest.class);
    GetQueueInfoResponse queueInfoResp = applicationsManager.getQueueInfo(queueInfoReq);
    QueueInfo queueInfo = queueInfoResp.getQueueInfo();
    LOG.info("Queue info"
        + ", queueName=" + queueInfo.getQueueName()
        + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity()
        + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
        + ", queueApplicationCount=" + queueInfo.getApplications().size()
        + ", queueChildQueueCount=" + queueInfo.getChildQueues().size());

    GetQueueUserAclsInfoRequest queueUserAclsReq = Records.newRecord(GetQueueUserAclsInfoRequest.class);
    GetQueueUserAclsInfoResponse queueUserAclsResp = applicationsManager.getQueueUserAcls(queueUserAclsReq);
    List<QueueUserACLInfo> listAclInfo = queueUserAclsResp.getUserAclsInfoList();
    for (QueueUserACLInfo aclInfo : listAclInfo) {
      for (QueueACL userAcl : aclInfo.getUserAcls()) {
        LOG.info("User ACL Info for Queue"
            + ", queueName=" + aclInfo.getQueueName()
            + ", userAcl=" + userAcl.name());
      }
    }

    // Get a new application id
    GetNewApplicationResponse newApp = getApplication();
    ApplicationId appId = newApp.getApplicationId();

    // TODO get min/max resource capabilities from RM and change memory ask if needed
    // If we do not have min/max, we may not be able to correctly request
    // the required resources from the RM for the app master
    // Memory ask has to be a multiple of min and less than max.
    // Dump out information about cluster capability as seen by the resource manager
    int minMem = newApp.getMinimumResourceCapability().getMemory();
    int maxMem = newApp.getMaximumResourceCapability().getMemory();
    LOG.info("Min mem capabililty of resources in this cluster " + minMem);
    LOG.info("Max mem capabililty of resources in this cluster " + maxMem);

    // A resource ask has to be atleast the minimum of the capability of the cluster, the value has to be
    // a multiple of the min value and cannot exceed the max.
    // If it is not an exact multiple of min, the RM will allocate to the nearest multiple of min
    if (amMemory < minMem) {
      LOG.info("AM memory specified below min threshold of cluster. Using min value."
          + ", specified=" + amMemory
          + ", min=" + minMem);
      amMemory = minMem;
    } else if (amMemory > maxMem) {
      LOG.info("AM memory specified above max threshold of cluster. Using max value."
          + ", specified=" + amMemory
          + ", max=" + maxMem);
      amMemory = maxMem;
    }

    // Create launch context for app master
    LOG.info("Setting up application submission context for ASM");
    ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);

    // set the application id
    appContext.setApplicationId(appId);
    // set the application name
    appContext.setApplicationName(appName);

    // Set up the container launch context for the application master
    ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);

    // set local resources for the application master
    // local files or archives as needed
    // In this scenario, the jar file for the application master is part of the local resources
    Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();

    LOG.info("Copy App Master jar from local filesystem and add to local environment");
    // Copy the application master jar to the filesystem
    // Create a local resource to point to the destination jar path
    FileSystem fs = FileSystem.get(conf);
    Path src = new Path(appMasterJar);
    String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar";
    Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
    fs.copyFromLocalFile(false, true, src, dst);
    FileStatus destStatus = fs.getFileStatus(dst);
    LocalResource amJarRsrc = Records.newRecord(LocalResource.class);

    // Set the type of resource - file or archive
    // archives are untarred at destination
    // we don't need the jar file to be untarred for now
    amJarRsrc.setType(LocalResourceType.FILE);
    // Set visibility of the resource
    // Setting to most private option
    amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
    // Set the resource to be copied over
    amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst));
    // Set timestamp and length of file so that the framework
    // can do basic sanity checks for the local resource
    // after it has been copied over to ensure it is the same
    // resource the client intended to use with the application
    amJarRsrc.setTimestamp(destStatus.getModificationTime());
    amJarRsrc.setSize(destStatus.getLen());
    localResources.put("AppMaster.jar", amJarRsrc);

    // Set the log4j properties if needed
    if (!log4jPropFile.isEmpty()) {
      Path log4jSrc = new Path(log4jPropFile);
      Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
      fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
      FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
      LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
      log4jRsrc.setType(LocalResourceType.FILE);
      log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);
      log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
      log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
      log4jRsrc.setSize(log4jFileStatus.getLen());
      localResources.put("log4j.properties", log4jRsrc);
    }

    // The shell script has to be made available on the final container(s)
    // where it will be executed.
    // To do this, we need to first copy into the filesystem that is visible
    // to the yarn framework.
    // We do not need to set this as a local resource for the application
    // master as the application master does not need it.
    String hdfsShellScriptLocation = "";
    long hdfsShellScriptLen = 0;
    long hdfsShellScriptTimestamp = 0;
    if (!shellScriptPath.isEmpty()) {
      Path shellSrc = new Path(shellScriptPath);
      String shellPathSuffix = appName + "/" + appId.getId() + "/ExecShellScript.sh";
      Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
      fs.copyFromLocalFile(false, true, shellSrc, shellDst);
      hdfsShellScriptLocation = shellDst.toUri().toString();
      FileStatus shellFileStatus = fs.getFileStatus(shellDst);
      hdfsShellScriptLen = shellFileStatus.getLen();
      hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
    }

    // Set local resource info into app master container launch context
    amContainer.setLocalResources(localResources);

    // Set the necessary security tokens as needed
    //amContainer.setContainerTokens(containerToken);

    // Set the env variables to be setup in the env where the application master will be run
    LOG.info("Set the environment for the application master");
    Map<String, String> env = new HashMap<String, String>();

    // put location of shell script into env
    // using the env info, the application master will create the correct local resource for the
    // eventual containers that will be launched to execute the shell scripts
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
    env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));

    // Add AppMaster.jar location to classpath
    // At some point we should not be required to add
    // the hadoop specific classpaths to the env.
    // It should be provided out of the box.
    // For now setting all required classpaths including
    // the classpath to "." for the application jar
    StringBuilder classPathEnv = new StringBuilder("${CLASSPATH}:./*");
    // Configuration.get returns null for an unset property; guard against it
    // instead of failing with a NullPointerException on split()
    String yarnAppClasspath = conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH);
    if (yarnAppClasspath != null) {
      for (String c : yarnAppClasspath.split(",")) {
        classPathEnv.append(':');
        classPathEnv.append(c.trim());
      }
    } else {
      LOG.warn("No value configured for " + YarnConfiguration.YARN_APPLICATION_CLASSPATH
          + ", application master classpath will not include it");
    }
    classPathEnv.append(":./log4j.properties");

    // add the runtime classpath needed for tests to work
    String testRuntimeClassPath = Client.getTestRuntimeClasspath();
    classPathEnv.append(':');
    classPathEnv.append(testRuntimeClassPath);

    env.put("CLASSPATH", classPathEnv.toString());

    amContainer.setEnvironment(env);

    // Set the necessary command to execute the application master
    List<CharSequence> vargs = new ArrayList<CharSequence>(30);

    // Set java executable command
    LOG.info("Setting up app master command");
    vargs.add("${JAVA_HOME}" + "/bin/java");
    // Set Xmx based on am memory size
    vargs.add("-Xmx" + amMemory + "m");
    // Set class name
    vargs.add(appMasterMainClass);
    // Set params for Application Master
    vargs.add("--container_memory " + containerMemory);
    vargs.add("--num_containers " + numContainers);
    vargs.add("--priority " + shellCmdPriority);
    if (!shellCommand.isEmpty()) {
      vargs.add("--shell_command " + shellCommand);
    }
    if (!shellArgs.isEmpty()) {
      vargs.add("--shell_args " + shellArgs);
    }
    for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
      vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
    }
    if (debugFlag) {
      vargs.add("--debug");
    }

    vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
    vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");

    // Get final command
    StringBuilder command = new StringBuilder();
    for (CharSequence str : vargs) {
      command.append(str).append(" ");
    }

    LOG.info("Completed setting up app master command " + command.toString());
    List<String> commands = new ArrayList<String>();
    commands.add(command.toString());
    amContainer.setCommands(commands);

    // For launching an AM Container, setting user here is not needed
    // Set user in ApplicationSubmissionContext
    // amContainer.setUser(amUser);

    // Set up resource type requirements
    // For now, only memory is supported so we set memory requirements
    Resource capability = Records.newRecord(Resource.class);
    capability.setMemory(amMemory);
    amContainer.setResource(capability);

    // Service data is a binary blob that can be passed to the application
    // Not needed in this scenario
    // amContainer.setServiceData(serviceData);

    // The following are not required for launching an application master
    // amContainer.setContainerId(containerId);

    appContext.setAMContainerSpec(amContainer);

    // Set the priority for the application master
    Priority pri = Records.newRecord(Priority.class);
    // TODO - what is the range for priority? how to decide?
    pri.setPriority(amPriority);
    appContext.setPriority(pri);

    // Set the queue to which this application is to be submitted in the RM
    appContext.setQueue(amQueue);
    // Set the user submitting this application
    // TODO can it be empty?
    appContext.setUser(amUser);

    // Create the request to send to the applications manager
    SubmitApplicationRequest appRequest = Records.newRecord(SubmitApplicationRequest.class);
    appRequest.setApplicationSubmissionContext(appContext);

    // Submit the application to the applications manager
    // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
    // Ignore the response as either a valid response object is returned on success
    // or an exception thrown to denote some form of a failure
    LOG.info("Submitting application to ASM");
    applicationsManager.submitApplication(appRequest);

    // TODO
    // Try submitting the same request again
    // app submission failure?

    // Monitor the application
    return monitorApplication(appId);

  }

  /**
   * Monitor the submitted application for completion.
   * Kill application if time expires.
   * @param appId Application Id of application to be monitored
   * @return true if application completed successfully
   * @throws YarnRemoteException if a call to the ASM fails
   */
  private boolean monitorApplication(ApplicationId appId) throws YarnRemoteException {

    while (true) {

      // Check app status every 1 second.
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        // Deliberately not re-asserting the interrupt flag: the loop keeps
        // polling, and a set flag would make every later sleep() return
        // immediately, turning this into a busy loop against the RM.
        LOG.debug("Thread sleep in monitoring loop interrupted");
      }

      // Get application report for the appId we are interested in
      GetApplicationReportRequest reportRequest = Records.newRecord(GetApplicationReportRequest.class);
      reportRequest.setApplicationId(appId);
      GetApplicationReportResponse reportResponse = applicationsManager.getApplicationReport(reportRequest);
      ApplicationReport report = reportResponse.getApplicationReport();

      LOG.info("Got application report from ASM for"
          + ", appId=" + appId.getId()
          + ", clientToken=" + report.getClientToken()
          + ", appDiagnostics=" + report.getDiagnostics()
          + ", appMasterHost=" + report.getHost()
          + ", appQueue=" + report.getQueue()
          + ", appMasterRpcPort=" + report.getRpcPort()
          + ", appStartTime=" + report.getStartTime()
          + ", yarnAppState=" + report.getYarnApplicationState().toString()
          + ", distributedFinalState=" + report.getFinalApplicationStatus().toString()
          + ", appTrackingUrl=" + report.getTrackingUrl()
          + ", appUser=" + report.getUser());

      YarnApplicationState state = report.getYarnApplicationState();
      FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
      if (YarnApplicationState.FINISHED == state) {
        if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
          LOG.info("Application has completed successfully. Breaking monitoring loop");
          return true;
        } else {
          LOG.info("Application did finished unsuccessfully."
              + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString()
              + ". Breaking monitoring loop");
          return false;
        }
      } else if (YarnApplicationState.KILLED == state
          || YarnApplicationState.FAILED == state) {
        LOG.info("Application did not finish."
            + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString()
            + ". Breaking monitoring loop");
        return false;
      }

      // The timeout is measured from client construction, not from submission
      if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
        LOG.info("Reached client specified timeout for application. Killing application");
        killApplication(appId);
        return false;
      }
    }

  }

  /**
   * Kill a submitted application by sending a call to the ASM
   * @param appId Application Id to be killed.
   * @throws YarnRemoteException if the kill request fails
   */
  private void killApplication(ApplicationId appId) throws YarnRemoteException {
    KillApplicationRequest request = Records.newRecord(KillApplicationRequest.class);
    // TODO clarify whether multiple jobs with the same app id can be submitted and be running at
    // the same time.
    // If yes, can we kill a particular attempt only?
    request.setApplicationId(appId);
    // KillApplicationResponse response = applicationsManager.forceKillApplication(request);
    // Response can be ignored as it is non-null on success or
    // throws an exception in case of failures
    applicationsManager.forceKillApplication(request);
  }

  /**
   * Connect to the Resource Manager/Applications Manager and store the
   * resulting handle in {@link #applicationsManager}.
   * @throws IOException declared for callers of the connection setup
   */
  private void connectToASM() throws IOException {
    YarnConfiguration yarnConf = new YarnConfiguration(conf);
    InetSocketAddress rmAddress = yarnConf.getSocketAddr(
        YarnConfiguration.RM_ADDRESS,
        YarnConfiguration.DEFAULT_RM_ADDRESS,
        YarnConfiguration.DEFAULT_RM_PORT);
    LOG.info("Connecting to ResourceManager at " + rmAddress);
    applicationsManager = ((ClientRMProtocol) rpc.getProxy(
        ClientRMProtocol.class, rmAddress, conf));
  }

  /**
   * Get a new application from the ASM
   * @return New Application
   * @throws YarnRemoteException if the request to the ASM fails
   */
  private GetNewApplicationResponse getApplication() throws YarnRemoteException {
    GetNewApplicationRequest request = Records.newRecord(GetNewApplicationRequest.class);
    GetNewApplicationResponse response = applicationsManager.getNewApplication(request);
    LOG.info("Got new application id=" + response.getApplicationId());
    return response;
  }

  /**
   * Builds the extra classpath used when running from a test/build
   * environment, by reading the first line of the
   * <code>yarn-apps-ds-generated-classpath</code> resource from the current
   * thread's context class loader and appending the resource file itself.
   *
   * @return the generated classpath, or an empty string when the resource is
   *         not available; a partial value may be returned if reading fails
   */
  private static String getTestRuntimeClasspath() {

    String envClassPath = "";

    LOG.info("Trying to generate classpath for app master from current thread's classpath");

    // Create classpath from generated classpath
    // Check maven pom.xml for generated classpath info
    // Works if compile time env is same as runtime. Mainly tests.
    ClassLoader thisClassLoader =
        Thread.currentThread().getContextClassLoader();
    String generatedClasspathFile = "yarn-apps-ds-generated-classpath";
    InputStream classpathFileStream =
        thisClassLoader.getResourceAsStream(generatedClasspathFile);
    if (classpathFileStream == null) {
      LOG.info("Could not classpath resource from class loader");
      return envClassPath;
    }

    BufferedReader reader = null;
    try {
      LOG.info("Readable bytes from stream=" + classpathFileStream.available());
      reader = new BufferedReader(new InputStreamReader(classpathFileStream));
      String cp = reader.readLine();
      if (cp != null) {
        envClassPath += cp.trim() + ":";
      }
      // Put the file itself on classpath for tasks.
      envClassPath += thisClassLoader.getResource(generatedClasspathFile).getFile();
    } catch (IOException e) {
      LOG.info("Could not find the necessary resource to generate class path for tests. Error=" + e.getMessage());
    } finally {
      // Close in finally so the stream is released even when an unchecked
      // exception escapes; closing the reader also closes the wrapped stream.
      try {
        if (reader != null) {
          reader.close();
        } else {
          classpathFileStream.close();
        }
      } catch (IOException e) {
        LOG.info("Failed to close class path file stream or reader. Error=" + e.getMessage());
      }
    }
    return envClassPath;
  }

}