001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.yarn.applications.distributedshell; 020 021 import java.io.BufferedReader; 022 import java.io.IOException; 023 import java.io.InputStream; 024 import java.io.InputStreamReader; 025 import java.net.InetSocketAddress; 026 import java.util.ArrayList; 027 import java.util.HashMap; 028 import java.util.List; 029 import java.util.Map; 030 import java.util.Vector; 031 032 import org.apache.commons.cli.CommandLine; 033 import org.apache.commons.cli.GnuParser; 034 import org.apache.commons.cli.HelpFormatter; 035 import org.apache.commons.cli.Options; 036 import org.apache.commons.cli.ParseException; 037 import org.apache.commons.logging.Log; 038 import org.apache.commons.logging.LogFactory; 039 import org.apache.hadoop.classification.InterfaceAudience; 040 import org.apache.hadoop.classification.InterfaceStability; 041 import org.apache.hadoop.conf.Configuration; 042 import org.apache.hadoop.fs.FileStatus; 043 import org.apache.hadoop.fs.FileSystem; 044 import org.apache.hadoop.fs.Path; 045 import org.apache.hadoop.net.NetUtils; 046 import org.apache.hadoop.security.SecurityInfo; 047 import org.apache.hadoop.yarn.api.ApplicationConstants; 048 import org.apache.hadoop.yarn.api.ClientRMProtocol; 049 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; 050 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; 051 import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; 052 import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; 053 import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; 054 import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; 055 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; 056 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; 057 import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; 058 import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; 059 import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; 060 import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; 061 import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; 062 import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; 063 import org.apache.hadoop.yarn.api.records.ApplicationId; 064 import org.apache.hadoop.yarn.api.records.ApplicationReport; 065 import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; 066 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; 067 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; 068 import org.apache.hadoop.yarn.api.records.LocalResource; 069 import org.apache.hadoop.yarn.api.records.LocalResourceType; 070 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; 071 import org.apache.hadoop.yarn.api.records.NodeReport; 072 import org.apache.hadoop.yarn.api.records.Priority; 073 import org.apache.hadoop.yarn.api.records.QueueACL; 074 import org.apache.hadoop.yarn.api.records.QueueInfo; 075 import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; 076 import org.apache.hadoop.yarn.api.records.Resource; 077 import org.apache.hadoop.yarn.api.records.YarnApplicationState; 078 import org.apache.hadoop.yarn.conf.YarnConfiguration; 079 import org.apache.hadoop.yarn.exceptions.YarnRemoteException; 080 import org.apache.hadoop.yarn.ipc.YarnRPC; 081 import org.apache.hadoop.yarn.security.client.ClientRMSecurityInfo; 082 import org.apache.hadoop.yarn.util.ConverterUtils; 083 import org.apache.hadoop.yarn.util.Records; 084 085 086 /** 087 * Client for Distributed Shell application submission to YARN. 088 * 089 * <p> The distributed shell client allows an application master to be launched that in turn would run 090 * the provided shell command on a set of containers. </p> 091 * 092 * <p>This client is meant to act as an example on how to write yarn-based applications. </p> 093 * 094 * <p> To submit an application, a client first needs to connect to the <code>ResourceManager</code> 095 * aka ApplicationsManager or ASM via the {@link ClientRMProtocol}. The {@link ClientRMProtocol} 096 * provides a way for the client to get access to cluster information and to request for a 097 * new {@link ApplicationId}. <p> 098 * 099 * <p> For the actual job submission, the client first has to create an {@link ApplicationSubmissionContext}. 100 * The {@link ApplicationSubmissionContext} defines the application details such as {@link ApplicationId} 101 * and application name, user submitting the application, the priority assigned to the application and the queue 102 * to which this application needs to be assigned. In addition to this, the {@link ApplicationSubmissionContext} 103 * also defines the {@link ContainerLaunchContext} which describes the <code>Container</code> with which 104 * the {@link ApplicationMaster} is launched. </p> 105 * 106 * <p> The {@link ContainerLaunchContext} in this scenario defines the resources to be allocated for the 107 * {@link ApplicationMaster}'s container, the local resources (jars, configuration files) to be made available 108 * and the environment to be set for the {@link ApplicationMaster} and the commands to be executed to run the 109 * {@link ApplicationMaster}. <p> 110 * 111 * <p> Using the {@link ApplicationSubmissionContext}, the client submits the application to the 112 * <code>ResourceManager</code> and then monitors the application by requesting the <code>ResourceManager</code> 113 * for an {@link ApplicationReport} at regular time intervals. In case of the application taking too long, the client 114 * kills the application by submitting a {@link KillApplicationRequest} to the <code>ResourceManager</code>. </p> 115 * 116 */ 117 @InterfaceAudience.Public 118 @InterfaceStability.Unstable 119 public class Client { 120 121 private static final Log LOG = LogFactory.getLog(Client.class); 122 123 // Configuration 124 private Configuration conf; 125 126 // RPC to communicate to RM 127 private YarnRPC rpc; 128 129 // Handle to talk to the Resource Manager/Applications Manager 130 private ClientRMProtocol applicationsManager; 131 132 // Application master specific info to register a new Application with RM/ASM 133 private String appName = ""; 134 // App master priority 135 private int amPriority = 0; 136 // Queue for App master 137 private String amQueue = ""; 138 // User to run app master as 139 private String amUser = ""; 140 // Amt. of memory resource to request for to run the App Master 141 private int amMemory = 10; 142 143 // Application master jar file 144 private String appMasterJar = ""; 145 // Main class to invoke application master 146 private String appMasterMainClass = ""; 147 148 // Shell command to be executed 149 private String shellCommand = ""; 150 // Location of shell script 151 private String shellScriptPath = ""; 152 // Args to be passed to the shell command 153 private String shellArgs = ""; 154 // Env variables to be setup for the shell command 155 private Map<String, String> shellEnv = new HashMap<String, String>(); 156 // Shell Command Container priority 157 private int shellCmdPriority = 0; 158 159 // Amt of memory to request for container in which shell script will be executed 160 private int containerMemory = 10; 161 // No. of containers in which the shell script needs to be executed 162 private int numContainers = 1; 163 164 // log4j.properties file 165 // if available, add to local resources and set into classpath 166 private String log4jPropFile = ""; 167 168 // Start time for client 169 private final long clientStartTime = System.currentTimeMillis(); 170 // Timeout threshold for client. Kill app after time interval expires. 171 private long clientTimeout = 600000; 172 173 // Debug flag 174 boolean debugFlag = false; 175 176 /** 177 * @param args Command line arguments 178 */ 179 public static void main(String[] args) { 180 boolean result = false; 181 try { 182 Client client = new Client(); 183 LOG.info("Initializing Client"); 184 boolean doRun = client.init(args); 185 if (!doRun) { 186 System.exit(0); 187 } 188 result = client.run(); 189 } catch (Throwable t) { 190 LOG.fatal("Error running CLient", t); 191 System.exit(1); 192 } 193 if (result) { 194 LOG.info("Application completed successfully"); 195 System.exit(0); 196 } 197 LOG.error("Application failed to complete successfully"); 198 System.exit(2); 199 } 200 201 /** 202 */ 203 public Client() throws Exception { 204 // Set up the configuration and RPC 205 conf = new Configuration(); 206 rpc = YarnRPC.create(conf); 207 } 208 209 /** 210 * Helper function to print out usage 211 * @param opts Parsed command line options 212 */ 213 private void printUsage(Options opts) { 214 new HelpFormatter().printHelp("Client", opts); 215 } 216 217 /** 218 * Parse command line options 219 * @param args Parsed command line options 220 * @return Whether the init was successful to run the client 221 */ 222 public boolean init(String[] args) throws ParseException { 223 224 Options opts = new Options(); 225 opts.addOption("appname", true, "Application Name. Default value - DistributedShell"); 226 opts.addOption("priority", true, "Application Priority. Default 0"); 227 opts.addOption("queue", true, "RM Queue in which this application is to be submitted"); 228 opts.addOption("user", true, "User to run the application as"); 229 opts.addOption("timeout", true, "Application timeout in milliseconds"); 230 opts.addOption("master_memory", true, "Amount of memory in MB to be requested to run the application master"); 231 opts.addOption("jar", true, "Jar file containing the application master"); 232 opts.addOption("class", true, "Main class to be run for the Application Master."); 233 opts.addOption("shell_command", true, "Shell command to be executed by the Application Master"); 234 opts.addOption("shell_script", true, "Location of the shell script to be executed"); 235 opts.addOption("shell_args", true, "Command line args for the shell script"); 236 opts.addOption("shell_env", true, "Environment for shell script. Specified as env_key=env_val pairs"); 237 opts.addOption("shell_cmd_priority", true, "Priority for the shell command containers"); 238 opts.addOption("container_memory", true, "Amount of memory in MB to be requested to run the shell command"); 239 opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed"); 240 opts.addOption("log_properties", true, "log4j.properties file"); 241 opts.addOption("debug", false, "Dump out debug information"); 242 opts.addOption("help", false, "Print usage"); 243 CommandLine cliParser = new GnuParser().parse(opts, args); 244 245 if (args.length == 0) { 246 printUsage(opts); 247 throw new IllegalArgumentException("No args specified for client to initialize"); 248 } 249 250 if (cliParser.hasOption("help")) { 251 printUsage(opts); 252 return false; 253 } 254 255 if (cliParser.hasOption("debug")) { 256 debugFlag = true; 257 258 } 259 260 appName = cliParser.getOptionValue("appname", "DistributedShell"); 261 amPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0")); 262 amQueue = cliParser.getOptionValue("queue", ""); 263 amUser = cliParser.getOptionValue("user", ""); 264 amMemory = Integer.parseInt(cliParser.getOptionValue("master_memory", "10")); 265 266 if (amMemory < 0) { 267 throw new IllegalArgumentException("Invalid memory specified for application master, exiting." 268 + " Specified memory=" + amMemory); 269 } 270 271 if (!cliParser.hasOption("jar")) { 272 throw new IllegalArgumentException("No jar file specified for application master"); 273 } 274 275 appMasterJar = cliParser.getOptionValue("jar"); 276 appMasterMainClass = cliParser.getOptionValue("class", 277 "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster"); 278 279 if (!cliParser.hasOption("shell_command")) { 280 throw new IllegalArgumentException("No shell command specified to be executed by application master"); 281 } 282 shellCommand = cliParser.getOptionValue("shell_command"); 283 284 if (cliParser.hasOption("shell_script")) { 285 shellScriptPath = cliParser.getOptionValue("shell_script"); 286 } 287 if (cliParser.hasOption("shell_args")) { 288 shellArgs = cliParser.getOptionValue("shell_args"); 289 } 290 if (cliParser.hasOption("shell_env")) { 291 String envs[] = cliParser.getOptionValues("shell_env"); 292 for (String env : envs) { 293 env = env.trim(); 294 int index = env.indexOf('='); 295 if (index == -1) { 296 shellEnv.put(env, ""); 297 continue; 298 } 299 String key = env.substring(0, index); 300 String val = ""; 301 if (index < (env.length()-1)) { 302 val = env.substring(index+1); 303 } 304 shellEnv.put(key, val); 305 } 306 } 307 shellCmdPriority = Integer.parseInt(cliParser.getOptionValue("shell_cmd_priority", "0")); 308 309 containerMemory = Integer.parseInt(cliParser.getOptionValue("container_memory", "10")); 310 numContainers = Integer.parseInt(cliParser.getOptionValue("num_containers", "1")); 311 312 if (containerMemory < 0 || numContainers < 1) { 313 throw new IllegalArgumentException("Invalid no. of containers or container memory specified, exiting." 314 + " Specified containerMemory=" + containerMemory 315 + ", numContainer=" + numContainers); 316 } 317 318 clientTimeout = Integer.parseInt(cliParser.getOptionValue("timeout", "600000")); 319 320 log4jPropFile = cliParser.getOptionValue("log_properties", ""); 321 322 return true; 323 } 324 325 /** 326 * Main run function for the client 327 * @return true if application completed successfully 328 * @throws IOException 329 */ 330 public boolean run() throws IOException { 331 LOG.info("Starting Client"); 332 333 // Connect to ResourceManager 334 connectToASM(); 335 assert(applicationsManager != null); 336 337 // Use ClientRMProtocol handle to general cluster information 338 GetClusterMetricsRequest clusterMetricsReq = Records.newRecord(GetClusterMetricsRequest.class); 339 GetClusterMetricsResponse clusterMetricsResp = applicationsManager.getClusterMetrics(clusterMetricsReq); 340 LOG.info("Got Cluster metric info from ASM" 341 + ", numNodeManagers=" + clusterMetricsResp.getClusterMetrics().getNumNodeManagers()); 342 343 GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class); 344 GetClusterNodesResponse clusterNodesResp = applicationsManager.getClusterNodes(clusterNodesReq); 345 LOG.info("Got Cluster node info from ASM"); 346 for (NodeReport node : clusterNodesResp.getNodeReports()) { 347 LOG.info("Got node report from ASM for" 348 + ", nodeId=" + node.getNodeId() 349 + ", nodeAddress" + node.getHttpAddress() 350 + ", nodeRackName" + node.getRackName() 351 + ", nodeNumContainers" + node.getNumContainers() 352 + ", nodeHealthStatus" + node.getNodeHealthStatus()); 353 } 354 355 GetQueueInfoRequest queueInfoReq = Records.newRecord(GetQueueInfoRequest.class); 356 GetQueueInfoResponse queueInfoResp = applicationsManager.getQueueInfo(queueInfoReq); 357 QueueInfo queueInfo = queueInfoResp.getQueueInfo(); 358 LOG.info("Queue info" 359 + ", queueName=" + queueInfo.getQueueName() 360 + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() 361 + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() 362 + ", queueApplicationCount=" + queueInfo.getApplications().size() 363 + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); 364 365 GetQueueUserAclsInfoRequest queueUserAclsReq = Records.newRecord(GetQueueUserAclsInfoRequest.class); 366 GetQueueUserAclsInfoResponse queueUserAclsResp = applicationsManager.getQueueUserAcls(queueUserAclsReq); 367 List<QueueUserACLInfo> listAclInfo = queueUserAclsResp.getUserAclsInfoList(); 368 for (QueueUserACLInfo aclInfo : listAclInfo) { 369 for (QueueACL userAcl : aclInfo.getUserAcls()) { 370 LOG.info("User ACL Info for Queue" 371 + ", queueName=" + aclInfo.getQueueName() 372 + ", userAcl=" + userAcl.name()); 373 } 374 } 375 376 // Get a new application id 377 GetNewApplicationResponse newApp = getApplication(); 378 ApplicationId appId = newApp.getApplicationId(); 379 380 // TODO get min/max resource capabilities from RM and change memory ask if needed 381 // If we do not have min/max, we may not be able to correctly request 382 // the required resources from the RM for the app master 383 // Memory ask has to be a multiple of min and less than max. 384 // Dump out information about cluster capability as seen by the resource manager 385 int minMem = newApp.getMinimumResourceCapability().getMemory(); 386 int maxMem = newApp.getMaximumResourceCapability().getMemory(); 387 LOG.info("Min mem capabililty of resources in this cluster " + minMem); 388 LOG.info("Max mem capabililty of resources in this cluster " + maxMem); 389 390 // A resource ask has to be atleast the minimum of the capability of the cluster, the value has to be 391 // a multiple of the min value and cannot exceed the max. 392 // If it is not an exact multiple of min, the RM will allocate to the nearest multiple of min 393 if (amMemory < minMem) { 394 LOG.info("AM memory specified below min threshold of cluster. Using min value." 395 + ", specified=" + amMemory 396 + ", min=" + minMem); 397 amMemory = minMem; 398 } 399 else if (amMemory > maxMem) { 400 LOG.info("AM memory specified above max threshold of cluster. Using max value." 401 + ", specified=" + amMemory 402 + ", max=" + maxMem); 403 amMemory = maxMem; 404 } 405 406 // Create launch context for app master 407 LOG.info("Setting up application submission context for ASM"); 408 ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class); 409 410 // set the application id 411 appContext.setApplicationId(appId); 412 // set the application name 413 appContext.setApplicationName(appName); 414 415 // Set up the container launch context for the application master 416 ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); 417 418 // set local resources for the application master 419 // local files or archives as needed 420 // In this scenario, the jar file for the application master is part of the local resources 421 Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); 422 423 LOG.info("Copy App Master jar from local filesystem and add to local environment"); 424 // Copy the application master jar to the filesystem 425 // Create a local resource to point to the destination jar path 426 FileSystem fs = FileSystem.get(conf); 427 Path src = new Path(appMasterJar); 428 String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar"; 429 Path dst = new Path(fs.getHomeDirectory(), pathSuffix); 430 fs.copyFromLocalFile(false, true, src, dst); 431 FileStatus destStatus = fs.getFileStatus(dst); 432 LocalResource amJarRsrc = Records.newRecord(LocalResource.class); 433 434 // Set the type of resource - file or archive 435 // archives are untarred at destination 436 // we don't need the jar file to be untarred for now 437 amJarRsrc.setType(LocalResourceType.FILE); 438 // Set visibility of the resource 439 // Setting to most private option 440 amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION); 441 // Set the resource to be copied over 442 amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst)); 443 // Set timestamp and length of file so that the framework 444 // can do basic sanity checks for the local resource 445 // after it has been copied over to ensure it is the same 446 // resource the client intended to use with the application 447 amJarRsrc.setTimestamp(destStatus.getModificationTime()); 448 amJarRsrc.setSize(destStatus.getLen()); 449 localResources.put("AppMaster.jar", amJarRsrc); 450 451 // Set the log4j properties if needed 452 if (!log4jPropFile.isEmpty()) { 453 Path log4jSrc = new Path(log4jPropFile); 454 Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props"); 455 fs.copyFromLocalFile(false, true, log4jSrc, log4jDst); 456 FileStatus log4jFileStatus = fs.getFileStatus(log4jDst); 457 LocalResource log4jRsrc = Records.newRecord(LocalResource.class); 458 log4jRsrc.setType(LocalResourceType.FILE); 459 log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION); 460 log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri())); 461 log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime()); 462 log4jRsrc.setSize(log4jFileStatus.getLen()); 463 localResources.put("log4j.properties", log4jRsrc); 464 } 465 466 // The shell script has to be made available on the final container(s) 467 // where it will be executed. 468 // To do this, we need to first copy into the filesystem that is visible 469 // to the yarn framework. 470 // We do not need to set this as a local resource for the application 471 // master as the application master does not need it. 472 String hdfsShellScriptLocation = ""; 473 long hdfsShellScriptLen = 0; 474 long hdfsShellScriptTimestamp = 0; 475 if (!shellScriptPath.isEmpty()) { 476 Path shellSrc = new Path(shellScriptPath); 477 String shellPathSuffix = appName + "/" + appId.getId() + "/ExecShellScript.sh"; 478 Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); 479 fs.copyFromLocalFile(false, true, shellSrc, shellDst); 480 hdfsShellScriptLocation = shellDst.toUri().toString(); 481 FileStatus shellFileStatus = fs.getFileStatus(shellDst); 482 hdfsShellScriptLen = shellFileStatus.getLen(); 483 hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); 484 } 485 486 // Set local resource info into app master container launch context 487 amContainer.setLocalResources(localResources); 488 489 // Set the necessary security tokens as needed 490 //amContainer.setContainerTokens(containerToken); 491 492 // Set the env variables to be setup in the env where the application master will be run 493 LOG.info("Set the environment for the application master"); 494 Map<String, String> env = new HashMap<String, String>(); 495 496 // put location of shell script into env 497 // using the env info, the application master will create the correct local resource for the 498 // eventual containers that will be launched to execute the shell scripts 499 env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); 500 env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); 501 env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); 502 503 // Add AppMaster.jar location to classpath 504 // At some point we should not be required to add 505 // the hadoop specific classpaths to the env. 506 // It should be provided out of the box. 507 // For now setting all required classpaths including 508 // the classpath to "." for the application jar 509 String classPathEnv = "${CLASSPATH}" 510 + ":./*" 511 + ":$HADOOP_CONF_DIR" 512 + ":$HADOOP_COMMON_HOME/share/hadoop/common/*" 513 + ":$HADOOP_COMMON_HOME/share/hadoop/common/lib/*" 514 + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/*" 515 + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*" 516 + ":$YARN_HOME/modules/*" 517 + ":$YARN_HOME/lib/*" 518 + ":./log4j.properties:"; 519 520 // add the runtime classpath needed for tests to work 521 String testRuntimeClassPath = Client.getTestRuntimeClasspath(); 522 classPathEnv += ":" + testRuntimeClassPath; 523 524 env.put("CLASSPATH", classPathEnv); 525 526 amContainer.setEnvironment(env); 527 528 // Set the necessary command to execute the application master 529 Vector<CharSequence> vargs = new Vector<CharSequence>(30); 530 531 // Set java executable command 532 LOG.info("Setting up app master command"); 533 vargs.add("${JAVA_HOME}" + "/bin/java"); 534 // Set Xmx based on am memory size 535 vargs.add("-Xmx" + amMemory + "m"); 536 // Set class name 537 vargs.add(appMasterMainClass); 538 // Set params for Application Master 539 vargs.add("--container_memory " + String.valueOf(containerMemory)); 540 vargs.add("--num_containers " + String.valueOf(numContainers)); 541 vargs.add("--priority " + String.valueOf(shellCmdPriority)); 542 if (!shellCommand.isEmpty()) { 543 vargs.add("--shell_command " + shellCommand + ""); 544 } 545 if (!shellArgs.isEmpty()) { 546 vargs.add("--shell_args " + shellArgs + ""); 547 } 548 for (Map.Entry<String, String> entry : shellEnv.entrySet()) { 549 vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); 550 } 551 if (debugFlag) { 552 vargs.add("--debug"); 553 } 554 555 vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); 556 vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); 557 558 // Get final commmand 559 StringBuilder command = new StringBuilder(); 560 for (CharSequence str : vargs) { 561 command.append(str).append(" "); 562 } 563 564 LOG.info("Completed setting up app master command " + command.toString()); 565 List<String> commands = new ArrayList<String>(); 566 commands.add(command.toString()); 567 amContainer.setCommands(commands); 568 569 // For launching an AM Container, setting user here is not needed 570 // Set user in ApplicationSubmissionContext 571 // amContainer.setUser(amUser); 572 573 // Set up resource type requirements 574 // For now, only memory is supported so we set memory requirements 575 Resource capability = Records.newRecord(Resource.class); 576 capability.setMemory(amMemory); 577 amContainer.setResource(capability); 578 579 // Service data is a binary blob that can be passed to the application 580 // Not needed in this scenario 581 // amContainer.setServiceData(serviceData); 582 583 // The following are not required for launching an application master 584 // amContainer.setContainerId(containerId); 585 586 appContext.setAMContainerSpec(amContainer); 587 588 // Set the priority for the application master 589 Priority pri = Records.newRecord(Priority.class); 590 // TODO - what is the range for priority? how to decide? 591 pri.setPriority(amPriority); 592 appContext.setPriority(pri); 593 594 // Set the queue to which this application is to be submitted in the RM 595 appContext.setQueue(amQueue); 596 // Set the user submitting this application 597 // TODO can it be empty? 598 appContext.setUser(amUser); 599 600 // Create the request to send to the applications manager 601 SubmitApplicationRequest appRequest = Records.newRecord(SubmitApplicationRequest.class); 602 appRequest.setApplicationSubmissionContext(appContext); 603 604 // Submit the application to the applications manager 605 // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); 606 // Ignore the response as either a valid response object is returned on success 607 // or an exception thrown to denote some form of a failure 608 LOG.info("Submitting application to ASM"); 609 applicationsManager.submitApplication(appRequest); 610 611 // TODO 612 // Try submitting the same request again 613 // app submission failure? 614 615 // Monitor the application 616 return monitorApplication(appId); 617 618 } 619 620 /** 621 * Monitor the submitted application for completion. 622 * Kill application if time expires. 623 * @param appId Application Id of application to be monitored 624 * @return true if application completed successfully 625 * @throws YarnRemoteException 626 */ 627 private boolean monitorApplication(ApplicationId appId) throws YarnRemoteException { 628 629 while (true) { 630 631 // Check app status every 1 second. 632 try { 633 Thread.sleep(1000); 634 } catch (InterruptedException e) { 635 LOG.debug("Thread sleep in monitoring loop interrupted"); 636 } 637 638 // Get application report for the appId we are interested in 639 GetApplicationReportRequest reportRequest = Records.newRecord(GetApplicationReportRequest.class); 640 reportRequest.setApplicationId(appId); 641 GetApplicationReportResponse reportResponse = applicationsManager.getApplicationReport(reportRequest); 642 ApplicationReport report = reportResponse.getApplicationReport(); 643 644 LOG.info("Got application report from ASM for" 645 + ", appId=" + appId.getId() 646 + ", clientToken=" + report.getClientToken() 647 + ", appDiagnostics=" + report.getDiagnostics() 648 + ", appMasterHost=" + report.getHost() 649 + ", appQueue=" + report.getQueue() 650 + ", appMasterRpcPort=" + report.getRpcPort() 651 + ", appStartTime=" + report.getStartTime() 652 + ", yarnAppState=" + report.getYarnApplicationState().toString() 653 + ", distributedFinalState=" + report.getFinalApplicationStatus().toString() 654 + ", appTrackingUrl=" + report.getTrackingUrl() 655 + ", appUser=" + report.getUser()); 656 657 YarnApplicationState state = report.getYarnApplicationState(); 658 FinalApplicationStatus dsStatus = report.getFinalApplicationStatus(); 659 if (YarnApplicationState.FINISHED == state) { 660 if (FinalApplicationStatus.SUCCEEDED == dsStatus) { 661 LOG.info("Application has completed successfully. Breaking monitoring loop"); 662 return true; 663 } 664 else { 665 LOG.info("Application did finished unsuccessfully." 666 + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() 667 + ". Breaking monitoring loop"); 668 return false; 669 } 670 } 671 else if (YarnApplicationState.KILLED == state 672 || YarnApplicationState.FAILED == state) { 673 LOG.info("Application did not finish." 674 + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() 675 + ". Breaking monitoring loop"); 676 return false; 677 } 678 679 if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) { 680 LOG.info("Reached client specified timeout for application. Killing application"); 681 killApplication(appId); 682 return false; 683 } 684 } 685 686 } 687 688 /** 689 * Kill a submitted application by sending a call to the ASM 690 * @param appId Application Id to be killed. 691 * @throws YarnRemoteException 692 */ 693 private void killApplication(ApplicationId appId) throws YarnRemoteException { 694 KillApplicationRequest request = Records.newRecord(KillApplicationRequest.class); 695 // TODO clarify whether multiple jobs with the same app id can be submitted and be running at 696 // the same time. 697 // If yes, can we kill a particular attempt only? 698 request.setApplicationId(appId); 699 // KillApplicationResponse response = applicationsManager.forceKillApplication(request); 700 // Response can be ignored as it is non-null on success or 701 // throws an exception in case of failures 702 applicationsManager.forceKillApplication(request); 703 } 704 705 /** 706 * Connect to the Resource Manager/Applications Manager 707 * @return Handle to communicate with the ASM 708 * @throws IOException 709 */ 710 private void connectToASM() throws IOException { 711 712 /* 713 UserGroupInformation user = UserGroupInformation.getCurrentUser(); 714 applicationsManager = user.doAs(new PrivilegedAction<ClientRMProtocol>() { 715 public ClientRMProtocol run() { 716 InetSocketAddress rmAddress = NetUtils.createSocketAddr(conf.get( 717 YarnConfiguration.RM_SCHEDULER_ADDRESS, 718 YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS)); 719 LOG.info("Connecting to ResourceManager at " + rmAddress); 720 Configuration appsManagerServerConf = new Configuration(conf); 721 appsManagerServerConf.setClass(YarnConfiguration.YARN_SECURITY_INFO, 722 ClientRMSecurityInfo.class, SecurityInfo.class); 723 ClientRMProtocol asm = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, appsManagerServerConf)); 724 return asm; 725 } 726 }); 727 */ 728 YarnConfiguration yarnConf = new YarnConfiguration(conf); 729 InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get( 730 YarnConfiguration.RM_ADDRESS, 731 YarnConfiguration.DEFAULT_RM_ADDRESS)); 732 LOG.info("Connecting to ResourceManager at " + rmAddress); 733 applicationsManager = ((ClientRMProtocol) rpc.getProxy( 734 ClientRMProtocol.class, rmAddress, conf)); 735 } 736 737 /** 738 * Get a new application from the ASM 739 * @return New Application 740 * @throws YarnRemoteException 741 */ 742 private GetNewApplicationResponse getApplication() throws YarnRemoteException { 743 GetNewApplicationRequest request = Records.newRecord(GetNewApplicationRequest.class); 744 GetNewApplicationResponse response = applicationsManager.getNewApplication(request); 745 LOG.info("Got new application id=" + response.getApplicationId()); 746 return response; 747 } 748 749 private static String getTestRuntimeClasspath() { 750 751 InputStream classpathFileStream = null; 752 BufferedReader reader = null; 753 String envClassPath = ""; 754 755 LOG.info("Trying to generate classpath for app master from current thread's classpath"); 756 try { 757 758 // Create classpath from generated classpath 759 // Check maven ppom.xml for generated classpath info 760 // Works if compile time env is same as runtime. Mainly tests. 761 ClassLoader thisClassLoader = 762 Thread.currentThread().getContextClassLoader(); 763 String generatedClasspathFile = "yarn-apps-ds-generated-classpath"; 764 classpathFileStream = 765 thisClassLoader.getResourceAsStream(generatedClasspathFile); 766 if (classpathFileStream == null) { 767 LOG.info("Could not classpath resource from class loader"); 768 return envClassPath; 769 } 770 LOG.info("Readable bytes from stream=" + classpathFileStream.available()); 771 reader = new BufferedReader(new InputStreamReader(classpathFileStream)); 772 String cp = reader.readLine(); 773 if (cp != null) { 774 envClassPath += cp.trim() + ":"; 775 } 776 // Put the file itself on classpath for tasks. 777 envClassPath += thisClassLoader.getResource(generatedClasspathFile).getFile(); 778 } catch (IOException e) { 779 LOG.info("Could not find the necessary resource to generate class path for tests. Error=" + e.getMessage()); 780 } 781 782 try { 783 if (classpathFileStream != null) { 784 classpathFileStream.close(); 785 } 786 if (reader != null) { 787 reader.close(); 788 } 789 } catch (IOException e) { 790 LOG.info("Failed to close class path file stream or reader. Error=" + e.getMessage()); 791 } 792 return envClassPath; 793 } 794 795 }