001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019 package org.apache.hadoop.yarn.applications.distributedshell; 020 021 import java.io.BufferedReader; 022 import java.io.IOException; 023 import java.io.InputStream; 024 import java.io.InputStreamReader; 025 import java.net.InetSocketAddress; 026 import java.util.ArrayList; 027 import java.util.HashMap; 028 import java.util.List; 029 import java.util.Map; 030 import java.util.Vector; 031 032 import org.apache.commons.cli.CommandLine; 033 import org.apache.commons.cli.GnuParser; 034 import org.apache.commons.cli.HelpFormatter; 035 import org.apache.commons.cli.Options; 036 import org.apache.commons.cli.ParseException; 037 import org.apache.commons.logging.Log; 038 import org.apache.commons.logging.LogFactory; 039 import org.apache.hadoop.classification.InterfaceAudience; 040 import org.apache.hadoop.classification.InterfaceStability; 041 import org.apache.hadoop.conf.Configuration; 042 import org.apache.hadoop.fs.FileStatus; 043 import org.apache.hadoop.fs.FileSystem; 044 import org.apache.hadoop.fs.Path; 045 import org.apache.hadoop.yarn.api.ApplicationConstants; 046 import org.apache.hadoop.yarn.api.ClientRMProtocol; 047 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; 048 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; 049 import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; 050 import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; 051 import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; 052 import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; 053 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; 054 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; 055 import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; 056 import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; 057 import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; 058 import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; 059 import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; 060 import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; 061 import org.apache.hadoop.yarn.api.records.ApplicationId; 062 import org.apache.hadoop.yarn.api.records.ApplicationReport; 063 import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; 064 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; 065 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; 066 import org.apache.hadoop.yarn.api.records.LocalResource; 067 import org.apache.hadoop.yarn.api.records.LocalResourceType; 068 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; 069 import org.apache.hadoop.yarn.api.records.NodeReport; 070 import org.apache.hadoop.yarn.api.records.Priority; 071 import org.apache.hadoop.yarn.api.records.QueueACL; 072 import org.apache.hadoop.yarn.api.records.QueueInfo; 073 import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; 074 import org.apache.hadoop.yarn.api.records.Resource; 075 import org.apache.hadoop.yarn.api.records.YarnApplicationState; 076 import org.apache.hadoop.yarn.conf.YarnConfiguration; 077 import org.apache.hadoop.yarn.exceptions.YarnRemoteException; 078 import org.apache.hadoop.yarn.ipc.YarnRPC; 079 import org.apache.hadoop.yarn.security.client.ClientRMSecurityInfo; 080 import org.apache.hadoop.yarn.util.ConverterUtils; 081 import org.apache.hadoop.yarn.util.Records; 082 083 084 /** 085 * Client for Distributed Shell application submission to YARN. 086 * 087 * <p> The distributed shell client allows an application master to be launched that in turn would run 088 * the provided shell command on a set of containers. </p> 089 * 090 * <p>This client is meant to act as an example on how to write yarn-based applications. </p> 091 * 092 * <p> To submit an application, a client first needs to connect to the <code>ResourceManager</code> 093 * aka ApplicationsManager or ASM via the {@link ClientRMProtocol}. The {@link ClientRMProtocol} 094 * provides a way for the client to get access to cluster information and to request for a 095 * new {@link ApplicationId}. <p> 096 * 097 * <p> For the actual job submission, the client first has to create an {@link ApplicationSubmissionContext}. 098 * The {@link ApplicationSubmissionContext} defines the application details such as {@link ApplicationId} 099 * and application name, user submitting the application, the priority assigned to the application and the queue 100 * to which this application needs to be assigned. In addition to this, the {@link ApplicationSubmissionContext} 101 * also defines the {@link ContainerLaunchContext} which describes the <code>Container</code> with which 102 * the {@link ApplicationMaster} is launched. </p> 103 * 104 * <p> The {@link ContainerLaunchContext} in this scenario defines the resources to be allocated for the 105 * {@link ApplicationMaster}'s container, the local resources (jars, configuration files) to be made available 106 * and the environment to be set for the {@link ApplicationMaster} and the commands to be executed to run the 107 * {@link ApplicationMaster}. <p> 108 * 109 * <p> Using the {@link ApplicationSubmissionContext}, the client submits the application to the 110 * <code>ResourceManager</code> and then monitors the application by requesting the <code>ResourceManager</code> 111 * for an {@link ApplicationReport} at regular time intervals. In case of the application taking too long, the client 112 * kills the application by submitting a {@link KillApplicationRequest} to the <code>ResourceManager</code>. </p> 113 * 114 */ 115 @InterfaceAudience.Public 116 @InterfaceStability.Unstable 117 public class Client { 118 119 private static final Log LOG = LogFactory.getLog(Client.class); 120 121 // Configuration 122 private Configuration conf; 123 124 // RPC to communicate to RM 125 private YarnRPC rpc; 126 127 // Handle to talk to the Resource Manager/Applications Manager 128 private ClientRMProtocol applicationsManager; 129 130 // Application master specific info to register a new Application with RM/ASM 131 private String appName = ""; 132 // App master priority 133 private int amPriority = 0; 134 // Queue for App master 135 private String amQueue = ""; 136 // User to run app master as 137 private String amUser = ""; 138 // Amt. of memory resource to request for to run the App Master 139 private int amMemory = 10; 140 141 // Application master jar file 142 private String appMasterJar = ""; 143 // Main class to invoke application master 144 private String appMasterMainClass = ""; 145 146 // Shell command to be executed 147 private String shellCommand = ""; 148 // Location of shell script 149 private String shellScriptPath = ""; 150 // Args to be passed to the shell command 151 private String shellArgs = ""; 152 // Env variables to be setup for the shell command 153 private Map<String, String> shellEnv = new HashMap<String, String>(); 154 // Shell Command Container priority 155 private int shellCmdPriority = 0; 156 157 // Amt of memory to request for container in which shell script will be executed 158 private int containerMemory = 10; 159 // No. of containers in which the shell script needs to be executed 160 private int numContainers = 1; 161 162 // log4j.properties file 163 // if available, add to local resources and set into classpath 164 private String log4jPropFile = ""; 165 166 // Start time for client 167 private final long clientStartTime = System.currentTimeMillis(); 168 // Timeout threshold for client. Kill app after time interval expires. 169 private long clientTimeout = 600000; 170 171 // Debug flag 172 boolean debugFlag = false; 173 174 /** 175 * @param args Command line arguments 176 */ 177 public static void main(String[] args) { 178 boolean result = false; 179 try { 180 Client client = new Client(); 181 LOG.info("Initializing Client"); 182 boolean doRun = client.init(args); 183 if (!doRun) { 184 System.exit(0); 185 } 186 result = client.run(); 187 } catch (Throwable t) { 188 LOG.fatal("Error running CLient", t); 189 System.exit(1); 190 } 191 if (result) { 192 LOG.info("Application completed successfully"); 193 System.exit(0); 194 } 195 LOG.error("Application failed to complete successfully"); 196 System.exit(2); 197 } 198 199 /** 200 */ 201 public Client() throws Exception { 202 // Set up the configuration and RPC 203 conf = new Configuration(); 204 rpc = YarnRPC.create(conf); 205 } 206 207 /** 208 * Helper function to print out usage 209 * @param opts Parsed command line options 210 */ 211 private void printUsage(Options opts) { 212 new HelpFormatter().printHelp("Client", opts); 213 } 214 215 /** 216 * Parse command line options 217 * @param args Parsed command line options 218 * @return Whether the init was successful to run the client 219 */ 220 public boolean init(String[] args) throws ParseException { 221 222 Options opts = new Options(); 223 opts.addOption("appname", true, "Application Name. Default value - DistributedShell"); 224 opts.addOption("priority", true, "Application Priority. Default 0"); 225 opts.addOption("queue", true, "RM Queue in which this application is to be submitted"); 226 opts.addOption("user", true, "User to run the application as"); 227 opts.addOption("timeout", true, "Application timeout in milliseconds"); 228 opts.addOption("master_memory", true, "Amount of memory in MB to be requested to run the application master"); 229 opts.addOption("jar", true, "Jar file containing the application master"); 230 opts.addOption("class", true, "Main class to be run for the Application Master."); 231 opts.addOption("shell_command", true, "Shell command to be executed by the Application Master"); 232 opts.addOption("shell_script", true, "Location of the shell script to be executed"); 233 opts.addOption("shell_args", true, "Command line args for the shell script"); 234 opts.addOption("shell_env", true, "Environment for shell script. Specified as env_key=env_val pairs"); 235 opts.addOption("shell_cmd_priority", true, "Priority for the shell command containers"); 236 opts.addOption("container_memory", true, "Amount of memory in MB to be requested to run the shell command"); 237 opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed"); 238 opts.addOption("log_properties", true, "log4j.properties file"); 239 opts.addOption("debug", false, "Dump out debug information"); 240 opts.addOption("help", false, "Print usage"); 241 CommandLine cliParser = new GnuParser().parse(opts, args); 242 243 if (args.length == 0) { 244 printUsage(opts); 245 throw new IllegalArgumentException("No args specified for client to initialize"); 246 } 247 248 if (cliParser.hasOption("help")) { 249 printUsage(opts); 250 return false; 251 } 252 253 if (cliParser.hasOption("debug")) { 254 debugFlag = true; 255 256 } 257 258 appName = cliParser.getOptionValue("appname", "DistributedShell"); 259 amPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0")); 260 amQueue = cliParser.getOptionValue("queue", ""); 261 amUser = cliParser.getOptionValue("user", ""); 262 amMemory = Integer.parseInt(cliParser.getOptionValue("master_memory", "10")); 263 264 if (amMemory < 0) { 265 throw new IllegalArgumentException("Invalid memory specified for application master, exiting." 266 + " Specified memory=" + amMemory); 267 } 268 269 if (!cliParser.hasOption("jar")) { 270 throw new IllegalArgumentException("No jar file specified for application master"); 271 } 272 273 appMasterJar = cliParser.getOptionValue("jar"); 274 appMasterMainClass = cliParser.getOptionValue("class", 275 "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster"); 276 277 if (!cliParser.hasOption("shell_command")) { 278 throw new IllegalArgumentException("No shell command specified to be executed by application master"); 279 } 280 shellCommand = cliParser.getOptionValue("shell_command"); 281 282 if (cliParser.hasOption("shell_script")) { 283 shellScriptPath = cliParser.getOptionValue("shell_script"); 284 } 285 if (cliParser.hasOption("shell_args")) { 286 shellArgs = cliParser.getOptionValue("shell_args"); 287 } 288 if (cliParser.hasOption("shell_env")) { 289 String envs[] = cliParser.getOptionValues("shell_env"); 290 for (String env : envs) { 291 env = env.trim(); 292 int index = env.indexOf('='); 293 if (index == -1) { 294 shellEnv.put(env, ""); 295 continue; 296 } 297 String key = env.substring(0, index); 298 String val = ""; 299 if (index < (env.length()-1)) { 300 val = env.substring(index+1); 301 } 302 shellEnv.put(key, val); 303 } 304 } 305 shellCmdPriority = Integer.parseInt(cliParser.getOptionValue("shell_cmd_priority", "0")); 306 307 containerMemory = Integer.parseInt(cliParser.getOptionValue("container_memory", "10")); 308 numContainers = Integer.parseInt(cliParser.getOptionValue("num_containers", "1")); 309 310 if (containerMemory < 0 || numContainers < 1) { 311 throw new IllegalArgumentException("Invalid no. of containers or container memory specified, exiting." 312 + " Specified containerMemory=" + containerMemory 313 + ", numContainer=" + numContainers); 314 } 315 316 clientTimeout = Integer.parseInt(cliParser.getOptionValue("timeout", "600000")); 317 318 log4jPropFile = cliParser.getOptionValue("log_properties", ""); 319 320 return true; 321 } 322 323 /** 324 * Main run function for the client 325 * @return true if application completed successfully 326 * @throws IOException 327 */ 328 public boolean run() throws IOException { 329 LOG.info("Starting Client"); 330 331 // Connect to ResourceManager 332 connectToASM(); 333 assert(applicationsManager != null); 334 335 // Use ClientRMProtocol handle to general cluster information 336 GetClusterMetricsRequest clusterMetricsReq = Records.newRecord(GetClusterMetricsRequest.class); 337 GetClusterMetricsResponse clusterMetricsResp = applicationsManager.getClusterMetrics(clusterMetricsReq); 338 LOG.info("Got Cluster metric info from ASM" 339 + ", numNodeManagers=" + clusterMetricsResp.getClusterMetrics().getNumNodeManagers()); 340 341 GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class); 342 GetClusterNodesResponse clusterNodesResp = applicationsManager.getClusterNodes(clusterNodesReq); 343 LOG.info("Got Cluster node info from ASM"); 344 for (NodeReport node : clusterNodesResp.getNodeReports()) { 345 LOG.info("Got node report from ASM for" 346 + ", nodeId=" + node.getNodeId() 347 + ", nodeAddress" + node.getHttpAddress() 348 + ", nodeRackName" + node.getRackName() 349 + ", nodeNumContainers" + node.getNumContainers() 350 + ", nodeHealthStatus" + node.getNodeHealthStatus()); 351 } 352 353 GetQueueInfoRequest queueInfoReq = Records.newRecord(GetQueueInfoRequest.class); 354 GetQueueInfoResponse queueInfoResp = applicationsManager.getQueueInfo(queueInfoReq); 355 QueueInfo queueInfo = queueInfoResp.getQueueInfo(); 356 LOG.info("Queue info" 357 + ", queueName=" + queueInfo.getQueueName() 358 + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity() 359 + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity() 360 + ", queueApplicationCount=" + queueInfo.getApplications().size() 361 + ", queueChildQueueCount=" + queueInfo.getChildQueues().size()); 362 363 GetQueueUserAclsInfoRequest queueUserAclsReq = Records.newRecord(GetQueueUserAclsInfoRequest.class); 364 GetQueueUserAclsInfoResponse queueUserAclsResp = applicationsManager.getQueueUserAcls(queueUserAclsReq); 365 List<QueueUserACLInfo> listAclInfo = queueUserAclsResp.getUserAclsInfoList(); 366 for (QueueUserACLInfo aclInfo : listAclInfo) { 367 for (QueueACL userAcl : aclInfo.getUserAcls()) { 368 LOG.info("User ACL Info for Queue" 369 + ", queueName=" + aclInfo.getQueueName() 370 + ", userAcl=" + userAcl.name()); 371 } 372 } 373 374 // Get a new application id 375 GetNewApplicationResponse newApp = getApplication(); 376 ApplicationId appId = newApp.getApplicationId(); 377 378 // TODO get min/max resource capabilities from RM and change memory ask if needed 379 // If we do not have min/max, we may not be able to correctly request 380 // the required resources from the RM for the app master 381 // Memory ask has to be a multiple of min and less than max. 382 // Dump out information about cluster capability as seen by the resource manager 383 int minMem = newApp.getMinimumResourceCapability().getMemory(); 384 int maxMem = newApp.getMaximumResourceCapability().getMemory(); 385 LOG.info("Min mem capabililty of resources in this cluster " + minMem); 386 LOG.info("Max mem capabililty of resources in this cluster " + maxMem); 387 388 // A resource ask has to be atleast the minimum of the capability of the cluster, the value has to be 389 // a multiple of the min value and cannot exceed the max. 390 // If it is not an exact multiple of min, the RM will allocate to the nearest multiple of min 391 if (amMemory < minMem) { 392 LOG.info("AM memory specified below min threshold of cluster. Using min value." 393 + ", specified=" + amMemory 394 + ", min=" + minMem); 395 amMemory = minMem; 396 } 397 else if (amMemory > maxMem) { 398 LOG.info("AM memory specified above max threshold of cluster. Using max value." 399 + ", specified=" + amMemory 400 + ", max=" + maxMem); 401 amMemory = maxMem; 402 } 403 404 // Create launch context for app master 405 LOG.info("Setting up application submission context for ASM"); 406 ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class); 407 408 // set the application id 409 appContext.setApplicationId(appId); 410 // set the application name 411 appContext.setApplicationName(appName); 412 413 // Set up the container launch context for the application master 414 ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class); 415 416 // set local resources for the application master 417 // local files or archives as needed 418 // In this scenario, the jar file for the application master is part of the local resources 419 Map<String, LocalResource> localResources = new HashMap<String, LocalResource>(); 420 421 LOG.info("Copy App Master jar from local filesystem and add to local environment"); 422 // Copy the application master jar to the filesystem 423 // Create a local resource to point to the destination jar path 424 FileSystem fs = FileSystem.get(conf); 425 Path src = new Path(appMasterJar); 426 String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar"; 427 Path dst = new Path(fs.getHomeDirectory(), pathSuffix); 428 fs.copyFromLocalFile(false, true, src, dst); 429 FileStatus destStatus = fs.getFileStatus(dst); 430 LocalResource amJarRsrc = Records.newRecord(LocalResource.class); 431 432 // Set the type of resource - file or archive 433 // archives are untarred at destination 434 // we don't need the jar file to be untarred for now 435 amJarRsrc.setType(LocalResourceType.FILE); 436 // Set visibility of the resource 437 // Setting to most private option 438 amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION); 439 // Set the resource to be copied over 440 amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst)); 441 // Set timestamp and length of file so that the framework 442 // can do basic sanity checks for the local resource 443 // after it has been copied over to ensure it is the same 444 // resource the client intended to use with the application 445 amJarRsrc.setTimestamp(destStatus.getModificationTime()); 446 amJarRsrc.setSize(destStatus.getLen()); 447 localResources.put("AppMaster.jar", amJarRsrc); 448 449 // Set the log4j properties if needed 450 if (!log4jPropFile.isEmpty()) { 451 Path log4jSrc = new Path(log4jPropFile); 452 Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props"); 453 fs.copyFromLocalFile(false, true, log4jSrc, log4jDst); 454 FileStatus log4jFileStatus = fs.getFileStatus(log4jDst); 455 LocalResource log4jRsrc = Records.newRecord(LocalResource.class); 456 log4jRsrc.setType(LocalResourceType.FILE); 457 log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION); 458 log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri())); 459 log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime()); 460 log4jRsrc.setSize(log4jFileStatus.getLen()); 461 localResources.put("log4j.properties", log4jRsrc); 462 } 463 464 // The shell script has to be made available on the final container(s) 465 // where it will be executed. 466 // To do this, we need to first copy into the filesystem that is visible 467 // to the yarn framework. 468 // We do not need to set this as a local resource for the application 469 // master as the application master does not need it. 470 String hdfsShellScriptLocation = ""; 471 long hdfsShellScriptLen = 0; 472 long hdfsShellScriptTimestamp = 0; 473 if (!shellScriptPath.isEmpty()) { 474 Path shellSrc = new Path(shellScriptPath); 475 String shellPathSuffix = appName + "/" + appId.getId() + "/ExecShellScript.sh"; 476 Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix); 477 fs.copyFromLocalFile(false, true, shellSrc, shellDst); 478 hdfsShellScriptLocation = shellDst.toUri().toString(); 479 FileStatus shellFileStatus = fs.getFileStatus(shellDst); 480 hdfsShellScriptLen = shellFileStatus.getLen(); 481 hdfsShellScriptTimestamp = shellFileStatus.getModificationTime(); 482 } 483 484 // Set local resource info into app master container launch context 485 amContainer.setLocalResources(localResources); 486 487 // Set the necessary security tokens as needed 488 //amContainer.setContainerTokens(containerToken); 489 490 // Set the env variables to be setup in the env where the application master will be run 491 LOG.info("Set the environment for the application master"); 492 Map<String, String> env = new HashMap<String, String>(); 493 494 // put location of shell script into env 495 // using the env info, the application master will create the correct local resource for the 496 // eventual containers that will be launched to execute the shell scripts 497 env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation); 498 env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp)); 499 env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen)); 500 501 // Add AppMaster.jar location to classpath 502 // At some point we should not be required to add 503 // the hadoop specific classpaths to the env. 504 // It should be provided out of the box. 505 // For now setting all required classpaths including 506 // the classpath to "." for the application jar 507 String classPathEnv = "${CLASSPATH}" 508 + ":./*" 509 + ":$HADOOP_CONF_DIR" 510 + ":$HADOOP_COMMON_HOME/share/hadoop/common/*" 511 + ":$HADOOP_COMMON_HOME/share/hadoop/common/lib/*" 512 + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/*" 513 + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*" 514 + ":$YARN_HOME/modules/*" 515 + ":$YARN_HOME/lib/*" 516 + ":./log4j.properties:"; 517 518 // add the runtime classpath needed for tests to work 519 String testRuntimeClassPath = Client.getTestRuntimeClasspath(); 520 classPathEnv += ":" + testRuntimeClassPath; 521 522 env.put("CLASSPATH", classPathEnv); 523 524 amContainer.setEnvironment(env); 525 526 // Set the necessary command to execute the application master 527 Vector<CharSequence> vargs = new Vector<CharSequence>(30); 528 529 // Set java executable command 530 LOG.info("Setting up app master command"); 531 vargs.add("${JAVA_HOME}" + "/bin/java"); 532 // Set Xmx based on am memory size 533 vargs.add("-Xmx" + amMemory + "m"); 534 // Set class name 535 vargs.add(appMasterMainClass); 536 // Set params for Application Master 537 vargs.add("--container_memory " + String.valueOf(containerMemory)); 538 vargs.add("--num_containers " + String.valueOf(numContainers)); 539 vargs.add("--priority " + String.valueOf(shellCmdPriority)); 540 if (!shellCommand.isEmpty()) { 541 vargs.add("--shell_command " + shellCommand + ""); 542 } 543 if (!shellArgs.isEmpty()) { 544 vargs.add("--shell_args " + shellArgs + ""); 545 } 546 for (Map.Entry<String, String> entry : shellEnv.entrySet()) { 547 vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue()); 548 } 549 if (debugFlag) { 550 vargs.add("--debug"); 551 } 552 553 vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout"); 554 vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr"); 555 556 // Get final commmand 557 StringBuilder command = new StringBuilder(); 558 for (CharSequence str : vargs) { 559 command.append(str).append(" "); 560 } 561 562 LOG.info("Completed setting up app master command " + command.toString()); 563 List<String> commands = new ArrayList<String>(); 564 commands.add(command.toString()); 565 amContainer.setCommands(commands); 566 567 // For launching an AM Container, setting user here is not needed 568 // Set user in ApplicationSubmissionContext 569 // amContainer.setUser(amUser); 570 571 // Set up resource type requirements 572 // For now, only memory is supported so we set memory requirements 573 Resource capability = Records.newRecord(Resource.class); 574 capability.setMemory(amMemory); 575 amContainer.setResource(capability); 576 577 // Service data is a binary blob that can be passed to the application 578 // Not needed in this scenario 579 // amContainer.setServiceData(serviceData); 580 581 // The following are not required for launching an application master 582 // amContainer.setContainerId(containerId); 583 584 appContext.setAMContainerSpec(amContainer); 585 586 // Set the priority for the application master 587 Priority pri = Records.newRecord(Priority.class); 588 // TODO - what is the range for priority? how to decide? 589 pri.setPriority(amPriority); 590 appContext.setPriority(pri); 591 592 // Set the queue to which this application is to be submitted in the RM 593 appContext.setQueue(amQueue); 594 // Set the user submitting this application 595 // TODO can it be empty? 596 appContext.setUser(amUser); 597 598 // Create the request to send to the applications manager 599 SubmitApplicationRequest appRequest = Records.newRecord(SubmitApplicationRequest.class); 600 appRequest.setApplicationSubmissionContext(appContext); 601 602 // Submit the application to the applications manager 603 // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest); 604 // Ignore the response as either a valid response object is returned on success 605 // or an exception thrown to denote some form of a failure 606 LOG.info("Submitting application to ASM"); 607 applicationsManager.submitApplication(appRequest); 608 609 // TODO 610 // Try submitting the same request again 611 // app submission failure? 612 613 // Monitor the application 614 return monitorApplication(appId); 615 616 } 617 618 /** 619 * Monitor the submitted application for completion. 620 * Kill application if time expires. 621 * @param appId Application Id of application to be monitored 622 * @return true if application completed successfully 623 * @throws YarnRemoteException 624 */ 625 private boolean monitorApplication(ApplicationId appId) throws YarnRemoteException { 626 627 while (true) { 628 629 // Check app status every 1 second. 630 try { 631 Thread.sleep(1000); 632 } catch (InterruptedException e) { 633 LOG.debug("Thread sleep in monitoring loop interrupted"); 634 } 635 636 // Get application report for the appId we are interested in 637 GetApplicationReportRequest reportRequest = Records.newRecord(GetApplicationReportRequest.class); 638 reportRequest.setApplicationId(appId); 639 GetApplicationReportResponse reportResponse = applicationsManager.getApplicationReport(reportRequest); 640 ApplicationReport report = reportResponse.getApplicationReport(); 641 642 LOG.info("Got application report from ASM for" 643 + ", appId=" + appId.getId() 644 + ", clientToken=" + report.getClientToken() 645 + ", appDiagnostics=" + report.getDiagnostics() 646 + ", appMasterHost=" + report.getHost() 647 + ", appQueue=" + report.getQueue() 648 + ", appMasterRpcPort=" + report.getRpcPort() 649 + ", appStartTime=" + report.getStartTime() 650 + ", yarnAppState=" + report.getYarnApplicationState().toString() 651 + ", distributedFinalState=" + report.getFinalApplicationStatus().toString() 652 + ", appTrackingUrl=" + report.getTrackingUrl() 653 + ", appUser=" + report.getUser()); 654 655 YarnApplicationState state = report.getYarnApplicationState(); 656 FinalApplicationStatus dsStatus = report.getFinalApplicationStatus(); 657 if (YarnApplicationState.FINISHED == state) { 658 if (FinalApplicationStatus.SUCCEEDED == dsStatus) { 659 LOG.info("Application has completed successfully. Breaking monitoring loop"); 660 return true; 661 } 662 else { 663 LOG.info("Application did finished unsuccessfully." 664 + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() 665 + ". Breaking monitoring loop"); 666 return false; 667 } 668 } 669 else if (YarnApplicationState.KILLED == state 670 || YarnApplicationState.FAILED == state) { 671 LOG.info("Application did not finish." 672 + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString() 673 + ". Breaking monitoring loop"); 674 return false; 675 } 676 677 if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) { 678 LOG.info("Reached client specified timeout for application. Killing application"); 679 killApplication(appId); 680 return false; 681 } 682 } 683 684 } 685 686 /** 687 * Kill a submitted application by sending a call to the ASM 688 * @param appId Application Id to be killed. 689 * @throws YarnRemoteException 690 */ 691 private void killApplication(ApplicationId appId) throws YarnRemoteException { 692 KillApplicationRequest request = Records.newRecord(KillApplicationRequest.class); 693 // TODO clarify whether multiple jobs with the same app id can be submitted and be running at 694 // the same time. 695 // If yes, can we kill a particular attempt only? 696 request.setApplicationId(appId); 697 // KillApplicationResponse response = applicationsManager.forceKillApplication(request); 698 // Response can be ignored as it is non-null on success or 699 // throws an exception in case of failures 700 applicationsManager.forceKillApplication(request); 701 } 702 703 /** 704 * Connect to the Resource Manager/Applications Manager 705 * @return Handle to communicate with the ASM 706 * @throws IOException 707 */ 708 private void connectToASM() throws IOException { 709 710 /* 711 UserGroupInformation user = UserGroupInformation.getCurrentUser(); 712 applicationsManager = user.doAs(new PrivilegedAction<ClientRMProtocol>() { 713 public ClientRMProtocol run() { 714 InetSocketAddress rmAddress = NetUtils.createSocketAddr(conf.get( 715 YarnConfiguration.RM_SCHEDULER_ADDRESS, 716 YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS)); 717 LOG.info("Connecting to ResourceManager at " + rmAddress); 718 Configuration appsManagerServerConf = new Configuration(conf); 719 appsManagerServerConf.setClass(YarnConfiguration.YARN_SECURITY_INFO, 720 ClientRMSecurityInfo.class, SecurityInfo.class); 721 ClientRMProtocol asm = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, appsManagerServerConf)); 722 return asm; 723 } 724 }); 725 */ 726 YarnConfiguration yarnConf = new YarnConfiguration(conf); 727 InetSocketAddress rmAddress = yarnConf.getSocketAddr( 728 YarnConfiguration.RM_ADDRESS, 729 YarnConfiguration.DEFAULT_RM_ADDRESS, 730 YarnConfiguration.DEFAULT_RM_PORT); 731 LOG.info("Connecting to ResourceManager at " + rmAddress); 732 applicationsManager = ((ClientRMProtocol) rpc.getProxy( 733 ClientRMProtocol.class, rmAddress, conf)); 734 } 735 736 /** 737 * Get a new application from the ASM 738 * @return New Application 739 * @throws YarnRemoteException 740 */ 741 private GetNewApplicationResponse getApplication() throws YarnRemoteException { 742 GetNewApplicationRequest request = Records.newRecord(GetNewApplicationRequest.class); 743 GetNewApplicationResponse response = applicationsManager.getNewApplication(request); 744 LOG.info("Got new application id=" + response.getApplicationId()); 745 return response; 746 } 747 748 private static String getTestRuntimeClasspath() { 749 750 InputStream classpathFileStream = null; 751 BufferedReader reader = null; 752 String envClassPath = ""; 753 754 LOG.info("Trying to generate classpath for app master from current thread's classpath"); 755 try { 756 757 // Create classpath from generated classpath 758 // Check maven ppom.xml for generated classpath info 759 // Works if compile time env is same as runtime. Mainly tests. 760 ClassLoader thisClassLoader = 761 Thread.currentThread().getContextClassLoader(); 762 String generatedClasspathFile = "yarn-apps-ds-generated-classpath"; 763 classpathFileStream = 764 thisClassLoader.getResourceAsStream(generatedClasspathFile); 765 if (classpathFileStream == null) { 766 LOG.info("Could not classpath resource from class loader"); 767 return envClassPath; 768 } 769 LOG.info("Readable bytes from stream=" + classpathFileStream.available()); 770 reader = new BufferedReader(new InputStreamReader(classpathFileStream)); 771 String cp = reader.readLine(); 772 if (cp != null) { 773 envClassPath += cp.trim() + ":"; 774 } 775 // Put the file itself on classpath for tasks. 776 envClassPath += thisClassLoader.getResource(generatedClasspathFile).getFile(); 777 } catch (IOException e) { 778 LOG.info("Could not find the necessary resource to generate class path for tests. Error=" + e.getMessage()); 779 } 780 781 try { 782 if (classpathFileStream != null) { 783 classpathFileStream.close(); 784 } 785 if (reader != null) { 786 reader.close(); 787 } 788 } catch (IOException e) { 789 LOG.info("Failed to close class path file stream or reader. Error=" + e.getMessage()); 790 } 791 return envClassPath; 792 } 793 794 }