001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.yarn.applications.distributedshell;
020    
021    import java.io.BufferedReader;
022    import java.io.IOException;
023    import java.io.InputStream;
024    import java.io.InputStreamReader;
025    import java.net.InetSocketAddress;
026    import java.util.ArrayList;
027    import java.util.HashMap;
028    import java.util.List;
029    import java.util.Map;
030    import java.util.Vector;
031    
032    import org.apache.commons.cli.CommandLine;
033    import org.apache.commons.cli.GnuParser;
034    import org.apache.commons.cli.HelpFormatter;
035    import org.apache.commons.cli.Options;
036    import org.apache.commons.cli.ParseException;
037    import org.apache.commons.logging.Log;
038    import org.apache.commons.logging.LogFactory;
039    import org.apache.hadoop.classification.InterfaceAudience;
040    import org.apache.hadoop.classification.InterfaceStability;
041    import org.apache.hadoop.conf.Configuration;
042    import org.apache.hadoop.fs.FileStatus;
043    import org.apache.hadoop.fs.FileSystem;
044    import org.apache.hadoop.fs.Path;
045    import org.apache.hadoop.yarn.api.ApplicationConstants;
046    import org.apache.hadoop.yarn.api.ClientRMProtocol;
047    import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
048    import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
049    import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest;
050    import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse;
051    import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest;
052    import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse;
053    import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
054    import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
055    import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest;
056    import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse;
057    import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest;
058    import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse;
059    import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
060    import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
061    import org.apache.hadoop.yarn.api.records.ApplicationId;
062    import org.apache.hadoop.yarn.api.records.ApplicationReport;
063    import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
064    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
065    import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
066    import org.apache.hadoop.yarn.api.records.LocalResource;
067    import org.apache.hadoop.yarn.api.records.LocalResourceType;
068    import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
069    import org.apache.hadoop.yarn.api.records.NodeReport;
070    import org.apache.hadoop.yarn.api.records.Priority;
071    import org.apache.hadoop.yarn.api.records.QueueACL;
072    import org.apache.hadoop.yarn.api.records.QueueInfo;
073    import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
074    import org.apache.hadoop.yarn.api.records.Resource;
075    import org.apache.hadoop.yarn.api.records.YarnApplicationState;
076    import org.apache.hadoop.yarn.conf.YarnConfiguration;
077    import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
078    import org.apache.hadoop.yarn.ipc.YarnRPC;
079    import org.apache.hadoop.yarn.security.client.ClientRMSecurityInfo;
080    import org.apache.hadoop.yarn.util.ConverterUtils;
081    import org.apache.hadoop.yarn.util.Records;
082    
083    
084    /**
085     * Client for Distributed Shell application submission to YARN.
086     * 
087     * <p> The distributed shell client allows an application master to be launched that in turn would run 
088     * the provided shell command on a set of containers. </p>
089     * 
090     * <p>This client is meant to act as an example on how to write yarn-based applications. </p>
091     * 
 * <p> To submit an application, a client first needs to connect to the <code>ResourceManager</code> 
 * aka ApplicationsManager or ASM via the {@link ClientRMProtocol}. The {@link ClientRMProtocol} 
 * provides a way for the client to get access to cluster information and to request a
 * new {@link ApplicationId}. </p>
096     * 
097     * <p> For the actual job submission, the client first has to create an {@link ApplicationSubmissionContext}. 
098     * The {@link ApplicationSubmissionContext} defines the application details such as {@link ApplicationId} 
099     * and application name, user submitting the application, the priority assigned to the application and the queue 
100     * to which this application needs to be assigned. In addition to this, the {@link ApplicationSubmissionContext}
101     * also defines the {@link ContainerLaunchContext} which describes the <code>Container</code> with which 
102     * the {@link ApplicationMaster} is launched. </p>
103     * 
 * <p> The {@link ContainerLaunchContext} in this scenario defines the resources to be allocated for the 
 * {@link ApplicationMaster}'s container, the local resources (jars, configuration files) to be made available 
 * and the environment to be set for the {@link ApplicationMaster} and the commands to be executed to run the 
 * {@link ApplicationMaster}. </p>
108     * 
109     * <p> Using the {@link ApplicationSubmissionContext}, the client submits the application to the 
110     * <code>ResourceManager</code> and then monitors the application by requesting the <code>ResourceManager</code> 
111     * for an {@link ApplicationReport} at regular time intervals. In case of the application taking too long, the client 
112     * kills the application by submitting a {@link KillApplicationRequest} to the <code>ResourceManager</code>. </p>
113     *
114     */
115    @InterfaceAudience.Public
116    @InterfaceStability.Unstable
117    public class Client {
118    
  // Logger shared by all methods of this class
  private static final Log LOG = LogFactory.getLog(Client.class);

  // Configuration; created in the constructor
  private Configuration conf;

  // RPC to communicate to RM; created from conf in the constructor
  private YarnRPC rpc;

  // Handle to talk to the Resource Manager/Applications Manager
  // run() expects this to be non-null once connectToASM() has returned
  private ClientRMProtocol applicationsManager;

  // Application master specific info to register a new Application with RM/ASM
  // Application name ("appname" option)
  private String appName = "";
  // App master priority ("priority" option)
  private int amPriority = 0;
  // Queue for App master ("queue" option)
  private String amQueue = "";
  // User to run app master as ("user" option)
  private String amUser = "";
  // Amt. of memory resource to request for to run the App Master
  // ("master_memory" option, described there as MB); run() clamps this value
  // to the min/max memory capability reported with the new application id
  private int amMemory = 10; 

  // Application master jar file ("jar" option, required by init())
  private String appMasterJar = ""; 
  // Main class to invoke application master ("class" option)
  private String appMasterMainClass = "";

  // Shell command to be executed ("shell_command" option, required by init())
  private String shellCommand = ""; 
  // Location of shell script ("shell_script" option); empty means no script is uploaded
  private String shellScriptPath = ""; 
  // Args to be passed to the shell command ("shell_args" option)
  private String shellArgs = "";
  // Env variables to be setup for the shell command
  // (filled from key=value entries of the "shell_env" option)
  private Map<String, String> shellEnv = new HashMap<String, String>();
  // Shell Command Container priority ("shell_cmd_priority" option)
  private int shellCmdPriority = 0;

  // Amt of memory to request for container in which shell script will be executed
  // ("container_memory" option, described there as MB)
  private int containerMemory = 10; 
  // No. of containers in which the shell script needs to be executed
  // ("num_containers" option; init() rejects values below 1)
  private int numContainers = 1;

  // log4j.properties file ("log_properties" option)
  // if available, add to local resources and set into classpath 
  private String log4jPropFile = "";    

  // Start time for client; captured when this instance is created
  private final long clientStartTime = System.currentTimeMillis();
  // Timeout threshold for client ("timeout" option, in milliseconds).
  // Kill app after time interval expires.
  private long clientTimeout = 600000;

  // Debug flag ("debug" option); when set, run() adds --debug to the app master command.
  // NOTE(review): declared without an access modifier, so it is package-private
  boolean debugFlag = false;    
173    
174      /**
175       * @param args Command line arguments 
176       */
177      public static void main(String[] args) {
178        boolean result = false;
179        try {
180          Client client = new Client();
181          LOG.info("Initializing Client");
182          boolean doRun = client.init(args);
183          if (!doRun) {
184            System.exit(0);
185          }
186          result = client.run();
187        } catch (Throwable t) {
188          LOG.fatal("Error running CLient", t);
189          System.exit(1);
190        }
191        if (result) {
192          LOG.info("Application completed successfully");
193          System.exit(0);                   
194        } 
195        LOG.error("Application failed to complete successfully");
196        System.exit(2);
197      }
198    
199      /**
200       */
201      public Client() throws Exception  {
202        // Set up the configuration and RPC
203        conf = new Configuration();
204        rpc = YarnRPC.create(conf);
205      }
206    
207      /**
208       * Helper function to print out usage
209       * @param opts Parsed command line options 
210       */
211      private void printUsage(Options opts) {
212        new HelpFormatter().printHelp("Client", opts);
213      }
214    
215      /**
216       * Parse command line options
217       * @param args Parsed command line options 
218       * @return Whether the init was successful to run the client
219       */
220      public boolean init(String[] args) throws ParseException {
221    
222        Options opts = new Options();
223        opts.addOption("appname", true, "Application Name. Default value - DistributedShell");
224        opts.addOption("priority", true, "Application Priority. Default 0");
225        opts.addOption("queue", true, "RM Queue in which this application is to be submitted");
226        opts.addOption("user", true, "User to run the application as");
227        opts.addOption("timeout", true, "Application timeout in milliseconds");
228        opts.addOption("master_memory", true, "Amount of memory in MB to be requested to run the application master");
229        opts.addOption("jar", true, "Jar file containing the application master");
230        opts.addOption("class", true, "Main class to  be run for the Application Master.");
231        opts.addOption("shell_command", true, "Shell command to be executed by the Application Master");
232        opts.addOption("shell_script", true, "Location of the shell script to be executed");
233        opts.addOption("shell_args", true, "Command line args for the shell script");
234        opts.addOption("shell_env", true, "Environment for shell script. Specified as env_key=env_val pairs");
235        opts.addOption("shell_cmd_priority", true, "Priority for the shell command containers");            
236        opts.addOption("container_memory", true, "Amount of memory in MB to be requested to run the shell command");
237        opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed");
238        opts.addOption("log_properties", true, "log4j.properties file");
239        opts.addOption("debug", false, "Dump out debug information");
240        opts.addOption("help", false, "Print usage");
241        CommandLine cliParser = new GnuParser().parse(opts, args);
242    
243        if (args.length == 0) {
244          printUsage(opts);
245          throw new IllegalArgumentException("No args specified for client to initialize");
246        }           
247    
248        if (cliParser.hasOption("help")) {
249          printUsage(opts);
250          return false;
251        }
252    
253        if (cliParser.hasOption("debug")) {
254          debugFlag = true;
255    
256        }
257    
258        appName = cliParser.getOptionValue("appname", "DistributedShell");
259        amPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0"));
260        amQueue = cliParser.getOptionValue("queue", "");
261        amUser = cliParser.getOptionValue("user", "");
262        amMemory = Integer.parseInt(cliParser.getOptionValue("master_memory", "10"));               
263    
264        if (amMemory < 0) {
265          throw new IllegalArgumentException("Invalid memory specified for application master, exiting."
266              + " Specified memory=" + amMemory);
267        }
268    
269        if (!cliParser.hasOption("jar")) {
270          throw new IllegalArgumentException("No jar file specified for application master");
271        }           
272    
273        appMasterJar = cliParser.getOptionValue("jar");
274        appMasterMainClass = cliParser.getOptionValue("class",
275            "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster");              
276    
277        if (!cliParser.hasOption("shell_command")) {
278          throw new IllegalArgumentException("No shell command specified to be executed by application master");
279        }
280        shellCommand = cliParser.getOptionValue("shell_command");
281    
282        if (cliParser.hasOption("shell_script")) {
283          shellScriptPath = cliParser.getOptionValue("shell_script");
284        }
285        if (cliParser.hasOption("shell_args")) {
286          shellArgs = cliParser.getOptionValue("shell_args");
287        }
288        if (cliParser.hasOption("shell_env")) { 
289          String envs[] = cliParser.getOptionValues("shell_env");
290          for (String env : envs) {
291            env = env.trim();
292            int index = env.indexOf('=');
293            if (index == -1) {
294              shellEnv.put(env, "");
295              continue;
296            }
297            String key = env.substring(0, index);
298            String val = "";
299            if (index < (env.length()-1)) {
300              val = env.substring(index+1);
301            }
302            shellEnv.put(key, val);
303          }
304        }
305        shellCmdPriority = Integer.parseInt(cliParser.getOptionValue("shell_cmd_priority", "0"));
306    
307        containerMemory = Integer.parseInt(cliParser.getOptionValue("container_memory", "10"));
308        numContainers = Integer.parseInt(cliParser.getOptionValue("num_containers", "1"));
309    
310        if (containerMemory < 0 || numContainers < 1) {
311          throw new IllegalArgumentException("Invalid no. of containers or container memory specified, exiting."
312              + " Specified containerMemory=" + containerMemory
313              + ", numContainer=" + numContainers);
314        }
315    
316        clientTimeout = Integer.parseInt(cliParser.getOptionValue("timeout", "600000"));
317    
318        log4jPropFile = cliParser.getOptionValue("log_properties", "");
319    
320        return true;
321      }
322    
323      /**
324       * Main run function for the client
325       * @return true if application completed successfully
326       * @throws IOException
327       */
328      public boolean run() throws IOException {
329        LOG.info("Starting Client");
330    
331        // Connect to ResourceManager       
332        connectToASM();
333        assert(applicationsManager != null);                
334    
335        // Use ClientRMProtocol handle to general cluster information 
336        GetClusterMetricsRequest clusterMetricsReq = Records.newRecord(GetClusterMetricsRequest.class);
337        GetClusterMetricsResponse clusterMetricsResp = applicationsManager.getClusterMetrics(clusterMetricsReq);
338        LOG.info("Got Cluster metric info from ASM" 
339            + ", numNodeManagers=" + clusterMetricsResp.getClusterMetrics().getNumNodeManagers());
340    
341        GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class);
342        GetClusterNodesResponse clusterNodesResp = applicationsManager.getClusterNodes(clusterNodesReq);
343        LOG.info("Got Cluster node info from ASM");
344        for (NodeReport node : clusterNodesResp.getNodeReports()) {
345          LOG.info("Got node report from ASM for"
346              + ", nodeId=" + node.getNodeId() 
347              + ", nodeAddress" + node.getHttpAddress()
348              + ", nodeRackName" + node.getRackName()
349              + ", nodeNumContainers" + node.getNumContainers()
350              + ", nodeHealthStatus" + node.getNodeHealthStatus());
351        }
352    
353        GetQueueInfoRequest queueInfoReq = Records.newRecord(GetQueueInfoRequest.class);
354        GetQueueInfoResponse queueInfoResp = applicationsManager.getQueueInfo(queueInfoReq);                
355        QueueInfo queueInfo = queueInfoResp.getQueueInfo();
356        LOG.info("Queue info"
357            + ", queueName=" + queueInfo.getQueueName()
358            + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity()
359            + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
360            + ", queueApplicationCount=" + queueInfo.getApplications().size()
361            + ", queueChildQueueCount=" + queueInfo.getChildQueues().size());               
362    
363        GetQueueUserAclsInfoRequest queueUserAclsReq = Records.newRecord(GetQueueUserAclsInfoRequest.class);
364        GetQueueUserAclsInfoResponse queueUserAclsResp = applicationsManager.getQueueUserAcls(queueUserAclsReq);                            
365        List<QueueUserACLInfo> listAclInfo = queueUserAclsResp.getUserAclsInfoList();
366        for (QueueUserACLInfo aclInfo : listAclInfo) {
367          for (QueueACL userAcl : aclInfo.getUserAcls()) {
368            LOG.info("User ACL Info for Queue"
369                + ", queueName=" + aclInfo.getQueueName()                   
370                + ", userAcl=" + userAcl.name());
371          }
372        }           
373    
374        // Get a new application id 
375        GetNewApplicationResponse newApp = getApplication();
376        ApplicationId appId = newApp.getApplicationId();
377    
378        // TODO get min/max resource capabilities from RM and change memory ask if needed
379        // If we do not have min/max, we may not be able to correctly request 
380        // the required resources from the RM for the app master
381        // Memory ask has to be a multiple of min and less than max. 
382        // Dump out information about cluster capability as seen by the resource manager
383        int minMem = newApp.getMinimumResourceCapability().getMemory();
384        int maxMem = newApp.getMaximumResourceCapability().getMemory();
385        LOG.info("Min mem capabililty of resources in this cluster " + minMem);
386        LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
387    
388        // A resource ask has to be atleast the minimum of the capability of the cluster, the value has to be 
389        // a multiple of the min value and cannot exceed the max. 
390        // If it is not an exact multiple of min, the RM will allocate to the nearest multiple of min
391        if (amMemory < minMem) {
392          LOG.info("AM memory specified below min threshold of cluster. Using min value."
393              + ", specified=" + amMemory
394              + ", min=" + minMem);
395          amMemory = minMem; 
396        } 
397        else if (amMemory > maxMem) {
398          LOG.info("AM memory specified above max threshold of cluster. Using max value."
399              + ", specified=" + amMemory
400              + ", max=" + maxMem);
401          amMemory = maxMem;
402        }                           
403    
404        // Create launch context for app master
405        LOG.info("Setting up application submission context for ASM");
406        ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);
407    
408        // set the application id 
409        appContext.setApplicationId(appId);
410        // set the application name
411        appContext.setApplicationName(appName);
412    
413        // Set up the container launch context for the application master
414        ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
415    
416        // set local resources for the application master
417        // local files or archives as needed
418        // In this scenario, the jar file for the application master is part of the local resources                 
419        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
420    
421        LOG.info("Copy App Master jar from local filesystem and add to local environment");
422        // Copy the application master jar to the filesystem 
423        // Create a local resource to point to the destination jar path 
424        FileSystem fs = FileSystem.get(conf);
425        Path src = new Path(appMasterJar);
426        String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar";           
427        Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
428        fs.copyFromLocalFile(false, true, src, dst);
429        FileStatus destStatus = fs.getFileStatus(dst);
430        LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
431    
432        // Set the type of resource - file or archive
433        // archives are untarred at destination
434        // we don't need the jar file to be untarred for now
435        amJarRsrc.setType(LocalResourceType.FILE);
436        // Set visibility of the resource 
437        // Setting to most private option
438        amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);          
439        // Set the resource to be copied over
440        amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst)); 
441        // Set timestamp and length of file so that the framework 
442        // can do basic sanity checks for the local resource 
443        // after it has been copied over to ensure it is the same 
444        // resource the client intended to use with the application
445        amJarRsrc.setTimestamp(destStatus.getModificationTime());
446        amJarRsrc.setSize(destStatus.getLen());
447        localResources.put("AppMaster.jar",  amJarRsrc);
448    
449        // Set the log4j properties if needed 
450        if (!log4jPropFile.isEmpty()) {
451          Path log4jSrc = new Path(log4jPropFile);
452          Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
453          fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
454          FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
455          LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
456          log4jRsrc.setType(LocalResourceType.FILE);
457          log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);        
458          log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
459          log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
460          log4jRsrc.setSize(log4jFileStatus.getLen());
461          localResources.put("log4j.properties", log4jRsrc);
462        }                   
463    
464        // The shell script has to be made available on the final container(s)
465        // where it will be executed. 
466        // To do this, we need to first copy into the filesystem that is visible 
467        // to the yarn framework. 
468        // We do not need to set this as a local resource for the application 
469        // master as the application master does not need it.               
470        String hdfsShellScriptLocation = ""; 
471        long hdfsShellScriptLen = 0;
472        long hdfsShellScriptTimestamp = 0;
473        if (!shellScriptPath.isEmpty()) {
474          Path shellSrc = new Path(shellScriptPath);
475          String shellPathSuffix = appName + "/" + appId.getId() + "/ExecShellScript.sh";
476          Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
477          fs.copyFromLocalFile(false, true, shellSrc, shellDst);
478          hdfsShellScriptLocation = shellDst.toUri().toString(); 
479          FileStatus shellFileStatus = fs.getFileStatus(shellDst);
480          hdfsShellScriptLen = shellFileStatus.getLen();
481          hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
482        }
483    
484        // Set local resource info into app master container launch context
485        amContainer.setLocalResources(localResources);
486    
487        // Set the necessary security tokens as needed
488        //amContainer.setContainerTokens(containerToken);
489    
490        // Set the env variables to be setup in the env where the application master will be run
491        LOG.info("Set the environment for the application master");
492        Map<String, String> env = new HashMap<String, String>();
493    
494        // put location of shell script into env
495        // using the env info, the application master will create the correct local resource for the 
496        // eventual containers that will be launched to execute the shell scripts
497        env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
498        env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
499        env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
500    
501        // Add AppMaster.jar location to classpath          
502        // At some point we should not be required to add 
503        // the hadoop specific classpaths to the env. 
504        // It should be provided out of the box. 
505        // For now setting all required classpaths including
506        // the classpath to "." for the application jar
507        String classPathEnv = "${CLASSPATH}"
508            + ":./*"
509            + ":$HADOOP_CONF_DIR"
510            + ":$HADOOP_COMMON_HOME/share/hadoop/common/*"
511            + ":$HADOOP_COMMON_HOME/share/hadoop/common/lib/*"
512            + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/*"
513            + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*"
514            + ":$YARN_HOME/modules/*"
515            + ":$YARN_HOME/lib/*"
516            + ":./log4j.properties:";
517    
518        // add the runtime classpath needed for tests to work 
519        String testRuntimeClassPath = Client.getTestRuntimeClasspath();
520        classPathEnv += ":" + testRuntimeClassPath; 
521    
522        env.put("CLASSPATH", classPathEnv);
523    
524        amContainer.setEnvironment(env);
525    
526        // Set the necessary command to execute the application master 
527        Vector<CharSequence> vargs = new Vector<CharSequence>(30);
528    
529        // Set java executable command 
530        LOG.info("Setting up app master command");
531        vargs.add("${JAVA_HOME}" + "/bin/java");
532        // Set Xmx based on am memory size
533        vargs.add("-Xmx" + amMemory + "m");
534        // Set class name 
535        vargs.add(appMasterMainClass);
536        // Set params for Application Master
537        vargs.add("--container_memory " + String.valueOf(containerMemory));
538        vargs.add("--num_containers " + String.valueOf(numContainers));
539        vargs.add("--priority " + String.valueOf(shellCmdPriority));
540        if (!shellCommand.isEmpty()) {
541          vargs.add("--shell_command " + shellCommand + "");
542        }
543        if (!shellArgs.isEmpty()) {
544          vargs.add("--shell_args " + shellArgs + "");
545        }
546        for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
547          vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
548        }                   
549        if (debugFlag) {
550          vargs.add("--debug");
551        }
552    
553        vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
554        vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");
555    
556        // Get final commmand
557        StringBuilder command = new StringBuilder();
558        for (CharSequence str : vargs) {
559          command.append(str).append(" ");
560        }
561    
562        LOG.info("Completed setting up app master command " + command.toString());     
563        List<String> commands = new ArrayList<String>();
564        commands.add(command.toString());           
565        amContainer.setCommands(commands);
566    
567        // For launching an AM Container, setting user here is not needed
568        // Set user in ApplicationSubmissionContext
569        // amContainer.setUser(amUser);
570    
571        // Set up resource type requirements
572        // For now, only memory is supported so we set memory requirements
573        Resource capability = Records.newRecord(Resource.class);
574        capability.setMemory(amMemory);
575        amContainer.setResource(capability);
576    
577        // Service data is a binary blob that can be passed to the application
578        // Not needed in this scenario
579        // amContainer.setServiceData(serviceData);
580    
581        // The following are not required for launching an application master 
582        // amContainer.setContainerId(containerId);         
583    
584        appContext.setAMContainerSpec(amContainer);
585    
586        // Set the priority for the application master
587        Priority pri = Records.newRecord(Priority.class);
588        // TODO - what is the range for priority? how to decide? 
589        pri.setPriority(amPriority);
590        appContext.setPriority(pri);
591    
592        // Set the queue to which this application is to be submitted in the RM
593        appContext.setQueue(amQueue);
594        // Set the user submitting this application 
595        // TODO can it be empty? 
596        appContext.setUser(amUser);
597    
598        // Create the request to send to the applications manager 
599        SubmitApplicationRequest appRequest = Records.newRecord(SubmitApplicationRequest.class);
600        appRequest.setApplicationSubmissionContext(appContext);
601    
602        // Submit the application to the applications manager
603        // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
604        // Ignore the response as either a valid response object is returned on success 
605        // or an exception thrown to denote some form of a failure
606        LOG.info("Submitting application to ASM");
607        applicationsManager.submitApplication(appRequest);
608    
609        // TODO
610        // Try submitting the same request again
611        // app submission failure?
612    
613        // Monitor the application
614        return monitorApplication(appId);
615    
616      }
617    
618      /**
619       * Monitor the submitted application for completion. 
620       * Kill application if time expires. 
621       * @param appId Application Id of application to be monitored
622       * @return true if application completed successfully
623       * @throws YarnRemoteException
624       */
625      private boolean monitorApplication(ApplicationId appId) throws YarnRemoteException {
626    
627        while (true) {
628    
629          // Check app status every 1 second.
630          try {
631            Thread.sleep(1000);
632          } catch (InterruptedException e) {
633            LOG.debug("Thread sleep in monitoring loop interrupted");
634          }
635    
636          // Get application report for the appId we are interested in 
637          GetApplicationReportRequest reportRequest = Records.newRecord(GetApplicationReportRequest.class);
638          reportRequest.setApplicationId(appId);
639          GetApplicationReportResponse reportResponse = applicationsManager.getApplicationReport(reportRequest);
640          ApplicationReport report = reportResponse.getApplicationReport();
641    
642          LOG.info("Got application report from ASM for"
643              + ", appId=" + appId.getId()
644              + ", clientToken=" + report.getClientToken()
645              + ", appDiagnostics=" + report.getDiagnostics()
646              + ", appMasterHost=" + report.getHost()
647              + ", appQueue=" + report.getQueue()
648              + ", appMasterRpcPort=" + report.getRpcPort()
649              + ", appStartTime=" + report.getStartTime()
650              + ", yarnAppState=" + report.getYarnApplicationState().toString()
651              + ", distributedFinalState=" + report.getFinalApplicationStatus().toString()
652              + ", appTrackingUrl=" + report.getTrackingUrl()
653              + ", appUser=" + report.getUser());
654    
655          YarnApplicationState state = report.getYarnApplicationState();
656          FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
657          if (YarnApplicationState.FINISHED == state) {
658            if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
659              LOG.info("Application has completed successfully. Breaking monitoring loop");
660              return true;        
661            }
662            else {
663              LOG.info("Application did finished unsuccessfully."
664                  + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString()
665                  + ". Breaking monitoring loop");
666              return false;
667            }                         
668          }
669          else if (YarnApplicationState.KILLED == state     
670              || YarnApplicationState.FAILED == state) {
671            LOG.info("Application did not finish."
672                + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString()
673                + ". Breaking monitoring loop");
674            return false;
675          }                 
676    
677          if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
678            LOG.info("Reached client specified timeout for application. Killing application");
679            killApplication(appId);
680            return false;                           
681          }
682        }                   
683    
684      }
685    
686      /**
687       * Kill a submitted application by sending a call to the ASM
688       * @param appId Application Id to be killed. 
689       * @throws YarnRemoteException
690       */
691      private void killApplication(ApplicationId appId) throws YarnRemoteException {
692        KillApplicationRequest request = Records.newRecord(KillApplicationRequest.class);           
693        // TODO clarify whether multiple jobs with the same app id can be submitted and be running at 
694        // the same time. 
695        // If yes, can we kill a particular attempt only?
696        request.setApplicationId(appId);
697        // KillApplicationResponse response = applicationsManager.forceKillApplication(request);            
698        // Response can be ignored as it is non-null on success or 
699        // throws an exception in case of failures
700        applicationsManager.forceKillApplication(request);  
701      }
702    
703      /**
704       * Connect to the Resource Manager/Applications Manager
705       * @return Handle to communicate with the ASM
706       * @throws IOException 
707       */
708      private void connectToASM() throws IOException {
709    
710        /*
711                    UserGroupInformation user = UserGroupInformation.getCurrentUser();
712                    applicationsManager = user.doAs(new PrivilegedAction<ClientRMProtocol>() {
713                            public ClientRMProtocol run() {
714                                    InetSocketAddress rmAddress = NetUtils.createSocketAddr(conf.get(
715                                            YarnConfiguration.RM_SCHEDULER_ADDRESS,
716                                            YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));               
717                                    LOG.info("Connecting to ResourceManager at " + rmAddress);
718                                    Configuration appsManagerServerConf = new Configuration(conf);
719                                    appsManagerServerConf.setClass(YarnConfiguration.YARN_SECURITY_INFO,
720                                    ClientRMSecurityInfo.class, SecurityInfo.class);
721                                    ClientRMProtocol asm = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, appsManagerServerConf));
722                                    return asm;
723                            }
724                    });
725         */
726        YarnConfiguration yarnConf = new YarnConfiguration(conf);
727        InetSocketAddress rmAddress = yarnConf.getSocketAddr(
728            YarnConfiguration.RM_ADDRESS,
729            YarnConfiguration.DEFAULT_RM_ADDRESS,
730            YarnConfiguration.DEFAULT_RM_PORT);
731        LOG.info("Connecting to ResourceManager at " + rmAddress);
732        applicationsManager = ((ClientRMProtocol) rpc.getProxy(
733            ClientRMProtocol.class, rmAddress, conf));
734      }             
735    
736      /**
737       * Get a new application from the ASM 
738       * @return New Application
739       * @throws YarnRemoteException
740       */
741      private GetNewApplicationResponse getApplication() throws YarnRemoteException {
742        GetNewApplicationRequest request = Records.newRecord(GetNewApplicationRequest.class);               
743        GetNewApplicationResponse response = applicationsManager.getNewApplication(request);
744        LOG.info("Got new application id=" + response.getApplicationId());          
745        return response;            
746      }
747    
748      private static String getTestRuntimeClasspath() {
749    
750        InputStream classpathFileStream = null;
751        BufferedReader reader = null;
752        String envClassPath = "";
753    
754        LOG.info("Trying to generate classpath for app master from current thread's classpath");
755        try {
756    
757          // Create classpath from generated classpath
758          // Check maven ppom.xml for generated classpath info
759          // Works if compile time env is same as runtime. Mainly tests.
760          ClassLoader thisClassLoader =
761              Thread.currentThread().getContextClassLoader();
762          String generatedClasspathFile = "yarn-apps-ds-generated-classpath";
763          classpathFileStream =
764              thisClassLoader.getResourceAsStream(generatedClasspathFile);
765          if (classpathFileStream == null) {
766            LOG.info("Could not classpath resource from class loader");
767            return envClassPath;
768          }
769          LOG.info("Readable bytes from stream=" + classpathFileStream.available());
770          reader = new BufferedReader(new InputStreamReader(classpathFileStream));
771          String cp = reader.readLine();
772          if (cp != null) {
773            envClassPath += cp.trim() + ":";
774          }
775          // Put the file itself on classpath for tasks.
776          envClassPath += thisClassLoader.getResource(generatedClasspathFile).getFile();
777        } catch (IOException e) {
778          LOG.info("Could not find the necessary resource to generate class path for tests. Error=" + e.getMessage());
779        } 
780    
781        try {
782          if (classpathFileStream != null) {
783            classpathFileStream.close();
784          }
785          if (reader != null) {
786            reader.close();
787          }
788        } catch (IOException e) {
789          LOG.info("Failed to close class path file stream or reader. Error=" + e.getMessage());
790        } 
791        return envClassPath;
792      }                     
793    
794    }