001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.yarn.applications.distributedshell;
020    
021    import java.io.BufferedReader;
022    import java.io.IOException;
023    import java.io.InputStream;
024    import java.io.InputStreamReader;
025    import java.net.InetSocketAddress;
026    import java.util.ArrayList;
027    import java.util.HashMap;
028    import java.util.List;
029    import java.util.Map;
030    import java.util.Vector;
031    
032    import org.apache.commons.cli.CommandLine;
033    import org.apache.commons.cli.GnuParser;
034    import org.apache.commons.cli.HelpFormatter;
035    import org.apache.commons.cli.Options;
036    import org.apache.commons.cli.ParseException;
037    import org.apache.commons.logging.Log;
038    import org.apache.commons.logging.LogFactory;
039    import org.apache.hadoop.classification.InterfaceAudience;
040    import org.apache.hadoop.classification.InterfaceStability;
041    import org.apache.hadoop.conf.Configuration;
042    import org.apache.hadoop.fs.FileStatus;
043    import org.apache.hadoop.fs.FileSystem;
044    import org.apache.hadoop.fs.Path;
045    import org.apache.hadoop.net.NetUtils;
046    import org.apache.hadoop.security.SecurityInfo;
047    import org.apache.hadoop.yarn.api.ApplicationConstants;
048    import org.apache.hadoop.yarn.api.ClientRMProtocol;
049    import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
050    import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
051    import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest;
052    import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse;
053    import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest;
054    import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse;
055    import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
056    import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
057    import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest;
058    import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse;
059    import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest;
060    import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse;
061    import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
062    import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
063    import org.apache.hadoop.yarn.api.records.ApplicationId;
064    import org.apache.hadoop.yarn.api.records.ApplicationReport;
065    import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
066    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
067    import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
068    import org.apache.hadoop.yarn.api.records.LocalResource;
069    import org.apache.hadoop.yarn.api.records.LocalResourceType;
070    import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
071    import org.apache.hadoop.yarn.api.records.NodeReport;
072    import org.apache.hadoop.yarn.api.records.Priority;
073    import org.apache.hadoop.yarn.api.records.QueueACL;
074    import org.apache.hadoop.yarn.api.records.QueueInfo;
075    import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
076    import org.apache.hadoop.yarn.api.records.Resource;
077    import org.apache.hadoop.yarn.api.records.YarnApplicationState;
078    import org.apache.hadoop.yarn.conf.YarnConfiguration;
079    import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
080    import org.apache.hadoop.yarn.ipc.YarnRPC;
081    import org.apache.hadoop.yarn.security.client.ClientRMSecurityInfo;
082    import org.apache.hadoop.yarn.util.ConverterUtils;
083    import org.apache.hadoop.yarn.util.Records;
084    
085    
086    /**
087     * Client for Distributed Shell application submission to YARN.
088     * 
089     * <p> The distributed shell client allows an application master to be launched that in turn would run 
090     * the provided shell command on a set of containers. </p>
091     * 
092     * <p>This client is meant to act as an example on how to write yarn-based applications. </p>
093     * 
 * <p> To submit an application, a client first needs to connect to the <code>ResourceManager</code> 
 * aka ApplicationsManager or ASM via the {@link ClientRMProtocol}. The {@link ClientRMProtocol} 
 * provides a way for the client to get access to cluster information and to request a
 * new {@link ApplicationId}. </p>
098     * 
099     * <p> For the actual job submission, the client first has to create an {@link ApplicationSubmissionContext}. 
100     * The {@link ApplicationSubmissionContext} defines the application details such as {@link ApplicationId} 
101     * and application name, user submitting the application, the priority assigned to the application and the queue 
102     * to which this application needs to be assigned. In addition to this, the {@link ApplicationSubmissionContext}
103     * also defines the {@link ContainerLaunchContext} which describes the <code>Container</code> with which 
104     * the {@link ApplicationMaster} is launched. </p>
105     * 
 * <p> The {@link ContainerLaunchContext} in this scenario defines the resources to be allocated for the 
 * {@link ApplicationMaster}'s container, the local resources (jars, configuration files) to be made available 
 * and the environment to be set for the {@link ApplicationMaster} and the commands to be executed to run the 
 * {@link ApplicationMaster}. </p>
110     * 
111     * <p> Using the {@link ApplicationSubmissionContext}, the client submits the application to the 
112     * <code>ResourceManager</code> and then monitors the application by requesting the <code>ResourceManager</code> 
113     * for an {@link ApplicationReport} at regular time intervals. In case of the application taking too long, the client 
114     * kills the application by submitting a {@link KillApplicationRequest} to the <code>ResourceManager</code>. </p>
115     *
116     */
117    @InterfaceAudience.Public
118    @InterfaceStability.Unstable
119    public class Client {
120    
  // Logger for this client
  private static final Log LOG = LogFactory.getLog(Client.class);

  // Configuration; created in the constructor and used to build the RPC layer
  // and to obtain the FileSystem that files are copied to in run()
  private Configuration conf;

  // RPC to communicate to RM
  private YarnRPC rpc;

  // Handle to talk to the Resource Manager/Applications Manager;
  // run() asserts it is non-null once connectToASM() has been called
  private ClientRMProtocol applicationsManager;

  // Application master specific info to register a new Application with RM/ASM
  // (the values below are placeholders; init() overwrites them from the command line)
  private String appName = "";
  // App master priority
  private int amPriority = 0;
  // Queue for App master
  private String amQueue = "";
  // User to run app master as
  private String amUser = "";
  // Amt. of memory resource (in MB) to request for to run the App Master;
  // run() raises/lowers it to the min/max capability reported for the cluster
  private int amMemory = 10; 

  // Application master jar file (local path; copied to the file system in run())
  private String appMasterJar = ""; 
  // Main class to invoke application master
  private String appMasterMainClass = "";

  // Shell command to be executed 
  private String shellCommand = ""; 
  // Location of shell script (local path); empty means no script is uploaded
  private String shellScriptPath = ""; 
  // Args to be passed to the shell command
  private String shellArgs = "";
  // Env variables to be setup for the shell command;
  // each entry is forwarded to the app master as a --shell_env key=value argument
  private Map<String, String> shellEnv = new HashMap<String, String>();
  // Shell Command Container priority 
  private int shellCmdPriority = 0;

  // Amt of memory (in MB) to request for container in which shell script will be executed
  private int containerMemory = 10; 
  // No. of containers in which the shell script needs to be executed
  private int numContainers = 1;

  // log4j.properties file 
  // if available, add to local resources and set into classpath 
  private String log4jPropFile = "";    

  // Start time for client
  private final long clientStartTime = System.currentTimeMillis();
  // Timeout threshold for client, in milliseconds. Kill app after time interval expires.
  private long clientTimeout = 600000;

  // Debug flag; when set, --debug is added to the app master command line
  boolean debugFlag = false;    
175    
176      /**
177       * @param args Command line arguments 
178       */
179      public static void main(String[] args) {
180        boolean result = false;
181        try {
182          Client client = new Client();
183          LOG.info("Initializing Client");
184          boolean doRun = client.init(args);
185          if (!doRun) {
186            System.exit(0);
187          }
188          result = client.run();
189        } catch (Throwable t) {
190          LOG.fatal("Error running CLient", t);
191          System.exit(1);
192        }
193        if (result) {
194          LOG.info("Application completed successfully");
195          System.exit(0);                   
196        } 
197        LOG.error("Application failed to complete successfully");
198        System.exit(2);
199      }
200    
201      /**
202       */
203      public Client() throws Exception  {
204        // Set up the configuration and RPC
205        conf = new Configuration();
206        rpc = YarnRPC.create(conf);
207      }
208    
209      /**
210       * Helper function to print out usage
211       * @param opts Parsed command line options 
212       */
213      private void printUsage(Options opts) {
214        new HelpFormatter().printHelp("Client", opts);
215      }
216    
217      /**
218       * Parse command line options
219       * @param args Parsed command line options 
220       * @return Whether the init was successful to run the client
221       */
222      public boolean init(String[] args) throws ParseException {
223    
224        Options opts = new Options();
225        opts.addOption("appname", true, "Application Name. Default value - DistributedShell");
226        opts.addOption("priority", true, "Application Priority. Default 0");
227        opts.addOption("queue", true, "RM Queue in which this application is to be submitted");
228        opts.addOption("user", true, "User to run the application as");
229        opts.addOption("timeout", true, "Application timeout in milliseconds");
230        opts.addOption("master_memory", true, "Amount of memory in MB to be requested to run the application master");
231        opts.addOption("jar", true, "Jar file containing the application master");
232        opts.addOption("class", true, "Main class to  be run for the Application Master.");
233        opts.addOption("shell_command", true, "Shell command to be executed by the Application Master");
234        opts.addOption("shell_script", true, "Location of the shell script to be executed");
235        opts.addOption("shell_args", true, "Command line args for the shell script");
236        opts.addOption("shell_env", true, "Environment for shell script. Specified as env_key=env_val pairs");
237        opts.addOption("shell_cmd_priority", true, "Priority for the shell command containers");            
238        opts.addOption("container_memory", true, "Amount of memory in MB to be requested to run the shell command");
239        opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed");
240        opts.addOption("log_properties", true, "log4j.properties file");
241        opts.addOption("debug", false, "Dump out debug information");
242        opts.addOption("help", false, "Print usage");
243        CommandLine cliParser = new GnuParser().parse(opts, args);
244    
245        if (args.length == 0) {
246          printUsage(opts);
247          throw new IllegalArgumentException("No args specified for client to initialize");
248        }           
249    
250        if (cliParser.hasOption("help")) {
251          printUsage(opts);
252          return false;
253        }
254    
255        if (cliParser.hasOption("debug")) {
256          debugFlag = true;
257    
258        }
259    
260        appName = cliParser.getOptionValue("appname", "DistributedShell");
261        amPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0"));
262        amQueue = cliParser.getOptionValue("queue", "");
263        amUser = cliParser.getOptionValue("user", "");
264        amMemory = Integer.parseInt(cliParser.getOptionValue("master_memory", "10"));               
265    
266        if (amMemory < 0) {
267          throw new IllegalArgumentException("Invalid memory specified for application master, exiting."
268              + " Specified memory=" + amMemory);
269        }
270    
271        if (!cliParser.hasOption("jar")) {
272          throw new IllegalArgumentException("No jar file specified for application master");
273        }           
274    
275        appMasterJar = cliParser.getOptionValue("jar");
276        appMasterMainClass = cliParser.getOptionValue("class",
277            "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster");              
278    
279        if (!cliParser.hasOption("shell_command")) {
280          throw new IllegalArgumentException("No shell command specified to be executed by application master");
281        }
282        shellCommand = cliParser.getOptionValue("shell_command");
283    
284        if (cliParser.hasOption("shell_script")) {
285          shellScriptPath = cliParser.getOptionValue("shell_script");
286        }
287        if (cliParser.hasOption("shell_args")) {
288          shellArgs = cliParser.getOptionValue("shell_args");
289        }
290        if (cliParser.hasOption("shell_env")) { 
291          String envs[] = cliParser.getOptionValues("shell_env");
292          for (String env : envs) {
293            env = env.trim();
294            int index = env.indexOf('=');
295            if (index == -1) {
296              shellEnv.put(env, "");
297              continue;
298            }
299            String key = env.substring(0, index);
300            String val = "";
301            if (index < (env.length()-1)) {
302              val = env.substring(index+1);
303            }
304            shellEnv.put(key, val);
305          }
306        }
307        shellCmdPriority = Integer.parseInt(cliParser.getOptionValue("shell_cmd_priority", "0"));
308    
309        containerMemory = Integer.parseInt(cliParser.getOptionValue("container_memory", "10"));
310        numContainers = Integer.parseInt(cliParser.getOptionValue("num_containers", "1"));
311    
312        if (containerMemory < 0 || numContainers < 1) {
313          throw new IllegalArgumentException("Invalid no. of containers or container memory specified, exiting."
314              + " Specified containerMemory=" + containerMemory
315              + ", numContainer=" + numContainers);
316        }
317    
318        clientTimeout = Integer.parseInt(cliParser.getOptionValue("timeout", "600000"));
319    
320        log4jPropFile = cliParser.getOptionValue("log_properties", "");
321    
322        return true;
323      }
324    
325      /**
326       * Main run function for the client
327       * @return true if application completed successfully
328       * @throws IOException
329       */
330      public boolean run() throws IOException {
331        LOG.info("Starting Client");
332    
333        // Connect to ResourceManager       
334        connectToASM();
335        assert(applicationsManager != null);                
336    
337        // Use ClientRMProtocol handle to general cluster information 
338        GetClusterMetricsRequest clusterMetricsReq = Records.newRecord(GetClusterMetricsRequest.class);
339        GetClusterMetricsResponse clusterMetricsResp = applicationsManager.getClusterMetrics(clusterMetricsReq);
340        LOG.info("Got Cluster metric info from ASM" 
341            + ", numNodeManagers=" + clusterMetricsResp.getClusterMetrics().getNumNodeManagers());
342    
343        GetClusterNodesRequest clusterNodesReq = Records.newRecord(GetClusterNodesRequest.class);
344        GetClusterNodesResponse clusterNodesResp = applicationsManager.getClusterNodes(clusterNodesReq);
345        LOG.info("Got Cluster node info from ASM");
346        for (NodeReport node : clusterNodesResp.getNodeReports()) {
347          LOG.info("Got node report from ASM for"
348              + ", nodeId=" + node.getNodeId() 
349              + ", nodeAddress" + node.getHttpAddress()
350              + ", nodeRackName" + node.getRackName()
351              + ", nodeNumContainers" + node.getNumContainers()
352              + ", nodeHealthStatus" + node.getNodeHealthStatus());
353        }
354    
355        GetQueueInfoRequest queueInfoReq = Records.newRecord(GetQueueInfoRequest.class);
356        GetQueueInfoResponse queueInfoResp = applicationsManager.getQueueInfo(queueInfoReq);                
357        QueueInfo queueInfo = queueInfoResp.getQueueInfo();
358        LOG.info("Queue info"
359            + ", queueName=" + queueInfo.getQueueName()
360            + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity()
361            + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity()
362            + ", queueApplicationCount=" + queueInfo.getApplications().size()
363            + ", queueChildQueueCount=" + queueInfo.getChildQueues().size());               
364    
365        GetQueueUserAclsInfoRequest queueUserAclsReq = Records.newRecord(GetQueueUserAclsInfoRequest.class);
366        GetQueueUserAclsInfoResponse queueUserAclsResp = applicationsManager.getQueueUserAcls(queueUserAclsReq);                            
367        List<QueueUserACLInfo> listAclInfo = queueUserAclsResp.getUserAclsInfoList();
368        for (QueueUserACLInfo aclInfo : listAclInfo) {
369          for (QueueACL userAcl : aclInfo.getUserAcls()) {
370            LOG.info("User ACL Info for Queue"
371                + ", queueName=" + aclInfo.getQueueName()                   
372                + ", userAcl=" + userAcl.name());
373          }
374        }           
375    
376        // Get a new application id 
377        GetNewApplicationResponse newApp = getApplication();
378        ApplicationId appId = newApp.getApplicationId();
379    
380        // TODO get min/max resource capabilities from RM and change memory ask if needed
381        // If we do not have min/max, we may not be able to correctly request 
382        // the required resources from the RM for the app master
383        // Memory ask has to be a multiple of min and less than max. 
384        // Dump out information about cluster capability as seen by the resource manager
385        int minMem = newApp.getMinimumResourceCapability().getMemory();
386        int maxMem = newApp.getMaximumResourceCapability().getMemory();
387        LOG.info("Min mem capabililty of resources in this cluster " + minMem);
388        LOG.info("Max mem capabililty of resources in this cluster " + maxMem);
389    
390        // A resource ask has to be atleast the minimum of the capability of the cluster, the value has to be 
391        // a multiple of the min value and cannot exceed the max. 
392        // If it is not an exact multiple of min, the RM will allocate to the nearest multiple of min
393        if (amMemory < minMem) {
394          LOG.info("AM memory specified below min threshold of cluster. Using min value."
395              + ", specified=" + amMemory
396              + ", min=" + minMem);
397          amMemory = minMem; 
398        } 
399        else if (amMemory > maxMem) {
400          LOG.info("AM memory specified above max threshold of cluster. Using max value."
401              + ", specified=" + amMemory
402              + ", max=" + maxMem);
403          amMemory = maxMem;
404        }                           
405    
406        // Create launch context for app master
407        LOG.info("Setting up application submission context for ASM");
408        ApplicationSubmissionContext appContext = Records.newRecord(ApplicationSubmissionContext.class);
409    
410        // set the application id 
411        appContext.setApplicationId(appId);
412        // set the application name
413        appContext.setApplicationName(appName);
414    
415        // Set up the container launch context for the application master
416        ContainerLaunchContext amContainer = Records.newRecord(ContainerLaunchContext.class);
417    
418        // set local resources for the application master
419        // local files or archives as needed
420        // In this scenario, the jar file for the application master is part of the local resources                 
421        Map<String, LocalResource> localResources = new HashMap<String, LocalResource>();
422    
423        LOG.info("Copy App Master jar from local filesystem and add to local environment");
424        // Copy the application master jar to the filesystem 
425        // Create a local resource to point to the destination jar path 
426        FileSystem fs = FileSystem.get(conf);
427        Path src = new Path(appMasterJar);
428        String pathSuffix = appName + "/" + appId.getId() + "/AppMaster.jar";           
429        Path dst = new Path(fs.getHomeDirectory(), pathSuffix);
430        fs.copyFromLocalFile(false, true, src, dst);
431        FileStatus destStatus = fs.getFileStatus(dst);
432        LocalResource amJarRsrc = Records.newRecord(LocalResource.class);
433    
434        // Set the type of resource - file or archive
435        // archives are untarred at destination
436        // we don't need the jar file to be untarred for now
437        amJarRsrc.setType(LocalResourceType.FILE);
438        // Set visibility of the resource 
439        // Setting to most private option
440        amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION);          
441        // Set the resource to be copied over
442        amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(dst)); 
443        // Set timestamp and length of file so that the framework 
444        // can do basic sanity checks for the local resource 
445        // after it has been copied over to ensure it is the same 
446        // resource the client intended to use with the application
447        amJarRsrc.setTimestamp(destStatus.getModificationTime());
448        amJarRsrc.setSize(destStatus.getLen());
449        localResources.put("AppMaster.jar",  amJarRsrc);
450    
451        // Set the log4j properties if needed 
452        if (!log4jPropFile.isEmpty()) {
453          Path log4jSrc = new Path(log4jPropFile);
454          Path log4jDst = new Path(fs.getHomeDirectory(), "log4j.props");
455          fs.copyFromLocalFile(false, true, log4jSrc, log4jDst);
456          FileStatus log4jFileStatus = fs.getFileStatus(log4jDst);
457          LocalResource log4jRsrc = Records.newRecord(LocalResource.class);
458          log4jRsrc.setType(LocalResourceType.FILE);
459          log4jRsrc.setVisibility(LocalResourceVisibility.APPLICATION);        
460          log4jRsrc.setResource(ConverterUtils.getYarnUrlFromURI(log4jDst.toUri()));
461          log4jRsrc.setTimestamp(log4jFileStatus.getModificationTime());
462          log4jRsrc.setSize(log4jFileStatus.getLen());
463          localResources.put("log4j.properties", log4jRsrc);
464        }                   
465    
466        // The shell script has to be made available on the final container(s)
467        // where it will be executed. 
468        // To do this, we need to first copy into the filesystem that is visible 
469        // to the yarn framework. 
470        // We do not need to set this as a local resource for the application 
471        // master as the application master does not need it.               
472        String hdfsShellScriptLocation = ""; 
473        long hdfsShellScriptLen = 0;
474        long hdfsShellScriptTimestamp = 0;
475        if (!shellScriptPath.isEmpty()) {
476          Path shellSrc = new Path(shellScriptPath);
477          String shellPathSuffix = appName + "/" + appId.getId() + "/ExecShellScript.sh";
478          Path shellDst = new Path(fs.getHomeDirectory(), shellPathSuffix);
479          fs.copyFromLocalFile(false, true, shellSrc, shellDst);
480          hdfsShellScriptLocation = shellDst.toUri().toString(); 
481          FileStatus shellFileStatus = fs.getFileStatus(shellDst);
482          hdfsShellScriptLen = shellFileStatus.getLen();
483          hdfsShellScriptTimestamp = shellFileStatus.getModificationTime();
484        }
485    
486        // Set local resource info into app master container launch context
487        amContainer.setLocalResources(localResources);
488    
489        // Set the necessary security tokens as needed
490        //amContainer.setContainerTokens(containerToken);
491    
492        // Set the env variables to be setup in the env where the application master will be run
493        LOG.info("Set the environment for the application master");
494        Map<String, String> env = new HashMap<String, String>();
495    
496        // put location of shell script into env
497        // using the env info, the application master will create the correct local resource for the 
498        // eventual containers that will be launched to execute the shell scripts
499        env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLOCATION, hdfsShellScriptLocation);
500        env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTTIMESTAMP, Long.toString(hdfsShellScriptTimestamp));
501        env.put(DSConstants.DISTRIBUTEDSHELLSCRIPTLEN, Long.toString(hdfsShellScriptLen));
502    
503        // Add AppMaster.jar location to classpath          
504        // At some point we should not be required to add 
505        // the hadoop specific classpaths to the env. 
506        // It should be provided out of the box. 
507        // For now setting all required classpaths including
508        // the classpath to "." for the application jar
509        String classPathEnv = "${CLASSPATH}"
510            + ":./*"
511            + ":$HADOOP_CONF_DIR"
512            + ":$HADOOP_COMMON_HOME/share/hadoop/common/*"
513            + ":$HADOOP_COMMON_HOME/share/hadoop/common/lib/*"
514            + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/*"
515            + ":$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*"
516            + ":$YARN_HOME/modules/*"
517            + ":$YARN_HOME/lib/*"
518            + ":./log4j.properties:";
519    
520        // add the runtime classpath needed for tests to work 
521        String testRuntimeClassPath = Client.getTestRuntimeClasspath();
522        classPathEnv += ":" + testRuntimeClassPath; 
523    
524        env.put("CLASSPATH", classPathEnv);
525    
526        amContainer.setEnvironment(env);
527    
528        // Set the necessary command to execute the application master 
529        Vector<CharSequence> vargs = new Vector<CharSequence>(30);
530    
531        // Set java executable command 
532        LOG.info("Setting up app master command");
533        vargs.add("${JAVA_HOME}" + "/bin/java");
534        // Set Xmx based on am memory size
535        vargs.add("-Xmx" + amMemory + "m");
536        // Set class name 
537        vargs.add(appMasterMainClass);
538        // Set params for Application Master
539        vargs.add("--container_memory " + String.valueOf(containerMemory));
540        vargs.add("--num_containers " + String.valueOf(numContainers));
541        vargs.add("--priority " + String.valueOf(shellCmdPriority));
542        if (!shellCommand.isEmpty()) {
543          vargs.add("--shell_command " + shellCommand + "");
544        }
545        if (!shellArgs.isEmpty()) {
546          vargs.add("--shell_args " + shellArgs + "");
547        }
548        for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
549          vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
550        }                   
551        if (debugFlag) {
552          vargs.add("--debug");
553        }
554    
555        vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stdout");
556        vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/AppMaster.stderr");
557    
558        // Get final commmand
559        StringBuilder command = new StringBuilder();
560        for (CharSequence str : vargs) {
561          command.append(str).append(" ");
562        }
563    
564        LOG.info("Completed setting up app master command " + command.toString());     
565        List<String> commands = new ArrayList<String>();
566        commands.add(command.toString());           
567        amContainer.setCommands(commands);
568    
569        // For launching an AM Container, setting user here is not needed
570        // Set user in ApplicationSubmissionContext
571        // amContainer.setUser(amUser);
572    
573        // Set up resource type requirements
574        // For now, only memory is supported so we set memory requirements
575        Resource capability = Records.newRecord(Resource.class);
576        capability.setMemory(amMemory);
577        amContainer.setResource(capability);
578    
579        // Service data is a binary blob that can be passed to the application
580        // Not needed in this scenario
581        // amContainer.setServiceData(serviceData);
582    
583        // The following are not required for launching an application master 
584        // amContainer.setContainerId(containerId);         
585    
586        appContext.setAMContainerSpec(amContainer);
587    
588        // Set the priority for the application master
589        Priority pri = Records.newRecord(Priority.class);
590        // TODO - what is the range for priority? how to decide? 
591        pri.setPriority(amPriority);
592        appContext.setPriority(pri);
593    
594        // Set the queue to which this application is to be submitted in the RM
595        appContext.setQueue(amQueue);
596        // Set the user submitting this application 
597        // TODO can it be empty? 
598        appContext.setUser(amUser);
599    
600        // Create the request to send to the applications manager 
601        SubmitApplicationRequest appRequest = Records.newRecord(SubmitApplicationRequest.class);
602        appRequest.setApplicationSubmissionContext(appContext);
603    
604        // Submit the application to the applications manager
605        // SubmitApplicationResponse submitResp = applicationsManager.submitApplication(appRequest);
606        // Ignore the response as either a valid response object is returned on success 
607        // or an exception thrown to denote some form of a failure
608        LOG.info("Submitting application to ASM");
609        applicationsManager.submitApplication(appRequest);
610    
611        // TODO
612        // Try submitting the same request again
613        // app submission failure?
614    
615        // Monitor the application
616        return monitorApplication(appId);
617    
618      }
619    
620      /**
621       * Monitor the submitted application for completion. 
622       * Kill application if time expires. 
623       * @param appId Application Id of application to be monitored
624       * @return true if application completed successfully
625       * @throws YarnRemoteException
626       */
627      private boolean monitorApplication(ApplicationId appId) throws YarnRemoteException {
628    
629        while (true) {
630    
631          // Check app status every 1 second.
632          try {
633            Thread.sleep(1000);
634          } catch (InterruptedException e) {
635            LOG.debug("Thread sleep in monitoring loop interrupted");
636          }
637    
638          // Get application report for the appId we are interested in 
639          GetApplicationReportRequest reportRequest = Records.newRecord(GetApplicationReportRequest.class);
640          reportRequest.setApplicationId(appId);
641          GetApplicationReportResponse reportResponse = applicationsManager.getApplicationReport(reportRequest);
642          ApplicationReport report = reportResponse.getApplicationReport();
643    
644          LOG.info("Got application report from ASM for"
645              + ", appId=" + appId.getId()
646              + ", clientToken=" + report.getClientToken()
647              + ", appDiagnostics=" + report.getDiagnostics()
648              + ", appMasterHost=" + report.getHost()
649              + ", appQueue=" + report.getQueue()
650              + ", appMasterRpcPort=" + report.getRpcPort()
651              + ", appStartTime=" + report.getStartTime()
652              + ", yarnAppState=" + report.getYarnApplicationState().toString()
653              + ", distributedFinalState=" + report.getFinalApplicationStatus().toString()
654              + ", appTrackingUrl=" + report.getTrackingUrl()
655              + ", appUser=" + report.getUser());
656    
657          YarnApplicationState state = report.getYarnApplicationState();
658          FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
659          if (YarnApplicationState.FINISHED == state) {
660            if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
661              LOG.info("Application has completed successfully. Breaking monitoring loop");
662              return true;        
663            }
664            else {
665              LOG.info("Application did finished unsuccessfully."
666                  + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString()
667                  + ". Breaking monitoring loop");
668              return false;
669            }                         
670          }
671          else if (YarnApplicationState.KILLED == state     
672              || YarnApplicationState.FAILED == state) {
673            LOG.info("Application did not finish."
674                + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString()
675                + ". Breaking monitoring loop");
676            return false;
677          }                 
678    
679          if (System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
680            LOG.info("Reached client specified timeout for application. Killing application");
681            killApplication(appId);
682            return false;                           
683          }
684        }                   
685    
686      }
687    
688      /**
689       * Kill a submitted application by sending a call to the ASM
690       * @param appId Application Id to be killed. 
691       * @throws YarnRemoteException
692       */
693      private void killApplication(ApplicationId appId) throws YarnRemoteException {
694        KillApplicationRequest request = Records.newRecord(KillApplicationRequest.class);           
695        // TODO clarify whether multiple jobs with the same app id can be submitted and be running at 
696        // the same time. 
697        // If yes, can we kill a particular attempt only?
698        request.setApplicationId(appId);
699        // KillApplicationResponse response = applicationsManager.forceKillApplication(request);            
700        // Response can be ignored as it is non-null on success or 
701        // throws an exception in case of failures
702        applicationsManager.forceKillApplication(request);  
703      }
704    
705      /**
706       * Connect to the Resource Manager/Applications Manager
707       * @return Handle to communicate with the ASM
708       * @throws IOException 
709       */
710      private void connectToASM() throws IOException {
711    
712        /*
713                    UserGroupInformation user = UserGroupInformation.getCurrentUser();
714                    applicationsManager = user.doAs(new PrivilegedAction<ClientRMProtocol>() {
715                            public ClientRMProtocol run() {
716                                    InetSocketAddress rmAddress = NetUtils.createSocketAddr(conf.get(
717                                            YarnConfiguration.RM_SCHEDULER_ADDRESS,
718                                            YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS));               
719                                    LOG.info("Connecting to ResourceManager at " + rmAddress);
720                                    Configuration appsManagerServerConf = new Configuration(conf);
721                                    appsManagerServerConf.setClass(YarnConfiguration.YARN_SECURITY_INFO,
722                                    ClientRMSecurityInfo.class, SecurityInfo.class);
723                                    ClientRMProtocol asm = ((ClientRMProtocol) rpc.getProxy(ClientRMProtocol.class, rmAddress, appsManagerServerConf));
724                                    return asm;
725                            }
726                    });
727         */
728        YarnConfiguration yarnConf = new YarnConfiguration(conf);
729        InetSocketAddress rmAddress = NetUtils.createSocketAddr(yarnConf.get(
730            YarnConfiguration.RM_ADDRESS,
731            YarnConfiguration.DEFAULT_RM_ADDRESS));         
732        LOG.info("Connecting to ResourceManager at " + rmAddress);
733        applicationsManager = ((ClientRMProtocol) rpc.getProxy(
734            ClientRMProtocol.class, rmAddress, conf));
735      }             
736    
737      /**
738       * Get a new application from the ASM 
739       * @return New Application
740       * @throws YarnRemoteException
741       */
742      private GetNewApplicationResponse getApplication() throws YarnRemoteException {
743        GetNewApplicationRequest request = Records.newRecord(GetNewApplicationRequest.class);               
744        GetNewApplicationResponse response = applicationsManager.getNewApplication(request);
745        LOG.info("Got new application id=" + response.getApplicationId());          
746        return response;            
747      }
748    
749      private static String getTestRuntimeClasspath() {
750    
751        InputStream classpathFileStream = null;
752        BufferedReader reader = null;
753        String envClassPath = "";
754    
755        LOG.info("Trying to generate classpath for app master from current thread's classpath");
756        try {
757    
758          // Create classpath from generated classpath
759          // Check maven ppom.xml for generated classpath info
760          // Works if compile time env is same as runtime. Mainly tests.
761          ClassLoader thisClassLoader =
762              Thread.currentThread().getContextClassLoader();
763          String generatedClasspathFile = "yarn-apps-ds-generated-classpath";
764          classpathFileStream =
765              thisClassLoader.getResourceAsStream(generatedClasspathFile);
766          if (classpathFileStream == null) {
767            LOG.info("Could not classpath resource from class loader");
768            return envClassPath;
769          }
770          LOG.info("Readable bytes from stream=" + classpathFileStream.available());
771          reader = new BufferedReader(new InputStreamReader(classpathFileStream));
772          String cp = reader.readLine();
773          if (cp != null) {
774            envClassPath += cp.trim() + ":";
775          }
776          // Put the file itself on classpath for tasks.
777          envClassPath += thisClassLoader.getResource(generatedClasspathFile).getFile();
778        } catch (IOException e) {
779          LOG.info("Could not find the necessary resource to generate class path for tests. Error=" + e.getMessage());
780        } 
781    
782        try {
783          if (classpathFileStream != null) {
784            classpathFileStream.close();
785          }
786          if (reader != null) {
787            reader.close();
788          }
789        } catch (IOException e) {
790          LOG.info("Failed to close class path file stream or reader. Error=" + e.getMessage());
791        } 
792        return envClassPath;
793      }                     
794    
795    }