/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.URL;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.ClusterStatus.BlackListInfo;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.ClusterMetrics;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.QueueInfo;
import org.apache.hadoop.mapreduce.TaskTrackerInfo;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.mapreduce.tools.CLI;
import org.apache.hadoop.mapreduce.util.ConfigUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * <code>JobClient</code> is the primary interface for the user-job to interact
 * with the cluster.
 * 
 * <code>JobClient</code> provides facilities to submit jobs, track their 
 * progress, access component-tasks' reports and logs, get the Map-Reduce 
 * cluster's status information, etc.
 * 
 * <p>The job submission process involves:
 * <ol>
 *   <li>
 *   Checking the input and output specifications of the job.
 *   </li>
 *   <li>
 *   Computing the {@link InputSplit}s for the job.
 *   </li>
 *   <li>
 *   Setting up the requisite accounting information for the 
 *   {@link DistributedCache} of the job, if necessary.
 *   </li>
 *   <li>
 *   Copying the job's jar and configuration to the map-reduce system directory 
 *   on the distributed file-system. 
 *   </li>
 *   <li>
 *   Submitting the job to the cluster and optionally monitoring
 *   its status.
 *   </li>
 * </ol>
 *  
 * Normally the user creates the application, describes various facets of the
 * job via {@link JobConf} and then uses the <code>JobClient</code> to submit 
 * the job and monitor its progress.
 * 
 * <p>Here is an example of how to use <code>JobClient</code>:</p>
 * <blockquote><pre>
 *     // Create a new JobConf
 *     JobConf job = new JobConf(new Configuration(), MyJob.class);
 *     
 *     // Specify various job-specific parameters     
 *     job.setJobName("myjob");
 *     
 *     job.setInputPath(new Path("in"));
 *     job.setOutputPath(new Path("out"));
 *     
 *     job.setMapperClass(MyJob.MyMapper.class);
 *     job.setReducerClass(MyJob.MyReducer.class);
 *
 *     // Submit the job, then poll for progress until the job is complete
 *     JobClient.runJob(job);
 * </pre></blockquote>
 * 
 * <h4 id="JobControl">Job Control</h4>
 * 
 * <p>At times clients chain map-reduce jobs to accomplish complex tasks 
 * which cannot be done via a single map-reduce job. This is fairly easy since 
 * the output of a job typically goes to the distributed file-system, where it 
 * can be used as the input for the next job.</p>
 * 
 * <p>However, this also means that the onus of ensuring jobs are complete 
 * (success/failure) lies squarely on the clients. In such situations the 
 * various job-control options are:
 * <ol>
 *   <li>
 *   {@link #runJob(JobConf)} : submits the job and returns only after 
 *   the job has completed.
 *   </li>
 *   <li>
 *   {@link #submitJob(JobConf)} : only submits the job; the client can then 
 *   poll the returned handle to the {@link RunningJob} to query status and 
 *   make scheduling decisions, as sketched below.
 *   </li>
 *   <li>
 *   {@link JobConf#setJobEndNotificationURI(String)} : sets up a notification
 *   on job-completion, thus avoiding polling.
 *   </li>
 * </ol>
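 *
 * <p>For example, option (2) could look like the following sketch, where the
 * polling interval is arbitrary and <code>job</code> is a configured
 * {@link JobConf}:
 * <blockquote><pre>
 *     JobClient client = new JobClient(job);
 *     RunningJob running = client.submitJob(job);
 *     while (!running.isComplete()) {
 *       Thread.sleep(5000);   // poll every five seconds
 *     }
 *     if (!running.isSuccessful()) {
 *       throw new IOException("Job failed: " + running.getFailureInfo());
 *     }
 * </pre></blockquote>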
 * 
 * @see JobConf
 * @see ClusterStatus
 * @see Tool
 * @see DistributedCache
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class JobClient extends CLI {
  public enum TaskStatusFilter { NONE, KILLED, FAILED, SUCCEEDED, ALL }
  private TaskStatusFilter taskOutputFilter = TaskStatusFilter.FAILED;

  static {
    ConfigUtil.loadResources();
  }

  /**
   * A NetworkedJob is an implementation of RunningJob.  It holds
   * a {@link Job} object to provide some info, and interacts with the
   * remote service to provide certain functionality.
   */
  static class NetworkedJob implements RunningJob {
    Job job;
    /**
     * We store the {@link Job} used to talk to the cluster.  If the job
     * is null, then we cannot perform any of the tasks.  The job might be
     * null if the cluster has completely forgotten about the job (e.g.,
     * 24 hours after the job completes).
     */
    public NetworkedJob(JobStatus status, Cluster cluster) throws IOException {
      job = Job.getInstance(cluster, status, new JobConf(status.getJobFile()));
    }

    public NetworkedJob(Job job) throws IOException {
      this.job = job;
    }

    public Configuration getConfiguration() {
      return job.getConfiguration();
    }

    /**
     * An identifier for the job.
     */
    public JobID getID() {
      return JobID.downgrade(job.getJobID());
    }

    /** @deprecated Use {@link #getID()} instead. */
    @Deprecated
    public String getJobID() {
      return getID().toString();
    }

    /**
     * The user-specified job name.
     */
    public String getJobName() {
      return job.getJobName();
    }

    /**
     * The name of the job file.
     */
    public String getJobFile() {
      return job.getJobFile();
    }

    /**
     * A URL where the job's status can be seen.
     */
    public String getTrackingURL() {
      return job.getTrackingURL();
    }

    /**
     * A float between 0.0 and 1.0, indicating the fraction of map work
     * completed.
     */
    public float mapProgress() throws IOException {
      try {
        return job.mapProgress();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * A float between 0.0 and 1.0, indicating the fraction of reduce work
     * completed.
     */
    public float reduceProgress() throws IOException {
      try {
        return job.reduceProgress();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * A float between 0.0 and 1.0, indicating the fraction of cleanup work
     * completed.
     */
    public float cleanupProgress() throws IOException {
      try {
        return job.cleanupProgress();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * A float between 0.0 and 1.0, indicating the fraction of setup work
     * completed.
     */
    public float setupProgress() throws IOException {
      try {
        return job.setupProgress();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * A non-blocking check of whether the whole job has finished yet.
     */
    public synchronized boolean isComplete() throws IOException {
      try {
        return job.isComplete();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * True iff the job completed successfully.
     */
    public synchronized boolean isSuccessful() throws IOException {
      try {
        return job.isSuccessful();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * Blocks until the job is finished.
     */
    public void waitForCompletion() throws IOException {
      try {
        job.waitForCompletion(false);
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      } catch (ClassNotFoundException ce) {
        throw new IOException(ce);
      }
    }

    /**
     * Queries the service for the state of the current job.
     */
    public synchronized int getJobState() throws IOException {
      try {
        return job.getJobState().getValue();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * Tells the service to terminate the current job.
     */
    public synchronized void killJob() throws IOException {
      try {
        job.killJob();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * Set the priority of the job.
     * @param priority new priority of the job.
     */
    public synchronized void setJobPriority(String priority) 
                                                throws IOException {
      try {
        job.setPriority(
          org.apache.hadoop.mapreduce.JobPriority.valueOf(priority));
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * Kill the indicated task attempt.
     * @param taskId the id of the task to kill.
     * @param shouldFail if true the task is failed and added to the failed
     * tasks list, otherwise it is just killed, without affecting the job's
     * failure status.
     */
    public synchronized void killTask(TaskAttemptID taskId,
        boolean shouldFail) throws IOException {
      try {
        if (shouldFail) {
          job.failTask(taskId);
        } else {
          job.killTask(taskId);
        }
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /** @deprecated Use {@link #killTask(TaskAttemptID, boolean)} instead. */
    @Deprecated
    public synchronized void killTask(String taskId, boolean shouldFail) throws IOException {
      killTask(TaskAttemptID.forName(taskId), shouldFail);
    }

    /**
     * Fetch task completion events from the cluster for this job.
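     * Events are fetched in windows of at most 10 starting at
     * <code>startFrom</code>, so callers typically page through them.
     * An illustrative sketch, where <code>running</code> is the
     * {@link RunningJob} handle:
     * <pre>
     *   int from = 0;
     *   TaskCompletionEvent[] events;
     *   do {
     *     events = running.getTaskCompletionEvents(from);
     *     for (TaskCompletionEvent event : events) {
     *       System.out.println(event);
     *     }
     *     from += events.length;
     *   } while (events.length &gt; 0);
     * </pre>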
     */
    public synchronized TaskCompletionEvent[] getTaskCompletionEvents(
        int startFrom) throws IOException {
      try {
        // fetch the next window of up to 10 new-API events and downgrade them
        org.apache.hadoop.mapreduce.TaskCompletionEvent[] events = 
          job.getTaskCompletionEvents(startFrom, 10);
        TaskCompletionEvent[] ret = new TaskCompletionEvent[events.length];
        for (int i = 0; i < events.length; i++) {
          ret[i] = TaskCompletionEvent.downgrade(events[i]);
        }
        return ret;
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * Returns a string representation of the job.
     */
    @Override
    public String toString() {
      return job.toString();
    }

    /**
     * Returns the counters for this job.
     */
    public Counters getCounters() throws IOException {
      try { 
        Counters result = null;
        org.apache.hadoop.mapreduce.Counters temp = job.getCounters();
        if (temp != null) {
          result = Counters.downgrade(temp);
        }
        return result;
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    @Override
    public String[] getTaskDiagnostics(TaskAttemptID id) throws IOException {
      try { 
        return job.getTaskDiagnostics(id);
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    public String getHistoryUrl() throws IOException {
      try {
        return job.getHistoryUrl();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    public boolean isRetired() throws IOException {
      try {
        return job.isRetired();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    /**
     * Blocks until the job completes, printing status as progress is made.
     * @return true if the job succeeded
     */
    boolean monitorAndPrintJob() throws IOException, InterruptedException {
      return job.monitorAndPrintJob();
    }

    @Override
    public String getFailureInfo() throws IOException {
      try {
        return job.getStatus().getFailureInfo();
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }

    @Override
    public JobStatus getJobStatus() throws IOException {
      try {
        return JobStatus.downgrade(job.getStatus());
      } catch (InterruptedException ie) {
        throw new IOException(ie);
      }
    }
  }

  /**
   * UGI of the client. We store this UGI when the client is created and 
   * then make sure that the same UGI is used to run the various protocols.
   */
  UserGroupInformation clientUgi;

  /**
   * Create a job client.
   */
  public JobClient() {
  }

  /**
   * Build a job client with the given {@link JobConf}, and connect to the 
   * default cluster.
   * 
   * @param conf the job configuration.
   * @throws IOException
   */
  public JobClient(JobConf conf) throws IOException {
    init(conf);
  }

  /**
   * Build a job client with the given {@link Configuration}, 
   * and connect to the default cluster.
   * 
   * @param conf the configuration.
   * @throws IOException
   */
  public JobClient(Configuration conf) throws IOException {
    init(new JobConf(conf));
  }

  /**
   * Connect to the default cluster.
   * @param conf the job configuration.
   * @throws IOException
   */
  public void init(JobConf conf) throws IOException {
    setConf(conf);
    cluster = new Cluster(conf);
    clientUgi = UserGroupInformation.getCurrentUser();
  }

  /**
   * Build a job client and connect to the indicated job tracker.
   * 
   * @param jobTrackAddr the job tracker to connect to.
   * @param conf configuration.
   */
  public JobClient(InetSocketAddress jobTrackAddr, 
                   Configuration conf) throws IOException {
    cluster = new Cluster(jobTrackAddr, conf);
    clientUgi = UserGroupInformation.getCurrentUser();
  }

  /**
   * Close the <code>JobClient</code>.
   */
  public synchronized void close() throws IOException {
    cluster.close();
  }

  /**
   * Get a filesystem handle.  We need this to prepare jobs
   * for submission to the MapReduce system.
   * 
   * @return the filesystem handle.
   */
  public synchronized FileSystem getFs() throws IOException {
    try { 
      return cluster.getFileSystem();
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Get a handle to the Cluster.
   */
  public Cluster getClusterHandle() {
    return cluster;
  }

  /**
   * Submit a job to the MR system.
   * 
   * This returns a handle to the {@link RunningJob} which can be used to track
   * the running-job.
   * 
   * @param jobFile the job configuration.
   * @return a handle to the {@link RunningJob} which can be used to track the
   *         running-job.
   * @throws FileNotFoundException
   * @throws InvalidJobConfException
   * @throws IOException
   */
  public RunningJob submitJob(String jobFile) throws FileNotFoundException, 
                                                     InvalidJobConfException, 
                                                     IOException {
    // Load in the submitted job details
    JobConf job = new JobConf(jobFile);
    return submitJob(job);
  }

  /**
   * Submit a job to the MR system.
   * This returns a handle to the {@link RunningJob} which can be used to track
   * the running-job.
   * 
   * @param conf the job configuration.
   * @return a handle to the {@link RunningJob} which can be used to track the
   *         running-job.
   * @throws FileNotFoundException
   * @throws IOException
   */
  public RunningJob submitJob(final JobConf conf) throws FileNotFoundException,
                                                         IOException {
    try {
      conf.setBooleanIfUnset("mapred.mapper.new-api", false);
      conf.setBooleanIfUnset("mapred.reducer.new-api", false);
      Job job = clientUgi.doAs(new PrivilegedExceptionAction<Job>() {
        @Override
        public Job run() throws IOException, ClassNotFoundException, 
            InterruptedException {
          Job job = Job.getInstance(conf);
          job.submit();
          return job;
        }
      });
      // update our Cluster instance with the one created by Job for submission
      // (we can't pass our Cluster instance to Job, since Job wraps the config
      // instance, and the two configs would then diverge)
      cluster = job.getCluster();
      return new NetworkedJob(job);
    } catch (InterruptedException ie) {
      throw new IOException("interrupted", ie);
    }
  }

  private Job getJobUsingCluster(final JobID jobid) throws IOException,
      InterruptedException {
    return clientUgi.doAs(new PrivilegedExceptionAction<Job>() {
      public Job run() throws IOException, InterruptedException {
        return cluster.getJob(jobid);
      }
    });
  }

  /**
   * Get a {@link RunningJob} object to track an ongoing job.  Returns
   * null if the id does not correspond to any known job.
   * 
   * @param jobid the jobid of the job.
   * @return the {@link RunningJob} handle to track the job, null if the 
   *         <code>jobid</code> doesn't correspond to any known job.
   * @throws IOException
   */
  public RunningJob getJob(final JobID jobid) throws IOException {
    try {
      Job job = getJobUsingCluster(jobid);
      if (job != null) {
        JobStatus status = JobStatus.downgrade(job.getStatus());
        if (status != null) {
          return new NetworkedJob(status, cluster);
        } 
      }
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
    return null;
  }

  /** @deprecated Use {@link #getJob(JobID)} instead. */
  @Deprecated
  public RunningJob getJob(String jobid) throws IOException {
    return getJob(JobID.forName(jobid));
  }

  private static final TaskReport[] EMPTY_TASK_REPORTS = new TaskReport[0];

  /**
   * Get information on the current state of the map tasks of a job.
   * 
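   * <p>For example, to print the progress of each map task (an illustrative
   * sketch, where <code>client</code> is a connected <code>JobClient</code>
   * and <code>jobId</code> a {@link JobID}):
   * <pre>
   *   for (TaskReport report : client.getMapTaskReports(jobId)) {
   *     System.out.println(report.getTaskID() + " " + report.getProgress());
   *   }
   * </pre>
   *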
   * @param jobId the job to query.
   * @return the list of all of the map tips.
   * @throws IOException
   */
  public TaskReport[] getMapTaskReports(JobID jobId) throws IOException {
    return getTaskReports(jobId, TaskType.MAP);
  }

  private TaskReport[] getTaskReports(final JobID jobId, TaskType type) throws 
      IOException {
    try {
      Job j = getJobUsingCluster(jobId);
      if (j == null) {
        return EMPTY_TASK_REPORTS;
      }
      return TaskReport.downgradeArray(j.getTaskReports(type));
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /** @deprecated Use {@link #getMapTaskReports(JobID)} instead. */
  @Deprecated
  public TaskReport[] getMapTaskReports(String jobId) throws IOException {
    return getMapTaskReports(JobID.forName(jobId));
  }

  /**
   * Get information on the current state of the reduce tasks of a job.
   * 
   * @param jobId the job to query.
   * @return the list of all of the reduce tips.
   * @throws IOException
   */
  public TaskReport[] getReduceTaskReports(JobID jobId) throws IOException {
    return getTaskReports(jobId, TaskType.REDUCE);
  }

  /**
   * Get information on the current state of the cleanup tasks of a job.
   * 
   * @param jobId the job to query.
   * @return the list of all of the cleanup tips.
   * @throws IOException
   */
  public TaskReport[] getCleanupTaskReports(JobID jobId) throws IOException {
    return getTaskReports(jobId, TaskType.JOB_CLEANUP);
  }

  /**
   * Get information on the current state of the setup tasks of a job.
   * 
   * @param jobId the job to query.
   * @return the list of all of the setup tips.
   * @throws IOException
   */
  public TaskReport[] getSetupTaskReports(JobID jobId) throws IOException {
    return getTaskReports(jobId, TaskType.JOB_SETUP);
  }

  /** @deprecated Use {@link #getReduceTaskReports(JobID)} instead. */
  @Deprecated
  public TaskReport[] getReduceTaskReports(String jobId) throws IOException {
    return getReduceTaskReports(JobID.forName(jobId));
  }

  /**
   * Display the information about a job's tasks of a particular type and
   * in a particular state.
   * 
   * @param jobId the ID of the job
   * @param type the type of the task (map/reduce/setup/cleanup)
   * @param state the state of the task 
   * (pending/running/completed/failed/killed)
   */
  public void displayTasks(final JobID jobId, String type, String state) 
      throws IOException {
    try {
      Job job = getJobUsingCluster(jobId);
      super.displayTasks(job, type, state);
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Get status information about the Map-Reduce cluster.
   *  
   * @return the status information about the Map-Reduce cluster as an object
   *         of {@link ClusterStatus}.
   * @throws IOException
   */
  public ClusterStatus getClusterStatus() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<ClusterStatus>() {
        public ClusterStatus run() throws IOException, InterruptedException {
          ClusterMetrics metrics = cluster.getClusterStatus();
          return new ClusterStatus(metrics.getTaskTrackerCount(),
              metrics.getBlackListedTaskTrackerCount(),
              cluster.getTaskTrackerExpiryInterval(),
              metrics.getOccupiedMapSlots(),
              metrics.getOccupiedReduceSlots(), metrics.getMapSlotCapacity(),
              metrics.getReduceSlotCapacity(),
              cluster.getJobTrackerStatus(),
              metrics.getDecommissionedTaskTrackerCount());
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  private Collection<String> arrayToStringList(TaskTrackerInfo[] objs) {
    Collection<String> list = new ArrayList<String>();
    for (TaskTrackerInfo info : objs) {
      list.add(info.getTaskTrackerName());
    }
    return list;
  }

  private Collection<BlackListInfo> arrayToBlackListInfo(TaskTrackerInfo[] objs) {
    Collection<BlackListInfo> list = new ArrayList<BlackListInfo>();
    for (TaskTrackerInfo info : objs) {
      BlackListInfo binfo = new BlackListInfo();
      binfo.setTrackerName(info.getTaskTrackerName());
      binfo.setReasonForBlackListing(info.getReasonForBlacklist());
      binfo.setBlackListReport(info.getBlacklistReport());
      list.add(binfo);
    }
    return list;
  }

  /**
   * Get status information about the Map-Reduce cluster.
   *  
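   * <p>For example, to list the names of the active trackers (an illustrative
   * sketch; it assumes {@link ClusterStatus#getActiveTrackerNames()} on the
   * result):
   * <pre>
   *   ClusterStatus status = client.getClusterStatus(true);
   *   for (String tracker : status.getActiveTrackerNames()) {
   *     System.out.println(tracker);
   *   }
   * </pre>
   *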
   * @param  detailed if true then get a detailed status including the
   *         tracker names
   * @return the status information about the Map-Reduce cluster as an object
   *         of {@link ClusterStatus}.
   * @throws IOException
   */
  public ClusterStatus getClusterStatus(boolean detailed) throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<ClusterStatus>() {
        public ClusterStatus run() throws IOException, InterruptedException {
          ClusterMetrics metrics = cluster.getClusterStatus();
          return new ClusterStatus(
              arrayToStringList(cluster.getActiveTaskTrackers()),
              arrayToBlackListInfo(cluster.getBlackListedTaskTrackers()),
              cluster.getTaskTrackerExpiryInterval(),
              metrics.getOccupiedMapSlots(),
              metrics.getOccupiedReduceSlots(), metrics.getMapSlotCapacity(),
              metrics.getReduceSlotCapacity(), 
              cluster.getJobTrackerStatus());
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /** 
   * Get the jobs that are not completed and not failed.
   * 
   * @return array of {@link JobStatus} for the running/to-be-run jobs.
   * @throws IOException
   */
  public JobStatus[] jobsToComplete() throws IOException {
    List<JobStatus> stats = new ArrayList<JobStatus>();
    for (JobStatus stat : getAllJobs()) {
      if (!stat.isJobComplete()) {
        stats.add(stat);
      }
    }
    return stats.toArray(new JobStatus[0]);
  }

  /** 
   * Get the jobs that are submitted.
   * 
   * @return array of {@link JobStatus} for the submitted jobs.
   * @throws IOException
   */
  public JobStatus[] getAllJobs() throws IOException {
    try {
      org.apache.hadoop.mapreduce.JobStatus[] jobs = 
          clientUgi.doAs(new PrivilegedExceptionAction<
              org.apache.hadoop.mapreduce.JobStatus[]>() {
            public org.apache.hadoop.mapreduce.JobStatus[] run() 
                throws IOException, InterruptedException {
              return cluster.getAllJobStatuses();
            }
          });
      JobStatus[] stats = new JobStatus[jobs.length];
      for (int i = 0; i < jobs.length; i++) {
        stats[i] = JobStatus.downgrade(jobs[i]);
      }
      return stats;
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /** 
   * Utility that submits a job, then polls for progress until the job is
   * complete.
   * 
   * @param job the job configuration.
   * @throws IOException if the job fails
   */
  public static RunningJob runJob(JobConf job) throws IOException {
    JobClient jc = new JobClient(job);
    RunningJob rj = jc.submitJob(job);
    try {
      if (!jc.monitorAndPrintJob(job, rj)) {
        throw new IOException("Job failed!");
      }
    } catch (InterruptedException ie) {
      Thread.currentThread().interrupt();
    }
    return rj;
  }

  /**
   * Monitor a job and print status in real-time as progress is made and tasks 
   * fail.
   * @param conf the job's configuration
   * @param job the job to track
   * @return true if the job succeeded
   * @throws IOException if communication to the JobTracker fails
   */
  public boolean monitorAndPrintJob(JobConf conf, 
                                    RunningJob job
  ) throws IOException, InterruptedException {
    return ((NetworkedJob) job).monitorAndPrintJob();
  }

  static String getTaskLogURL(TaskAttemptID taskId, String baseUrl) {
    return (baseUrl + "/tasklog?plaintext=true&attemptid=" + taskId); 
  }

  /**
   * Build a {@link Configuration} for the given job tracker specification:
   * either a <code>host:port</code> address, or a name for which a
   * corresponding <code>hadoop-&lt;name&gt;.xml</code> resource must be on
   * the classpath.
   */
  static Configuration getConfiguration(String jobTrackerSpec) {
    Configuration conf = new Configuration();
    if (jobTrackerSpec != null) {
      if (jobTrackerSpec.indexOf(":") >= 0) {
        conf.set("mapred.job.tracker", jobTrackerSpec);
      } else {
        String classpathFile = "hadoop-" + jobTrackerSpec + ".xml";
        URL validate = conf.getResource(classpathFile);
        if (validate == null) {
          throw new RuntimeException(classpathFile + " not found on CLASSPATH");
        }
        conf.addResource(classpathFile);
      }
    }
    return conf;
  }

  /**
   * Sets the output filter for tasks. Only those tasks whose output matches
   * the filter are printed.
   * @param newValue task filter.
   */
  @Deprecated
  public void setTaskOutputFilter(TaskStatusFilter newValue) {
    this.taskOutputFilter = newValue;
  }

  /**
   * Get the task output filter out of the JobConf.
   * 
   * @param job the JobConf to examine.
   * @return the filter level.
   */
  public static TaskStatusFilter getTaskOutputFilter(JobConf job) {
    return TaskStatusFilter.valueOf(job.get("jobclient.output.filter", 
                                            "FAILED"));
  }

  /**
   * Modify the JobConf to set the task output filter.
   * 
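   * <p>For example, to have the client print the output of all tasks rather
   * than only the failed ones (illustrative):
   * <pre>
   *   JobClient.setTaskOutputFilter(job, TaskStatusFilter.ALL);
   *   JobClient.runJob(job);
   * </pre>
   *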
   * @param job the JobConf to modify.
   * @param newValue the value to set.
   */
  public static void setTaskOutputFilter(JobConf job, 
                                         TaskStatusFilter newValue) {
    job.set("jobclient.output.filter", newValue.toString());
  }

  /**
   * Returns the task output filter.
   * @return task filter.
   */
  @Deprecated
  public TaskStatusFilter getTaskOutputFilter() {
    return this.taskOutputFilter; 
  }

  /**
   * Look up the value of a counter, converting the new-API counters to the
   * old API.
   */
  protected long getCounter(org.apache.hadoop.mapreduce.Counters cntrs,
      String counterGroupName, String counterName) throws IOException {
    Counters counters = Counters.downgrade(cntrs);
    return counters.findCounter(counterGroupName, counterName).getValue();
  }

  /**
   * Get status information about the max available Maps in the cluster.
   *  
   * @return the max available Maps in the cluster
   * @throws IOException
   */
  public int getDefaultMaps() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<Integer>() {
        @Override
        public Integer run() throws IOException, InterruptedException {
          return cluster.getClusterStatus().getMapSlotCapacity();
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Get status information about the max available Reduces in the cluster.
   *  
   * @return the max available Reduces in the cluster
   * @throws IOException
   */
  public int getDefaultReduces() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<Integer>() {
        @Override
        public Integer run() throws IOException, InterruptedException {
          return cluster.getClusterStatus().getReduceSlotCapacity();
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Grab the jobtracker system directory path where job-specific files are
   * to be placed.
   * 
   * @return the system directory where job-specific files are to be placed.
   */
  public Path getSystemDir() {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<Path>() {
        @Override
        public Path run() throws IOException, InterruptedException {
          return cluster.getSystemDir();
        }
      });
    } catch (IOException ioe) {
      return null;
    } catch (InterruptedException ie) {
      return null;
    }
  }

  private JobQueueInfo getJobQueueInfo(QueueInfo queue) {
    JobQueueInfo ret = new JobQueueInfo(queue);
    // make sure to convert any children
    if (queue.getQueueChildren().size() > 0) {
      List<JobQueueInfo> childQueues = new ArrayList<JobQueueInfo>(queue
          .getQueueChildren().size());
      for (QueueInfo child : queue.getQueueChildren()) {
        childQueues.add(getJobQueueInfo(child));
      }
      ret.setChildren(childQueues);
    }
    return ret;
  }

  private JobQueueInfo[] getJobQueueInfoArray(QueueInfo[] queues)
      throws IOException {
    JobQueueInfo[] ret = new JobQueueInfo[queues.length];
    for (int i = 0; i < queues.length; i++) {
      ret[i] = getJobQueueInfo(queues[i]);
    }
    return ret;
  }

  /**
   * Returns an array of queue information objects about the root-level queues
   * configured.
   *
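   * <p>For example, to print the configured queue hierarchy (an illustrative
   * sketch; <code>printQueue</code> is a hypothetical recursive helper and
   * {@link JobQueueInfo#getChildren()} is assumed to return the converted
   * children):
   * <pre>
   *   for (JobQueueInfo root : client.getRootQueues()) {
   *     printQueue(root, "");
   *   }
   *
   *   void printQueue(JobQueueInfo queue, String indent) {
   *     System.out.println(indent + queue.getQueueName());
   *     for (JobQueueInfo child : queue.getChildren()) {
   *       printQueue(child, indent + "  ");
   *     }
   *   }
   * </pre>
   *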
   * @return the array of root-level JobQueueInfo objects
   * @throws IOException
   */
  public JobQueueInfo[] getRootQueues() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<JobQueueInfo[]>() {
        public JobQueueInfo[] run() throws IOException, InterruptedException {
          return getJobQueueInfoArray(cluster.getRootQueues());
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Returns an array of queue information objects about the immediate children
   * of the queue <code>queueName</code>.
   * 
   * @param queueName the name of the parent queue
   * @return the array of immediate-children JobQueueInfo objects
   * @throws IOException
   */
  public JobQueueInfo[] getChildQueues(final String queueName) throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<JobQueueInfo[]>() {
        public JobQueueInfo[] run() throws IOException, InterruptedException {
          return getJobQueueInfoArray(cluster.getChildQueues(queueName));
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Return an array of queue information objects about all the job queues
   * configured.
   * 
   * @return array of JobQueueInfo objects
   * @throws IOException
   */
  public JobQueueInfo[] getQueues() throws IOException {
    try {
      return clientUgi.doAs(new PrivilegedExceptionAction<JobQueueInfo[]>() {
        public JobQueueInfo[] run() throws IOException, InterruptedException {
          return getJobQueueInfoArray(cluster.getQueues());
        }
      });
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Gets all the jobs which were added to a particular job queue.
   * 
   * @param queueName name of the job queue
   * @return array of jobs present in the job queue
   * @throws IOException
   */
  public JobStatus[] getJobsFromQueue(final String queueName) throws IOException {
    try {
      QueueInfo queue = clientUgi.doAs(new PrivilegedExceptionAction<QueueInfo>() {
        @Override
        public QueueInfo run() throws IOException, InterruptedException {
          return cluster.getQueue(queueName);
        }
      });
      if (queue == null) {
        return null;
      }
      org.apache.hadoop.mapreduce.JobStatus[] stats = 
        queue.getJobStatuses();
      JobStatus[] ret = new JobStatus[stats.length];
      for (int i = 0; i < stats.length; i++) {
        ret[i] = JobStatus.downgrade(stats[i]);
      }
      return ret;
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Gets the queue information associated with a particular job queue.
   * 
   * @param queueName name of the job queue.
   * @return queue information associated with the particular queue.
   * @throws IOException
   */
  public JobQueueInfo getQueueInfo(final String queueName) throws IOException {
    try {
      QueueInfo queueInfo = clientUgi.doAs(new 
          PrivilegedExceptionAction<QueueInfo>() {
        public QueueInfo run() throws IOException, InterruptedException {
          return cluster.getQueue(queueName);
        }
      });
      if (queueInfo != null) {
        return new JobQueueInfo(queueInfo);
      }
      return null;
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Gets the queue ACLs for the current user.
   * @return array of QueueAclsInfo objects for the current user.
   * @throws IOException
   */
  public QueueAclsInfo[] getQueueAclsForCurrentUser() throws IOException {
    try {
      org.apache.hadoop.mapreduce.QueueAclsInfo[] acls = 
        clientUgi.doAs(new 
            PrivilegedExceptionAction
            <org.apache.hadoop.mapreduce.QueueAclsInfo[]>() {
              public org.apache.hadoop.mapreduce.QueueAclsInfo[] run() 
                  throws IOException, InterruptedException {
                return cluster.getQueueAclsForCurrentUser();
              }
            });
      QueueAclsInfo[] ret = new QueueAclsInfo[acls.length];
      for (int i = 0; i < acls.length; i++) {
        ret[i] = QueueAclsInfo.downgrade(acls[i]);
      }
      return ret;
    } catch (InterruptedException ie) {
      throw new IOException(ie);
    }
  }

  /**
   * Get a delegation token for the user from the JobTracker.
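   * <p>For example, a client could fetch a token and attach it to the current
   * user's credentials so that later RPCs can authenticate with it
   * (an illustrative sketch; the renewer name is a placeholder):
   * <pre>
   *   Token&lt;DelegationTokenIdentifier&gt; token =
   *       client.getDelegationToken(new Text("renewer"));
   *   UserGroupInformation.getCurrentUser().addToken(token);
   * </pre>
   *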
   * @param renewer the user who can renew the token
   * @return the new token
   * @throws IOException
   */
  public Token<DelegationTokenIdentifier> 
      getDelegationToken(final Text renewer) throws IOException, InterruptedException {
    return clientUgi.doAs(new 
        PrivilegedExceptionAction<Token<DelegationTokenIdentifier>>() {
      public Token<DelegationTokenIdentifier> run() throws IOException, 
          InterruptedException {
        return cluster.getDelegationToken(renewer);
      }
    });
  }

  /**
   * Renew a delegation token.
   * @param token the token to renew
   * @return the new expiration time of the token
   * @throws InvalidToken
   * @throws IOException
   * @deprecated Use {@link Token#renew} instead
   */
  public long renewDelegationToken(Token<DelegationTokenIdentifier> token
                                   ) throws InvalidToken, IOException, 
                                            InterruptedException {
    return token.renew(getConf());
  }

  /**
   * Cancel a delegation token from the JobTracker.
   * @param token the token to cancel
   * @throws IOException
   * @deprecated Use {@link Token#cancel} instead
   */
  public void cancelDelegationToken(Token<DelegationTokenIdentifier> token
                                    ) throws InvalidToken, IOException, 
                                             InterruptedException {
    token.cancel(getConf());
  }

  /**
   * Run the <code>JobClient</code> as a command-line tool.
   */
  public static void main(String argv[]) throws Exception {
    int res = ToolRunner.run(new JobClient(), argv);
    System.exit(res);
  }
}