001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.net.ftp.parser;
019
020import java.text.ParseException;
021import java.util.List;
022
023import org.apache.commons.net.ftp.FTPClientConfig;
024import org.apache.commons.net.ftp.FTPFile;
025
026/**
027 * Implementation of FTPFileEntryParser and FTPFileListParser for IBM zOS/MVS
028 * Systems.
029 *
030 * @author <a href="henrik.sorensen@balcab.ch">Henrik Sorensen</a>
031 * @author <a href="jnadler@srcginc.com">Jeff Nadler</a>
032 * @author <a href="wnoto@openfinance.com">William Noto</a>
033 *
034 * @version $Id: MVSFTPEntryParser.java 1490237 2013-06-06 11:17:23Z sebb $
035 * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for
036 *      usage instructions)
037 */
038public class MVSFTPEntryParser extends ConfigurableFTPFileEntryParserImpl {
039
040    static final int UNKNOWN_LIST_TYPE = -1;
041    static final int FILE_LIST_TYPE = 0;
042    static final int MEMBER_LIST_TYPE = 1;
043    static final int UNIX_LIST_TYPE = 2;
044    static final int JES_LEVEL_1_LIST_TYPE = 3;
045    static final int JES_LEVEL_2_LIST_TYPE = 4;
046
047    private int isType = UNKNOWN_LIST_TYPE;
048
049    /**
050     * Fallback parser for Unix-style listings
051     */
052    private UnixFTPEntryParser unixFTPEntryParser;
053
054    /**
055     * Dates are ignored for file lists, but are used for member lists where
056     * possible
057     */
058    static final String DEFAULT_DATE_FORMAT = "yyyy/MM/dd HH:mm"; // 2001/09/18
059                                                                    // 13:52
060
061    /**
062     * Matches these entries: Volume Unit Referred Ext Used Recfm Lrecl BlkSz
063     * Dsorg Dsname B10142 3390 2006/03/20 2 31 F 80 80 PS MDI.OKL.WORK
064     *
065     */
066    static final String FILE_LIST_REGEX = "\\S+\\s+" + // volume
067                                                                // ignored
068            "\\S+\\s+" + // unit - ignored
069            "\\S+\\s+" + // access date - ignored
070            "\\S+\\s+" + // extents -ignored
071            "\\S+\\s+" + // used - ignored
072            "[FV]\\S*\\s+" + // recfm - must start with F or V
073            "\\S+\\s+" + // logical record length -ignored
074            "\\S+\\s+" + // block size - ignored
075            "(PS|PO|PO-E)\\s+" + // Dataset organisation. Many exist
076            // but only support: PS, PO, PO-E
077            "(\\S+)\\s*"; // Dataset Name (file name)
078
079    /**
080     * Matches these entries: Name VV.MM Created Changed Size Init Mod Id
081     * TBSHELF 01.03 2002/09/12 2002/10/11 09:37 11 11 0 KIL001
082     */
083    static final String MEMBER_LIST_REGEX = "(\\S+)\\s+" + // name
084            "\\S+\\s+" + // version, modification (ignored)
085            "\\S+\\s+" + // create date (ignored)
086            "(\\S+)\\s+" + // modification date
087            "(\\S+)\\s+" + // modification time
088            "\\S+\\s+" + // size in lines (ignored)
089            "\\S+\\s+" + // size in lines at creation(ignored)
090            "\\S+\\s+" + // lines modified (ignored)
091            "\\S+\\s*"; // id of user who modified (ignored)
092
093    /**
094     * Matches these entries, note: no header: IBMUSER1 JOB01906 OUTPUT 3 Spool
095     * Files 012345678901234567890123456789012345678901234 1 2 3 4
096     */
097    static final String JES_LEVEL_1_LIST_REGEX = "(\\S+)\\s+" + // job
098                                                                        // name
099                                                                        // ignored
100            "(\\S+)\\s+" + // job number
101            "(\\S+)\\s+" + // job status (OUTPUT,INPUT,ACTIVE)
102            "(\\S+)\\s+" + // number of spool files
103            "(\\S+)\\s+" + // Text "Spool" ignored
104            "(\\S+)\\s*" // Text "Files" ignored
105    ;
106
107    /**
108     * JES INTERFACE LEVEL 2 parser Matches these entries: JOBNAME JOBID OWNER
109     * STATUS CLASS IBMUSER1 JOB01906 IBMUSER OUTPUT A RC=0000 3 spool files
110     * IBMUSER TSU01830 IBMUSER OUTPUT TSU ABEND=522 3 spool files
111     * 012345678901234567890123456789012345678901234 1 2 3 4
112     * 012345678901234567890123456789012345678901234567890
113     */
114
115    static final String JES_LEVEL_2_LIST_REGEX = "(\\S+)\\s+" + // job
116                                                                        // name
117                                                                        // ignored
118            "(\\S+)\\s+" + // job number
119            "(\\S+)\\s+" + // owner ignored
120            "(\\S+)\\s+" + // job status (OUTPUT,INPUT,ACTIVE) ignored
121            "(\\S+)\\s+" + // job class ignored
122            "(\\S+).*" // rest ignored
123    ;
124
125    /*
126     * ---------------------------------------------------------------------
127     * Very brief and incomplete description of the zOS/MVS-filesystem. (Note:
128     * "zOS" is the operating system on the mainframe, and is the new name for
129     * MVS)
130     *
131     * The filesystem on the mainframe does not have hierarchal structure as for
132     * example the unix filesystem. For a more comprehensive description, please
133     * refer to the IBM manuals
134     *
135     * @LINK:
136     * http://publibfp.boulder.ibm.com/cgi-bin/bookmgr/BOOKS/dgt2d440/CONTENTS
137     *
138     *
139     * Dataset names =============
140     *
141     * A dataset name consist of a number of qualifiers separated by '.', each
142     * qualifier can be at most 8 characters, and the total length of a dataset
143     * can be max 44 characters including the dots.
144     *
145     *
146     * Dataset organisation ====================
147     *
148     * A dataset represents a piece of storage allocated on one or more disks.
149     * The structure of the storage is described with the field dataset
150     * organinsation (DSORG). There are a number of dataset organisations, but
151     * only two are usable for FTP transfer.
152     *
153     * DSORG: PS: sequential, or flat file PO: partitioned dataset PO-E:
154     * extended partitioned dataset
155     *
156     * The PS file is just a flat file, as you would find it on the unix file
157     * system.
158     *
159     * The PO and PO-E files, can be compared to a single level directory
160     * structure. A PO file consist of a number of dataset members, or files if
161     * you will. It is possible to CD into the file, and to retrieve the
162     * individual members.
163     *
164     *
165     * Dataset record format =====================
166     *
167     * The physical layout of the dataset is described on the dataset itself.
168     * There are a number of record formats (RECFM), but just a few is relavant
169     * for the FTP transfer.
170     *
171     * Any one beginning with either F or V can safely used by FTP transfer. All
172     * others should only be used with great care, so this version will just
173     * ignore the other record formats. F means a fixed number of records per
174     * allocated storage, and V means a variable number of records.
175     *
176     *
177     * Other notes ===========
178     *
179     * The file system supports automatically backup and retrieval of datasets.
180     * If a file is backed up, the ftp LIST command will return: ARCIVE Not
181     * Direct Access Device KJ.IOP998.ERROR.PL.UNITTEST
182     *
183     *
184     * Implementation notes ====================
185     *
186     * Only datasets that have dsorg PS, PO or PO-E and have recfm beginning
187     * with F or V, is fully parsed.
188     *
189     * The following fields in FTPFile is used: FTPFile.Rawlisting: Always set.
190     * FTPFile.Type: DIRECTORY_TYPE or FILE_TYPE or UNKNOWN FTPFile.Name: name
191     * FTPFile.Timestamp: change time or null
192     *
193     *
194     *
195     * Additional information ======================
196     *
197     * The MVS ftp server supports a number of features via the FTP interface.
198     * The features are controlled with the FTP command quote site filetype=<SEQ|JES|DB2>
199     * SEQ is the default and used for normal file transfer JES is used to
200     * interact with the Job Entry Subsystem (JES) similar to a job scheduler
201     * DB2 is used to interact with a DB2 subsystem
202     *
203     * This parser supports SEQ and JES.
204     *
205     *
206     *
207     *
208     *
209     *
210     */
211
212    /**
213     * The sole constructor for a MVSFTPEntryParser object.
214     *
215     */
216    public MVSFTPEntryParser() {
217        super(""); // note the regex is set in preParse.
218        super.configure(null); // configure parser with default configurations
219    }
220
221    /**
222     * Parses a line of an z/OS - MVS FTP server file listing and converts it
223     * into a usable format in the form of an <code> FTPFile </code> instance.
224     * If the file listing line doesn't describe a file, then
225     * <code> null </code> is returned. Otherwise a <code> FTPFile </code>
226     * instance representing the file is returned.
227     *
228     * @param entry
229     *            A line of text from the file listing
230     * @return An FTPFile instance corresponding to the supplied entry
231     */
232//    @Override
233    public FTPFile parseFTPEntry(String entry) {
234        boolean isParsed = false;
235        FTPFile f = new FTPFile();
236
237        if (isType == FILE_LIST_TYPE) {
238            isParsed = parseFileList(f, entry);
239        } else if (isType == MEMBER_LIST_TYPE) {
240            isParsed = parseMemberList(f, entry);
241            if (!isParsed) {
242                isParsed = parseSimpleEntry(f, entry);
243            }
244        } else if (isType == UNIX_LIST_TYPE) {
245            isParsed = parseUnixList(f, entry);
246        } else if (isType == JES_LEVEL_1_LIST_TYPE) {
247            isParsed = parseJeslevel1List(f, entry);
248        } else if (isType == JES_LEVEL_2_LIST_TYPE) {
249            isParsed = parseJeslevel2List(f, entry);
250        }
251
252        if (!isParsed) {
253            f = null;
254        }
255
256        return f;
257    }
258
259    /**
260     * Parse entries representing a dataset list. Only datasets with DSORG PS or
261     * PO or PO-E and with RECFM F* or V* will be parsed.
262     *
263     * Format of ZOS/MVS file list: 1 2 3 4 5 6 7 8 9 10 Volume Unit Referred
264     * Ext Used Recfm Lrecl BlkSz Dsorg Dsname B10142 3390 2006/03/20 2 31 F 80
265     * 80 PS MDI.OKL.WORK ARCIVE Not Direct Access Device
266     * KJ.IOP998.ERROR.PL.UNITTEST B1N231 3390 2006/03/20 1 15 VB 256 27998 PO
267     * PLU B1N231 3390 2006/03/20 1 15 VB 256 27998 PO-E PLB
268     *
269     * ----------------------------------- Group within Regex [1] Volume [2]
270     * Unit [3] Referred [4] Ext: number of extents [5] Used [6] Recfm: Record
271     * format [7] Lrecl: Logical record length [8] BlkSz: Block size [9] Dsorg:
272     * Dataset organisation. Many exists but only support: PS, PO, PO-E [10]
273     * Dsname: Dataset name
274     *
275     * Note: When volume is ARCIVE, it means the dataset is stored somewhere in
276     * a tape archive. These entries is currently not supported by this parser.
277     * A null value is returned.
278     *
279     * @param file
280     *            will be updated with Name, Type, Timestamp if parsed.
281     * @param entry zosDirectoryEntry
282     * @return true: entry was parsed, false: entry was not parsed.
283     */
284    private boolean parseFileList(FTPFile file, String entry) {
285        if (matches(entry)) {
286            file.setRawListing(entry);
287            String name = group(2);
288            String dsorg = group(1);
289            file.setName(name);
290
291            // DSORG
292            if ("PS".equals(dsorg)) {
293                file.setType(FTPFile.FILE_TYPE);
294            }
295            else if ("PO".equals(dsorg) || "PO-E".equals(dsorg)) {
296                // regex already ruled out anything other than PO or PO-E
297                file.setType(FTPFile.DIRECTORY_TYPE);
298            }
299            else {
300                return false;
301            }
302
303            return true;
304        }
305
306        return false;
307    }
308
309    /**
310     * Parse entries within a partitioned dataset.
311     *
312     * Format of a memberlist within a PDS: 1 2 3 4 5 6 7 8 9 Name VV.MM Created
313     * Changed Size Init Mod Id TBSHELF 01.03 2002/09/12 2002/10/11 09:37 11 11
314     * 0 KIL001 TBTOOL 01.12 2002/09/12 2004/11/26 19:54 51 28 0 KIL001
315     *
316     * ------------------------------------------- [1] Name [2] VV.MM: Version .
317     * modification [3] Created: yyyy / MM / dd [4,5] Changed: yyyy / MM / dd
318     * HH:mm [6] Size: number of lines [7] Init: number of lines when first
319     * created [8] Mod: number of modified lines a last save [9] Id: User id for
320     * last update
321     *
322     *
323     * @param file
324     *            will be updated with Name, Type and Timestamp if parsed.
325     * @param entry zosDirectoryEntry
326     * @return true: entry was parsed, false: entry was not parsed.
327     */
328    private boolean parseMemberList(FTPFile file, String entry) {
329        if (matches(entry)) {
330            file.setRawListing(entry);
331            String name = group(1);
332            String datestr = group(2) + " " + group(3);
333            file.setName(name);
334            file.setType(FTPFile.FILE_TYPE);
335            try {
336                file.setTimestamp(super.parseTimestamp(datestr));
337            } catch (ParseException e) {
338                e.printStackTrace();
339                // just ignore parsing errors.
340                // TODO check this is ok
341                return false; // this is a parsing failure too.
342            }
343            return true;
344        }
345
346        return false;
347    }
348
349    /**
350     * Assigns the name to the first word of the entry. Only to be used from a
351     * safe context, for example from a memberlist, where the regex for some
352     * reason fails. Then just assign the name field of FTPFile.
353     *
354     * @param file
355     * @param entry
356     * @return true if the entry string is non-null and non-empty
357     */
358    private boolean parseSimpleEntry(FTPFile file, String entry) {
359        if (entry != null && entry.trim().length() > 0) {
360            file.setRawListing(entry);
361            String name = entry.split(" ")[0];
362            file.setName(name);
363            file.setType(FTPFile.FILE_TYPE);
364            return true;
365        }
366        return false;
367    }
368
369    /**
370     * Parse the entry as a standard unix file. Using the UnixFTPEntryParser.
371     *
372     * @param file
373     * @param entry
374     * @return true: entry is parsed, false: entry could not be parsed.
375     */
376    private boolean parseUnixList(FTPFile file, String entry) {
377        file = unixFTPEntryParser.parseFTPEntry(entry);
378        if (file == null) {
379            return false;
380        }
381        return true;
382    }
383
384    /**
385     * Matches these entries, note: no header: [1] [2] [3] [4] [5] IBMUSER1
386     * JOB01906 OUTPUT 3 Spool Files
387     * 012345678901234567890123456789012345678901234 1 2 3 4
388     * ------------------------------------------- Group in regex [1] Job name
389     * [2] Job number [3] Job status (INPUT,ACTIVE,OUTPUT) [4] Number of sysout
390     * files [5] The string "Spool Files"
391     *
392     *
393     * @param file
394     *            will be updated with Name, Type and Timestamp if parsed.
395     * @param entry zosDirectoryEntry
396     * @return true: entry was parsed, false: entry was not parsed.
397     */
398    private boolean parseJeslevel1List(FTPFile file, String entry) {
399        if (matches(entry)) {
400            if (group(3).equalsIgnoreCase("OUTPUT")) {
401                file.setRawListing(entry);
402                String name = group(2); /* Job Number, used by GET */
403                file.setName(name);
404                file.setType(FTPFile.FILE_TYPE);
405                return true;
406            }
407        }
408
409        return false;
410    }
411
412    /**
413     * Matches these entries, note: no header: [1] [2] [3] [4] [5] JOBNAME JOBID
414     * OWNER STATUS CLASS IBMUSER1 JOB01906 IBMUSER OUTPUT A RC=0000 3 spool
415     * files IBMUSER TSU01830 IBMUSER OUTPUT TSU ABEND=522 3 spool files
416     * 012345678901234567890123456789012345678901234 1 2 3 4
417     * ------------------------------------------- Group in regex [1] Job name
418     * [2] Job number [3] Owner [4] Job status (INPUT,ACTIVE,OUTPUT) [5] Job
419     * Class [6] The rest
420     *
421     *
422     * @param file
423     *            will be updated with Name, Type and Timestamp if parsed.
424     * @param entry zosDirectoryEntry
425     * @return true: entry was parsed, false: entry was not parsed.
426     */
427    private boolean parseJeslevel2List(FTPFile file, String entry) {
428        if (matches(entry)) {
429            if (group(4).equalsIgnoreCase("OUTPUT")) {
430                file.setRawListing(entry);
431                String name = group(2); /* Job Number, used by GET */
432                file.setName(name);
433                file.setType(FTPFile.FILE_TYPE);
434                return true;
435            }
436        }
437
438        return false;
439    }
440
441    /**
442     * preParse is called as part of the interface. Per definition is is called
443     * before the parsing takes place. Three kind of lists is recognize:
444     * z/OS-MVS File lists z/OS-MVS Member lists unix file lists
445     * @since 2.0
446     */
447    @Override
448    public List<String> preParse(List<String> orig) {
449        // simply remove the header line. Composite logic will take care of the
450        // two different types of
451        // list in short order.
452        if (orig != null && orig.size() > 0) {
453            String header = orig.get(0);
454            if (header.indexOf("Volume") >= 0 && header.indexOf("Dsname") >= 0) {
455                setType(FILE_LIST_TYPE);
456                super.setRegex(FILE_LIST_REGEX);
457            } else if (header.indexOf("Name") >= 0 && header.indexOf("Id") >= 0) {
458                setType(MEMBER_LIST_TYPE);
459                super.setRegex(MEMBER_LIST_REGEX);
460            } else if (header.indexOf("total") == 0) {
461                setType(UNIX_LIST_TYPE);
462                unixFTPEntryParser = new UnixFTPEntryParser();
463            } else if (header.indexOf("Spool Files") >= 30) {
464                setType(JES_LEVEL_1_LIST_TYPE);
465                super.setRegex(JES_LEVEL_1_LIST_REGEX);
466            } else if (header.indexOf("JOBNAME") == 0
467                    && header.indexOf("JOBID") > 8) {// header contains JOBNAME JOBID OWNER // STATUS CLASS
468                setType(JES_LEVEL_2_LIST_TYPE);
469                super.setRegex(JES_LEVEL_2_LIST_REGEX);
470            } else {
471                setType(UNKNOWN_LIST_TYPE);
472            }
473
474            if (isType != JES_LEVEL_1_LIST_TYPE) { // remove header is necessary
475                orig.remove(0);
476            }
477        }
478
479        return orig;
480    }
481
482    /**
483     * Explicitly set the type of listing being processed.
484     * @param type The listing type.
485     */
486    void setType(int type) {
487        isType = type;
488    }
489
490    /*
491     * @return
492     */
493    @Override
494    protected FTPClientConfig getDefaultConfiguration() {
495        return new FTPClientConfig(FTPClientConfig.SYST_MVS,
496                DEFAULT_DATE_FORMAT, null, null, null, null);
497    }
498
499}