001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 *
017 */
018
019package org.apache.commons.compress.archivers.zip;
020
021import org.apache.commons.compress.archivers.ArchiveStreamFactory;
022import org.apache.commons.compress.utils.FileNameUtils;
023import org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel;
024
025import java.io.File;
026import java.io.IOException;
027import java.io.Serializable;
028import java.nio.ByteBuffer;
029import java.nio.channels.SeekableByteChannel;
030import java.nio.file.Files;
031import java.nio.file.StandardOpenOption;
032import java.util.ArrayList;
033import java.util.Arrays;
034import java.util.Collections;
035import java.util.Comparator;
036import java.util.List;
037import java.util.Objects;
038import java.util.regex.Pattern;
039
040/**
041 * {@link MultiReadOnlySeekableByteChannel} that knows what a split ZIP archive should look like.
042 *
043 * <p>If you want to read a split archive using {@link ZipFile} then create an instance of this class from the parts of
044 * the archive.</p>
045 *
046 * @since 1.20
047 */
048public class ZipSplitReadOnlySeekableByteChannel extends MultiReadOnlySeekableByteChannel {
049    private static final int ZIP_SPLIT_SIGNATURE_LENGTH = 4;
050    private final ByteBuffer zipSplitSignatureByteBuffer =
051        ByteBuffer.allocate(ZIP_SPLIT_SIGNATURE_LENGTH);
052
053    /**
054     * Concatenates the given channels.
055     *
056     * <p>The channels should be add in ascending order, e.g. z01,
057     * z02, ... z99, zip please note that the .zip file is the last
058     * segment and should be added as the last one in the channels</p>
059     *
060     * @param channels the channels to concatenate
061     * @throws NullPointerException if channels is null
062     * @throws IOException if the first channel doesn't seem to hold
063     * the beginning of a split archive
064     */
065    public ZipSplitReadOnlySeekableByteChannel(List<SeekableByteChannel> channels)
066        throws IOException {
067        super(channels);
068
069        // the first split zip segment should begin with zip split signature
070        assertSplitSignature(channels);
071    }
072
073    /**
074     * Based on the zip specification:
075     *
076     * <p>
077     * 8.5.3 Spanned/Split archives created using PKZIP for Windows
078     * (V2.50 or greater), PKZIP Command Line (V2.50 or greater),
079     * or PKZIP Explorer will include a special spanning
080     * signature as the first 4 bytes of the first segment of
081     * the archive.  This signature (0x08074b50) will be
082     * followed immediately by the local header signature for
083     * the first file in the archive.
084     *
085     * <p>
086     * the first 4 bytes of the first zip split segment should be the zip split signature(0x08074B50)
087     *
088     * @param channels channels to be valided
089     * @throws IOException
090     */
091    private void assertSplitSignature(final List<SeekableByteChannel> channels)
092        throws IOException {
093        SeekableByteChannel channel = channels.get(0);
094        // the zip split file signature is at the beginning of the first split segment
095        channel.position(0L);
096
097        zipSplitSignatureByteBuffer.rewind();
098        channel.read(zipSplitSignatureByteBuffer);
099        final ZipLong signature = new ZipLong(zipSplitSignatureByteBuffer.array());
100        if (!signature.equals(ZipLong.DD_SIG)) {
101            channel.position(0L);
102            throw new IOException("The first zip split segment does not begin with split zip file signature");
103        }
104
105        channel.position(0L);
106    }
107
108    /**
109     * Concatenates the given channels.
110     *
111     * @param channels the channels to concatenate, note that the LAST CHANNEL of channels should be the LAST SEGMENT(.zip)
112     *                 and theses channels should be added in correct order (e.g. .z01, .z02... .z99, .zip)
113     * @return SeekableByteChannel that concatenates all provided channels
114     * @throws NullPointerException if channels is null
115     * @throws IOException if reading channels fails
116     */
117    public static SeekableByteChannel forOrderedSeekableByteChannels(SeekableByteChannel... channels) throws IOException {
118        if (Objects.requireNonNull(channels, "channels must not be null").length == 1) {
119            return channels[0];
120        }
121        return new ZipSplitReadOnlySeekableByteChannel(Arrays.asList(channels));
122    }
123
124    /**
125     * Concatenates the given channels.
126     *
127     * @param lastSegmentChannel channel of the last segment of split zip segments, its extension should be .zip
128     * @param channels           the channels to concatenate except for the last segment,
129     *                           note theses channels should be added in correct order (e.g. .z01, .z02... .z99)
130     * @return SeekableByteChannel that concatenates all provided channels
131     * @throws NullPointerException if lastSegmentChannel or channels is null
132     * @throws IOException if the first channel doesn't seem to hold
133     * the beginning of a split archive
134     */
135    public static SeekableByteChannel forOrderedSeekableByteChannels(SeekableByteChannel lastSegmentChannel,
136        Iterable<SeekableByteChannel> channels) throws IOException {
137        Objects.requireNonNull(channels, "channels");
138        Objects.requireNonNull(lastSegmentChannel, "lastSegmentChannel");
139
140        List<SeekableByteChannel> channelsList = new ArrayList<>();
141        for (SeekableByteChannel channel : channels) {
142            channelsList.add(channel);
143        }
144        channelsList.add(lastSegmentChannel);
145
146        SeekableByteChannel[] channelArray = new SeekableByteChannel[channelsList.size()];
147        return forOrderedSeekableByteChannels(channelsList.toArray(channelArray));
148    }
149
150    /**
151     * Concatenates zip split files from the last segment(the extension SHOULD be .zip)
152     *
153     * @param lastSegmentFile the last segment of zip split files, note that the extension SHOULD be .zip
154     * @return SeekableByteChannel that concatenates all zip split files
155     * @throws IllegalArgumentException if the lastSegmentFile's extension is NOT .zip
156     * @throws IOException if the first channel doesn't seem to hold
157     * the beginning of a split archive
158     */
159    public static SeekableByteChannel buildFromLastSplitSegment(File lastSegmentFile) throws IOException {
160        String extension = FileNameUtils.getExtension(lastSegmentFile.getCanonicalPath());
161        if (!extension.equalsIgnoreCase(ArchiveStreamFactory.ZIP)) {
162            throw new IllegalArgumentException("The extension of last zip split segment should be .zip");
163        }
164
165        File parent = lastSegmentFile.getParentFile();
166        String fileBaseName = FileNameUtils.getBaseName(lastSegmentFile.getCanonicalPath());
167        ArrayList<File> splitZipSegments = new ArrayList<>();
168
169        // zip split segments should be like z01,z02....z(n-1) based on the zip specification
170        Pattern pattern = Pattern.compile(Pattern.quote(fileBaseName) + ".[zZ][0-9]+");
171        final File[] children = parent.listFiles();
172        if (children != null) {
173            for (File file : children) {
174                if (!pattern.matcher(file.getName()).matches()) {
175                    continue;
176                }
177
178                splitZipSegments.add(file);
179            }
180        }
181
182        Collections.sort(splitZipSegments, new ZipSplitSegmentComparator());
183        return forFiles(lastSegmentFile, splitZipSegments);
184    }
185
186    /**
187     * Concatenates the given files.
188     *
189     * @param files the files to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip)
190     *              and theses files should be added in correct order (e.g. .z01, .z02... .z99, .zip)
191     * @return SeekableByteChannel that concatenates all provided files
192     * @throws NullPointerException if files is null
193     * @throws IOException          if opening a channel for one of the files fails
194     * @throws IOException if the first channel doesn't seem to hold
195     * the beginning of a split archive
196     */
197    public static SeekableByteChannel forFiles(File... files) throws IOException {
198        List<SeekableByteChannel> channels = new ArrayList<>();
199        for (File f : Objects.requireNonNull(files, "files must not be null")) {
200            channels.add(Files.newByteChannel(f.toPath(), StandardOpenOption.READ));
201        }
202        if (channels.size() == 1) {
203            return channels.get(0);
204        }
205        return new ZipSplitReadOnlySeekableByteChannel(channels);
206    }
207
208    /**
209     * Concatenates the given files.
210     *
211     * @param lastSegmentFile the last segment of split zip segments, its extension should be .zip
212     * @param files           the files to concatenate except for the last segment,
213     *                        note theses files should be added in correct order (e.g. .z01, .z02... .z99)
214     * @return SeekableByteChannel that concatenates all provided files
215     * @throws IOException if the first channel doesn't seem to hold
216     * the beginning of a split archive
217     * @throws NullPointerException if files or lastSegmentFile is null
218     */
219    public static SeekableByteChannel forFiles(File lastSegmentFile, Iterable<File> files) throws IOException {
220        Objects.requireNonNull(files, "files");
221        Objects.requireNonNull(lastSegmentFile, "lastSegmentFile");
222
223        List<File> filesList = new ArrayList<>();
224        for (File f : files) {
225            filesList.add(f);
226        }
227        filesList.add(lastSegmentFile);
228
229        File[] filesArray = new File[filesList.size()];
230        return forFiles(filesList.toArray(filesArray));
231    }
232
233    private static class ZipSplitSegmentComparator implements Comparator<File>, Serializable {
234        private static final long serialVersionUID = 20200123L;
235        @Override
236        public int compare(File file1, File file2) {
237            String extension1 = FileNameUtils.getExtension(file1.getPath());
238            String extension2 = FileNameUtils.getExtension(file2.getPath());
239
240            if (!extension1.startsWith("z")) {
241                return -1;
242            }
243
244            if (!extension2.startsWith("z")) {
245                return 1;
246            }
247
248            Integer splitSegmentNumber1 = Integer.parseInt(extension1.substring(1));
249            Integer splitSegmentNumber2 = Integer.parseInt(extension2.substring(1));
250
251            return splitSegmentNumber1.compareTo(splitSegmentNumber2);
252        }
253    }
254}