001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.maven;
018    
019    import org.apache.maven.plugin.AbstractMojo;
020    import org.apache.maven.plugin.MojoExecutionException;
021    
022    import java.io.File;
023    import java.io.FileOutputStream;
024    import java.io.IOException;
025    import java.io.BufferedInputStream;
026    import java.io.BufferedOutputStream;
027    import java.io.FileWriter;
028    import java.io.ByteArrayOutputStream;
029    import java.io.InputStream;
030    import java.io.PrintWriter;
031    import java.util.ArrayList;
032    import java.util.Iterator;
033    import java.util.List;
034    import java.util.StringTokenizer;
035    import java.net.URL;
036    import java.net.URLConnection;
037    
038    import javax.xml.transform.Transformer;
039    import javax.xml.transform.TransformerFactory;
040    import javax.xml.transform.dom.DOMSource;
041    import javax.xml.transform.stream.StreamResult;
042    import javax.xml.transform.stream.StreamSource;
043    
044    import org.w3c.dom.Element;
045    import org.w3c.dom.NodeList;
046    import org.w3c.dom.Document;
047    import org.w3c.dom.Node;
048    import org.w3c.dom.NamedNodeMap;
049    
050    import org.w3c.tidy.DOMElementImpl;
051    import org.w3c.tidy.Tidy;
052    
053    import org.apache.commons.logging.Log;
054    import org.apache.commons.logging.LogFactory;
055    
056    /**
057     * Goal which extracts the content of a wiki page and converts it to docbook
058     * format
059     * 
060     * @goal htmlToDocbook
061     * @phase process-sources
062     */
063    public class GenerateDocBookMojo extends AbstractMojo {
064    
065            /**
066             * Base URL.
067             * 
068             * @parameter expression="${baseURL}"
069             *            default-value="http://activemq.apache.org/camel/"
070             * @required
071             */
072            private String baseURL;
073    
074            /**
075             * List of resources
076             * 
077             * @parameter
078             */
079            private String[] resources;
080    
081            /**
082             * List of author's fullname
083             * 
084             * @parameter
085             */
086            private String[] authors;
087    
088            /**
089             * Location of the xsl file.
090             * 
091             * @parameter expression="${configDirectory}"
092             *           
093             */
094            private String xslFile;
095    
096            /**
097             * Location of the output directory.
098             * 
099             * @parameter expression="${project.build.directory}/docbkx/docbkx-source"
100             */
101            private String outputPath;
102    
103            /**
104             * Location of the output directory for wiki source.
105             * 
106             * @parameter expression="${project.build.directory}/docbkx/wiki-source"
107             */
108            private String wikiOutputPath;
109    
110            /**
111             * @parameter expression="${title}"
112             * @required
113             */
114            private String title;
115    
116            /**
117             * @parameter expression="${subtitle}"
118             */
119            private String subtitle;
120    
121            /**
122             * @parameter expression="${mainFilename}" default-value="manual"
123             * @required
124             */
125            private String mainFilename;
126    
127            /**
128             * @parameter expression="${version}" default-value="${project.version}"
129             */
130            private String version;
131    
132            /**
133             * @parameter expression="${legalNotice}"
134             */
135            private String legalNotice;
136    
137            /**
138             * Location of image files.
139             * 
140             * @parameter expression="${project.build.directory}/site/book/images"
141             *            
142             */
143            private String imageLocation;
144    
145            private String chapterId;
146    
147            private static final transient Log log = LogFactory
148                            .getLog(GenerateDocBookMojo.class);
149    
150            public void execute() throws MojoExecutionException {
151                    File outputDir = new File(outputPath);
152                    File wikiOutputDir = new File(wikiOutputPath);
153                    File imageDir = new File(imageLocation);
154                    if (!outputDir.exists()) {
155                            outputDir.mkdirs();
156                            imageDir.mkdirs();
157                            wikiOutputDir.mkdirs();
158                    }
159                    this.createMainXML();
160    
161                    for (int i = 0; i < resources.length; ++i) {
162                            this.setChapterId(removeExtension(resources[i]));
163    
164                            process(resources[i]);
165                    }
166    
167            }
168    
169            /**
170             * Extract the wiki content and tranform it into docbook format
171             * 
172             * @param resource
173             */
174            public void process(String resource) {
175    
176                    Tidy tidy = new Tidy();
177                    ByteArrayOutputStream out = null;
178                    BufferedOutputStream output = null;
179                    BufferedOutputStream wikiOutput = null;
180                    StreamSource streamSource = null;
181    
182                    tidy.setXmlOut(true);
183                    try {
184                            out = new ByteArrayOutputStream();
185                            URL u = new URL(baseURL + resource);
186                            Document doc = tidy.parseDOM(
187                                            new BufferedInputStream(u.openStream()), out);
188                            out.close();
189                            // let's extract the div element with class="wiki-content
190                            // maincontent"
191                            NodeList nodeList = doc.getElementsByTagName("div");
192                            for (int i = 0; i < nodeList.getLength(); ++i) {
193                                    Node node = nodeList.item(i);
194    
195                                    NamedNodeMap nm = node.getAttributes();
196                                    Node attr = nm.getNamedItem("class");
197    
198                                    if (attr != null
199                                                    && attr.getNodeValue().equalsIgnoreCase(
200                                                                    "wiki-content maincontent")) {
201                                            downloadImages(node);
202                                            // These attributes will be used by xsl to
203                                            Element element = (Element) node;
204                                            element.setAttribute("chapterId", chapterId);
205                                            element.setAttribute("baseURL", baseURL);
206                                            element.setAttribute("imageLocation", "../images/");
207    
208                                            DOMSource source = new DOMSource(
209                                                            processH2Section(doc, node));
210    
211                                            output = new BufferedOutputStream(new FileOutputStream(
212                                                            outputPath + File.separator
213                                                                            + removeExtension(resource) + ".xml"));
214                                            StreamResult result = new StreamResult(output);
215                                            TransformerFactory tFactory = TransformerFactory
216                                                            .newInstance();
217                                            if (xslFile != null && !xslFile.trim().equals("")) {
218                                                    streamSource = new StreamSource(xslFile);
219                                            } else {
220                                                    InputStream xslStream = getClass().getResourceAsStream(
221                                                                    "/docbook.xsl");
222                                                    streamSource = new StreamSource(xslStream);
223                                            }
224    
225                                            Transformer transformer = tFactory
226                                                            .newTransformer(streamSource);
227                                            transformer.transform(source, result);
228    
229                                            // generate the wiki source for debugging
230                                            wikiOutput = new BufferedOutputStream(new FileOutputStream(
231                                                            wikiOutputPath + File.separator
232                                                                            + removeExtension(resource) + ".html"));
233                                            result = new StreamResult(wikiOutput);
234                                            transformer = tFactory.newTransformer();
235                                            transformer.transform(source, result);
236    
237                                            break;
238                                    }
239    
240                            }
241    
242                    } catch (Exception e) {
243                            log.debug("Exception processing wiki content", e);
244                    } finally {
245                            try {
246                                    if (output != null)
247                                            output.close();
248                            } catch (IOException e) {
249                                    // TODO Auto-generated catch block
250                                    log.debug("Exception closing output stream", e);
251                            }
252                    }
253            }
254    
255            /*
256             *  create the main docbook xml file 
257             */
258            public void createMainXML() {
259                    try {
260    
261                            PrintWriter out = new PrintWriter(new FileWriter(outputPath
262                                            + File.separator + mainFilename + ".xml"));
263    
264                            out
265                                            .println("<!DOCTYPE book PUBLIC \"-//OASIS//DTD DocBook XML V4.4//EN\" \"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd\" ");
266                            out.println("[");
267    
268                            for (int i = 0; i < resources.length; ++i) {
269                                    out.println("<!ENTITY " + removeExtension(resources[i])
270                                                    + " SYSTEM \"" + removeExtension(resources[i])
271                                                    + ".xml\">");
272                            }
273    
274                            out.println("]>");
275                            out.println("<book>");
276                            out.println("<bookinfo>");
277                            out.println("<title>" + title + "</title>");
278                            out.println("<subtitle>" + subtitle + "</subtitle>");
279                            out.println("<releaseinfo>" + version + "</releaseinfo>");
280                            out.println(" <authorgroup>");
281                            if (authors != null) {
282                                    for (int i = 0; i < authors.length; ++i) {
283                                            StringTokenizer name = new StringTokenizer(authors[i]);
284                                            String fname = name.nextToken();
285                                            String lname = "";
286                                            if (name.hasMoreTokens()) {
287                                                    lname = name.nextToken();
288                                            }
289                                            out.println("<author>");
290                                            out.println("<firstname>" + fname + "</firstname>");
291                                            out.println("<surname>" + lname + "</surname>");
292                                            out.println("</author>");
293    
294                                    }
295                            }
296    
297                            out.println("</authorgroup>");
298                            out.println("<legalnotice>");
299                            if (legalNotice != null && legalNotice.length() > 0) {
300                                    out.println("<para>");
301                                    out.println(legalNotice);
302                                    out.println("</para>");
303                            } else {
304                                    out
305                                                    .println("<para>Licensed to the Apache Software Foundation (ASF) under one or more");
306                                    out
307                                                    .println("contributor license agreements. See the NOTICE file distributed with");
308                                    out
309                                                    .println("this work for additional information regarding copyright ownership. The");
310                                    out
311                                                    .println("ASF licenses this file to You under the Apache License, Version 2.0 (the");
312                                    out
313                                                    .println("\"License\"); you may not use this file except in compliance with the");
314                                    out
315                                                    .println("License. You may obtain a copy of the License at</para>");
316                                    out
317                                                    .println("<para>http://www.apache.org/licenses/LICENSE-2.0</para>");
318                                    out
319                                                    .println("<para>Unless required by applicable law or agreed to in writing,");
320                                    out
321                                                    .println(" software distributed under the License is distributed on an \"AS IS\"");
322                                    out
323                                                    .println("BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or");
324                                    out
325                                                    .println("implied. See the License for the specific language governing permissions");
326                                    out.println("and limitations under the License.</para>");
327                            }
328    
329                            out.println("</legalnotice>");
330                            out.println("</bookinfo>");
331                            out.println("<toc></toc>");
332    
333                            for (int i = 0; i < resources.length; ++i) {
334                                    out.println("&" + removeExtension(resources[i]) + ";");
335                            }
336    
337                            out.println("</book>");
338                            out.flush();
339                            out.close();
340    
341                    } catch (IOException e) {
342                            // TODO Auto-generated catch block
343                            log.debug("Exception in creating manual.xml file", e);
344                    }
345            }
346    
347            public void downloadImages(Node node) {
348                    List<String> imageList = getImageUrls(node);
349                    Iterator<String> iter = imageList.iterator();
350                    while (iter.hasNext()) {
351                            String imageUrl = (String) iter.next();
352                            String imageFile = "imageFile";
353    
354                            //check if url path is relative
355                            if (imageUrl.indexOf("http://") < 0) {
356                                    imageUrl = baseURL + imageUrl;
357                            }
358                            try {
359    
360                                    URL url = new URL(imageUrl);
361                                    StringTokenizer st = new StringTokenizer(url.getFile(), "/");
362                                    while (st.hasMoreTokens()) {
363                                            imageFile = st.nextToken();
364                                    }
365    
366                                    URLConnection connection = url.openConnection();
367                                    InputStream stream = connection.getInputStream();
368                                    BufferedInputStream in = new BufferedInputStream(stream);
369                                    FileOutputStream file = new FileOutputStream(imageLocation
370                                                    + File.separator + imageFile);
371                                    BufferedOutputStream out = new BufferedOutputStream(file);
372                                    int i;
373                                    while ((i = in.read()) != -1) {
374                                            out.write(i);
375                                    }
376                                    out.flush();
377                            } catch (Exception e) {
378                                    log.debug("Exception in downloading image " + imageFile, e);
379                            }
380    
381                    }
382            }
383    
384            public List<String> getImageUrls(Node node) {
385                    List<String> list = new ArrayList<String>();
386                    DOMElementImpl doc = (DOMElementImpl) node;
387                    NodeList imageList = doc.getElementsByTagName("img");
388    
389                    if (imageList != null) {
390                            for (int i = 0; i < imageList.getLength(); ++i) {
391                                    Node imageNode = imageList.item(i);
392    
393                                    NamedNodeMap nm = imageNode.getAttributes();
394                                    Node attr = nm.getNamedItem("src");
395                                    if (attr != null) {
396                                            list.add(attr.getNodeValue());
397                                    }
398    
399                            }
400                    }
401                    return list;
402            }
403    
404            public String getChapterId() {
405                    return chapterId;
406            }
407    
408            public void setChapterId(String chapterId) {
409                    this.chapterId = chapterId;
410            }
411    
412            public String removeExtension(String resource) {
413                    int index = resource.indexOf('.');
414                    return resource.substring(0, index);
415            }
416    
417            /*
418             * creates a <h2_section> node  and place all nodes  after a <h2> node until another <h2> node is found. 
419             * This is so that we can divide chapter contents into section delimited by a <h2> node
420             */
421    
422            public Node processH2Section(Document doc, Node node) {
423                    NodeList nodeList = node.getChildNodes();
424                    Node h2Node = null;
425                    Node pNode = null;
426                    boolean firstInstanceOfH2 = false;
427    
428                    for (int x = 0; x < nodeList.getLength(); ++x) {
429                            Node node2 = nodeList.item(x);
430    
431                            if (node2 != null) {
432                                    String nodes = node2.getNodeName();
433    
434                                    if (nodes.equalsIgnoreCase("h2")) {
435                                            h2Node = node2.appendChild(doc.createElement("h2_section"));
436                                    } else {
437                                            //if first node is not a <p> or a h2 node, create a <p> node and place all succeeding nodes 
438                                            //inside this node until a <p> or <h2> node is found
439                                            if (x == 0 && !nodes.equalsIgnoreCase("p")
440                                                            && !nodes.equalsIgnoreCase("h2")) {
441                                                    pNode = node
442                                                                    .insertBefore(doc.createElement("p"), node2);
443                                                    x++;
444                                                    firstInstanceOfH2 = true;
445                                            }
446                                            if (firstInstanceOfH2) {
447                                                    if (node2 == node.getLastChild()) {
448                                                            pNode.appendChild(node2.cloneNode(true));
449                                                    } else {
450                                                            Node nextNode = node2.getNextSibling();
451                                                            pNode.appendChild(node2.cloneNode(true));
452                                                            if (nextNode.getNodeName().equalsIgnoreCase("h2")
453                                                                            || nextNode.getNodeName().equalsIgnoreCase(
454                                                                                            "p")) {
455                                                                    firstInstanceOfH2 = false;
456                                                            }
457                                                    }
458    
459                                            }
460    
461                                            if (h2Node != null) {
462                                                    h2Node.appendChild(node2.cloneNode(true));
463                                            }
464                                    }
465    
466                            }
467                    }
468    
469                    //let's remove all  nodes that are not <h2> or <p> - they should already have been copied inside an <h2> or <p> node
470                    NodeList nodeList3 = node.getChildNodes();
471                    boolean afterH2 = false;
472                    for (int x = 0; x < nodeList3.getLength(); ++x) {
473                            Node node2 = nodeList3.item(x);
474                            if (node2.getNodeName().equalsIgnoreCase("h2") && !afterH2) {
475                                    afterH2 = true;
476                            }
477    
478                            if (node2 != null && !node2.getNodeName().equalsIgnoreCase("p")
479                                            && !node2.getNodeName().equalsIgnoreCase("h2")) {
480                                    node.removeChild(node2);
481                                    x--;
482                            }
483                    }
484                    return node;
485            }
486    }