/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.maven;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import org.w3c.tidy.DOMElementImpl;
import org.w3c.tidy.Tidy;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;

/**
 * Goal which extracts the content of a wiki page and converts it to DocBook
 * format.
 *
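 * A minimal plugin configuration might look like the following sketch (the
 * plugin coordinates and the resource name are assumed for illustration;
 * only the base URL, the title and the resource list are required):
 *
 * <pre>
 * &lt;plugin&gt;
 *   &lt;groupId&gt;org.apache.camel&lt;/groupId&gt;
 *   &lt;artifactId&gt;maven-html-to-docbook-plugin&lt;/artifactId&gt;
 *   &lt;configuration&gt;
 *     &lt;baseURL&gt;http://activemq.apache.org/camel/&lt;/baseURL&gt;
 *     &lt;title&gt;Camel Manual&lt;/title&gt;
 *     &lt;resources&gt;
 *       &lt;resource&gt;index.html&lt;/resource&gt;
 *     &lt;/resources&gt;
 *   &lt;/configuration&gt;
 * &lt;/plugin&gt;
 * </pre>
 *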
 * @goal htmlToDocbook
 * @phase process-sources
 */
public class GenerateDocBookMojo extends AbstractMojo {

    /**
     * Base URL of the wiki site to pull pages from.
     *
     * @parameter expression="${baseURL}"
     *            default-value="http://activemq.apache.org/camel/"
     * @required
     */
    private String baseURL;

    /**
     * List of wiki pages (relative to the base URL) to convert.
     *
     * @parameter
     */
    private String[] resources;

    /**
     * List of authors' full names.
     *
     * @parameter
     */
    private String[] authors;

    /**
     * Location of the XSL file; when not set, the bundled docbook.xsl
     * stylesheet is used.
     *
     * @parameter expression="${configDirectory}"
     */
    private String xslFile;

    /**
     * Location of the output directory for the generated DocBook sources.
     *
     * @parameter expression="${project.build.directory}/docbkx/docbkx-source"
     */
    private String outputPath;

    /**
     * Location of the output directory for the wiki source.
     *
     * @parameter expression="${project.build.directory}/docbkx/wiki-source"
     */
    private String wikiOutputPath;

    /**
     * @parameter expression="${title}"
     * @required
     */
    private String title;

    /**
     * @parameter expression="${subtitle}"
     */
    private String subtitle;

    /**
     * @parameter expression="${mainFilename}" default-value="manual"
     * @required
     */
    private String mainFilename;

    /**
     * @parameter expression="${version}" default-value="${project.version}"
     */
    private String version;

    /**
     * @parameter expression="${legalNotice}"
     */
    private String legalNotice;

    /**
     * Location of downloaded image files.
     *
     * @parameter expression="${project.build.directory}/site/book/images"
     */
    private String imageLocation;

    private String chapterId;

    private static final transient Log log = LogFactory.getLog(GenerateDocBookMojo.class);

    public void execute() throws MojoExecutionException {
        if (resources == null) {
            throw new MojoExecutionException("No resources configured");
        }
        File outputDir = new File(outputPath);
        File wikiOutputDir = new File(wikiOutputPath);
        File imageDir = new File(imageLocation);
        // mkdirs() is a no-op when a directory already exists, so each
        // directory is created independently of the others
        outputDir.mkdirs();
        imageDir.mkdirs();
        wikiOutputDir.mkdirs();

        this.createMainXML();

        for (int i = 0; i < resources.length; ++i) {
            this.setChapterId(removeExtension(resources[i]));
            process(resources[i]);
        }
    }
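
    /*
     * Illustration (assumed page markup; the live wiki may differ): from a
     * page such as
     *
     *   <html><body>
     *     <div class="wiki-content maincontent"> ...page content... </div>
     *   </body></html>
     *
     * process() extracts the inner div, stamps it with chapterId, baseURL and
     * imageLocation attributes for the stylesheet, transforms it into
     * <outputPath>/<resource-without-extension>.xml, and also writes a plain
     * HTML copy to the wiki source directory for debugging.
     */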
    /**
     * Extract the wiki content and transform it into DocBook format.
     *
     * @param resource the wiki page to process, relative to the base URL
     */
    public void process(String resource) {
        Tidy tidy = new Tidy();
        ByteArrayOutputStream out = null;
        BufferedOutputStream output = null;
        BufferedOutputStream wikiOutput = null;
        StreamSource streamSource = null;

        tidy.setXmlOut(true);
        try {
            out = new ByteArrayOutputStream();
            URL u = new URL(baseURL + resource);
            Document doc = tidy.parseDOM(new BufferedInputStream(u.openStream()), out);
            out.close();
            // let's extract the div element with class="wiki-content maincontent"
            NodeList nodeList = doc.getElementsByTagName("div");
            for (int i = 0; i < nodeList.getLength(); ++i) {
                Node node = nodeList.item(i);
                NamedNodeMap nm = node.getAttributes();
                Node attr = nm.getNamedItem("class");

                if (attr != null
                        && attr.getNodeValue().equalsIgnoreCase("wiki-content maincontent")) {
                    downloadImages(node);
                    // these attributes are picked up by the XSL stylesheet
                    // when generating the chapter
                    Element element = (Element) node;
                    element.setAttribute("chapterId", chapterId);
                    element.setAttribute("baseURL", baseURL);
                    element.setAttribute("imageLocation", "../images/");

                    DOMSource source = new DOMSource(processH2Section(doc, node));

                    output = new BufferedOutputStream(new FileOutputStream(
                            outputPath + File.separator + removeExtension(resource) + ".xml"));
                    StreamResult result = new StreamResult(output);
                    TransformerFactory tFactory = TransformerFactory.newInstance();
                    if (xslFile != null && !xslFile.trim().equals("")) {
                        streamSource = new StreamSource(xslFile);
                    } else {
                        InputStream xslStream = getClass().getResourceAsStream("/docbook.xsl");
                        streamSource = new StreamSource(xslStream);
                    }

                    Transformer transformer = tFactory.newTransformer(streamSource);
                    transformer.transform(source, result);

                    // generate the wiki source for debugging
                    wikiOutput = new BufferedOutputStream(new FileOutputStream(
                            wikiOutputPath + File.separator + removeExtension(resource) + ".html"));
                    result = new StreamResult(wikiOutput);
                    transformer = tFactory.newTransformer();
                    transformer.transform(source, result);

                    break;
                }
            }
        } catch (Exception e) {
            log.warn("Exception processing wiki content", e);
        } finally {
            try {
                if (output != null) {
                    output.close();
                }
            } catch (IOException e) {
                log.debug("Exception closing output stream", e);
            }
            try {
                if (wikiOutput != null) {
                    wikiOutput.close();
                }
            } catch (IOException e) {
                log.debug("Exception closing wiki output stream", e);
            }
        }
    }
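
    /*
     * Illustration (abridged): for resources {"index.html"} and the default
     * mainFilename "manual", createMainXML() writes manual.xml along the
     * lines of
     *
     *   <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN" "..."
     *   [ <!ENTITY index SYSTEM "index.xml"> ]>
     *   <book>
     *     <bookinfo>...</bookinfo>
     *     <toc></toc>
     *     &index;
     *   </book>
     */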
    /*
     * Create the main DocBook XML file, which pulls in each generated chapter
     * as an external entity.
     */
    public void createMainXML() {
        try {
            PrintWriter out = new PrintWriter(new FileWriter(outputPath
                    + File.separator + mainFilename + ".xml"));

            out.println("<!DOCTYPE book PUBLIC \"-//OASIS//DTD DocBook XML V4.4//EN\" \"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd\" ");
            out.println("[");
            for (int i = 0; i < resources.length; ++i) {
                out.println("<!ENTITY " + removeExtension(resources[i])
                        + " SYSTEM \"" + removeExtension(resources[i]) + ".xml\">");
            }
            out.println("]>");

            out.println("<book>");
            out.println("<bookinfo>");
            out.println("<title>" + title + "</title>");
            out.println("<subtitle>" + subtitle + "</subtitle>");
            out.println("<releaseinfo>" + version + "</releaseinfo>");
            out.println("<authorgroup>");
            if (authors != null) {
                for (int i = 0; i < authors.length; ++i) {
                    // split "Firstname Lastname" into the two DocBook elements
                    StringTokenizer name = new StringTokenizer(authors[i]);
                    String fname = name.nextToken();
                    String lname = "";
                    if (name.hasMoreTokens()) {
                        lname = name.nextToken();
                    }
                    out.println("<author>");
                    out.println("<firstname>" + fname + "</firstname>");
                    out.println("<surname>" + lname + "</surname>");
                    out.println("</author>");
                }
            }
            out.println("</authorgroup>");

            out.println("<legalnotice>");
            if (legalNotice != null && legalNotice.length() > 0) {
                out.println("<para>");
                out.println(legalNotice);
                out.println("</para>");
            } else {
                // default to the standard Apache license notice
                out.println("<para>Licensed to the Apache Software Foundation (ASF) under one or more");
                out.println("contributor license agreements. See the NOTICE file distributed with");
                out.println("this work for additional information regarding copyright ownership. The");
                out.println("ASF licenses this file to You under the Apache License, Version 2.0 (the");
                out.println("\"License\"); you may not use this file except in compliance with the");
                out.println("License. You may obtain a copy of the License at</para>");
                out.println("<para>http://www.apache.org/licenses/LICENSE-2.0</para>");
                out.println("<para>Unless required by applicable law or agreed to in writing,");
                out.println("software distributed under the License is distributed on an \"AS IS\"");
                out.println("BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or");
                out.println("implied. See the License for the specific language governing permissions");
                out.println("and limitations under the License.</para>");
            }
            out.println("</legalnotice>");
            out.println("</bookinfo>");
            out.println("<toc></toc>");

            // reference each chapter entity declared above
            for (int i = 0; i < resources.length; ++i) {
                out.println("&" + removeExtension(resources[i]) + ";");
            }

            out.println("</book>");
            out.flush();
            out.close();
        } catch (IOException e) {
            log.warn("Exception creating the " + mainFilename + ".xml file", e);
        }
    }

    public void downloadImages(Node node) {
        List<String> imageList = getImageUrls(node);
        for (String imageUrl : imageList) {
            String imageFile = "imageFile";

            // resolve relative image paths against the base URL
            if (imageUrl.indexOf("http://") < 0) {
                imageUrl = baseURL + imageUrl;
            }
            try {
                URL url = new URL(imageUrl);
                // the image file name is the last path segment of the URL
                StringTokenizer st = new StringTokenizer(url.getFile(), "/");
                while (st.hasMoreTokens()) {
                    imageFile = st.nextToken();
                }

                URLConnection connection = url.openConnection();
                InputStream stream = connection.getInputStream();
                BufferedInputStream in = new BufferedInputStream(stream);
                FileOutputStream file = new FileOutputStream(imageLocation
                        + File.separator + imageFile);
                BufferedOutputStream out = new BufferedOutputStream(file);
                int i;
                while ((i = in.read()) != -1) {
                    out.write(i);
                }
                out.flush();
                out.close();
                in.close();
            } catch (Exception e) {
                log.warn("Exception downloading image " + imageFile, e);
            }
        }
    }

    public List<String> getImageUrls(Node node) {
        List<String> list = new ArrayList<String>();
        DOMElementImpl doc = (DOMElementImpl) node;
        NodeList imageList = doc.getElementsByTagName("img");

        if (imageList != null) {
            for (int i = 0; i < imageList.getLength(); ++i) {
                Node imageNode = imageList.item(i);
                NamedNodeMap nm = imageNode.getAttributes();
                Node attr = nm.getNamedItem("src");
                if (attr != null) {
                    list.add(attr.getNodeValue());
                }
            }
        }
        return list;
    }

    public String getChapterId() {
        return chapterId;
    }

    public void setChapterId(String chapterId) {
        this.chapterId = chapterId;
    }

    public String removeExtension(String resource) {
        int index = resource.indexOf('.');
        // leave the name untouched when there is no extension to strip
        if (index < 0) {
            return resource;
        }
        return resource.substring(0, index);
    }
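
    /*
     * Illustration (assumed input; real pages may differ): given chapter
     * content such as
     *
     *   <div> some text <h2>First</h2> <ul>...</ul> <h2>Second</h2> ... </div>
     *
     * the leading run of nodes is wrapped in a new <p> and everything
     * following each <h2> is copied into an <h2_section> nested inside it:
     *
     *   <div> <p>some text</p> <h2>First<h2_section><ul>...</ul></h2_section></h2> ... </div>
     */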
    /*
     * Creates an <h2_section> node inside each <h2> node and copies all nodes
     * following that <h2> into it, until the next <h2> node is found. This is
     * done so that chapter contents can be divided into sections delimited by
     * <h2> nodes.
     */
    public Node processH2Section(Document doc, Node node) {
        NodeList nodeList = node.getChildNodes();
        Node h2Node = null;
        Node pNode = null;
        boolean wrappingLeadingNodes = false;

        for (int x = 0; x < nodeList.getLength(); ++x) {
            Node node2 = nodeList.item(x);

            if (node2 != null) {
                String nodeName = node2.getNodeName();

                if (nodeName.equalsIgnoreCase("h2")) {
                    // start a new section container inside this <h2>
                    h2Node = node2.appendChild(doc.createElement("h2_section"));
                } else {
                    // if the first node is not a <p> or an <h2> node, create a
                    // <p> node and place all succeeding nodes inside it until
                    // a <p> or <h2> node is found
                    if (x == 0 && !nodeName.equalsIgnoreCase("p")
                            && !nodeName.equalsIgnoreCase("h2")) {
                        pNode = node.insertBefore(doc.createElement("p"), node2);
                        x++; // skip over the <p> we just inserted
                        wrappingLeadingNodes = true;
                    }
                    if (wrappingLeadingNodes) {
                        if (node2 == node.getLastChild()) {
                            pNode.appendChild(node2.cloneNode(true));
                        } else {
                            Node nextNode = node2.getNextSibling();
                            pNode.appendChild(node2.cloneNode(true));
                            if (nextNode.getNodeName().equalsIgnoreCase("h2")
                                    || nextNode.getNodeName().equalsIgnoreCase("p")) {
                                wrappingLeadingNodes = false;
                            }
                        }
                    }

                    if (h2Node != null) {
                        h2Node.appendChild(node2.cloneNode(true));
                    }
                }
            }
        }

        // let's remove all direct children that are not <h2> or <p> - they
        // have already been copied inside an <h2_section> or <p> node above
        NodeList nodeList3 = node.getChildNodes();
        for (int x = 0; x < nodeList3.getLength(); ++x) {
            Node node2 = nodeList3.item(x);
            if (node2 != null && !node2.getNodeName().equalsIgnoreCase("p")
                    && !node2.getNodeName().equalsIgnoreCase("h2")) {
                node.removeChild(node2);
                x--; // the live NodeList shrinks after each removal
            }
        }
        return node;
    }
}