1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.jetspeed.portlet;
18
19 import java.io.BufferedInputStream;
20 import java.io.ByteArrayInputStream;
21 import java.io.ByteArrayOutputStream;
22 import java.io.FileReader;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.io.OutputStreamWriter;
27 import java.io.PrintWriter;
28 import java.io.Reader;
29 import java.io.StringWriter;
30 import java.io.UnsupportedEncodingException;
31 import java.io.Writer;
32 import java.net.URL;
33 import java.util.ArrayList;
34 import java.util.Arrays;
35 import java.util.HashMap;
36 import java.util.Iterator;
37 import java.util.Map;
38 import java.util.StringTokenizer;
39
40 import javax.portlet.ActionRequest;
41 import javax.portlet.ActionResponse;
42 import javax.portlet.PortletConfig;
43 import javax.portlet.PortletContext;
44 import javax.portlet.PortletException;
45 import javax.portlet.PortletMode;
46 import javax.portlet.PortletURL;
47 import javax.portlet.RenderRequest;
48 import javax.portlet.RenderResponse;
49
50 import org.apache.commons.httpclient.Cookie;
51 import org.apache.commons.httpclient.Header;
52 import org.apache.commons.httpclient.HttpClient;
53 import org.apache.commons.httpclient.HttpMethod;
54 import org.apache.commons.httpclient.HttpMethodBase;
55 import org.apache.commons.httpclient.NameValuePair;
56 import org.apache.commons.httpclient.methods.GetMethod;
57 import org.apache.commons.httpclient.methods.PostMethod;
58 import org.apache.commons.logging.Log;
59 import org.apache.commons.logging.LogFactory;
60 import org.apache.jetspeed.portlet.webcontent.WebContentHistoryList;
61 import org.apache.jetspeed.portlet.webcontent.WebContentHistoryPage;
62 import org.apache.jetspeed.rewriter.JetspeedRewriterController;
63 import org.apache.jetspeed.rewriter.RewriterController;
64 import org.apache.jetspeed.rewriter.RewriterException;
65 import org.apache.jetspeed.rewriter.RulesetRewriter;
66 import org.apache.jetspeed.rewriter.WebContentRewriter;
67 import org.apache.jetspeed.rewriter.html.neko.NekoParserAdaptor;
68 import org.apache.jetspeed.rewriter.rules.Ruleset;
69 import org.apache.jetspeed.rewriter.xml.SaxParserAdaptor;
70 import org.apache.portals.bridges.velocity.GenericVelocityPortlet;
71 import org.apache.portals.messaging.PortletMessaging;
72
73
74 /***
75 * WebContentPortlet
76 *
 * TODO: Preferences, cache stream instead of URL
78 *
79 * @author <a href="mailto:rogerrutr@apache.org">Roger Ruttimann </a>
80 * @version $Id: WebContentPortlet.java 517719 2007-03-13 15:05:48Z ate $
81 */
82
83 public class WebContentPortlet extends GenericVelocityPortlet
84 {
85
86 /***
87 * WebContentPortlet Allows navigation inside the portlet and caches the
88 * latest URL
89 */
90
91 /***
92 * Configuration constants.
93 */
94 public static final String VIEW_SOURCE_PARAM = "viewSource";
95 public static final String EDIT_SOURCE_PARAM = "editSource";
96
97
98 public static final String BROWSER_ACTION_PARAM = "wcBrowserAction";
99 public static final String BROWSER_ACTION_PREVIOUS_PAGE = "previousPage";
100 public static final String BROWSER_ACTION_REFRESH_PAGE = "refreshPage";
101 public static final String BROWSER_ACTION_NEXT_PAGE = "nextPage";
102
103 /***
104 * Action Parameter
105 */
106
107
108
109 public static final String HISTORY = "webcontent.history";
110 public static final String HTTP_STATE = "webcontent.http.state";
111
112
113
114 protected final static Log log = LogFactory.getLog(WebContentPortlet.class);
115 public final static String defaultEncoding = "UTF-8";
116
117
118
119 private RulesetRewriter rewriter = null;
120 private RewriterController rewriteController = null;
121
122
123 public WebContentPortlet()
124 {
125 super();
126 }
127
128 /***
129 * Initialize portlet configuration.
130 */
131 public void init(PortletConfig config) throws PortletException
132 {
133 super.init(config);
134 }
135
136 /***
137 * processAction() Checks action initiated by the WebContent portlet which
138 * means that a user has clicked on an URL
139 *
140 * @param actionRequest
141 * @param actionResponse
142 * @throws PortletException
143 * @throws IOException
144 */
145 public void processAction(ActionRequest actionRequest, ActionResponse actionResponse) throws PortletException,
146 IOException
147 {
148
149 String browserAction = actionRequest.getParameter(BROWSER_ACTION_PARAM);
150 if (browserAction != null)
151 {
152 if (!browserAction.equalsIgnoreCase(BROWSER_ACTION_REFRESH_PAGE))
153 {
154
155 WebContentHistoryList history = (WebContentHistoryList)PortletMessaging.receive(actionRequest, HISTORY);
156
157 if (browserAction.equalsIgnoreCase(BROWSER_ACTION_PREVIOUS_PAGE))
158 {
159 if (history.hasPreviousPage())
160 history.getPreviousPage();
161 }
162 else if (browserAction.equalsIgnoreCase(BROWSER_ACTION_NEXT_PAGE))
163 {
164 if (history.hasNextPage())
165 history.getNextPage();
166 }
167 }
168
169 return ;
170 }
171
172
173 String webContentURL = actionRequest.getParameter(WebContentRewriter.ACTION_PARAMETER_URL);
174 String webContentMethod = actionRequest.getParameter(WebContentRewriter.ACTION_PARAMETER_METHOD);
175 Map webContentParams = new HashMap(actionRequest.getParameterMap()) ;
176
177
178 if (webContentMethod == null) webContentMethod = "" ;
179
180
181 webContentParams.remove(WebContentRewriter.ACTION_PARAMETER_URL);
182 webContentParams.remove(WebContentRewriter.ACTION_PARAMETER_METHOD);
183
184 if (webContentURL == null || actionRequest.getPortletMode() == PortletMode.EDIT)
185 {
186 processPreferencesAction(actionRequest, actionResponse);
187 webContentURL = actionRequest.getPreferences().getValue("SRC", "http://portals.apache.org");
188
189
190 webContentParams.clear();
191 }
192
193
194
195
196 if (webContentURL != null && webContentURL.length() > 0)
197 {
198
199 WebContentHistoryList history = (WebContentHistoryList)PortletMessaging.receive(actionRequest, HISTORY);
200 if (history == null)
201 history = new WebContentHistoryList();
202 history.visitPage(new WebContentHistoryPage(webContentURL,webContentParams,webContentMethod));
203 PortletMessaging.publish(actionRequest, HISTORY, history);
204 }
205 }
206
207 /***
208 * doView Renders the URL in the following order 1) SESSION_PARAMETER
209 * 2)cached version 3) defined for preference SRC
210 */
211 public void doView(RenderRequest request, RenderResponse response) throws PortletException, IOException
212 {
213 String viewPage = (String)request.getAttribute(PARAM_VIEW_PAGE);
214 if (viewPage != null)
215 {
216 super.doView(request, response);
217 return;
218 }
219
220
221 WebContentHistoryList history = (WebContentHistoryList)PortletMessaging.receive(request, HISTORY);
222 if (history == null)
223 history = new WebContentHistoryList();
224 WebContentHistoryPage currentPage = history.getCurrentPage();
225 if (currentPage == null)
226 {
227 String sourceURL = request.getPreferences().getValue("SRC", "");
228 if (sourceURL == null)
229 {
230
231 throw new PortletException("WebContent source not specified. Go to edit mode and specify an URL.");
232 }
233 currentPage = new WebContentHistoryPage(sourceURL);
234 }
235
236
237 if (rewriteController == null)
238 {
239 PortletContext portletApplication = getPortletContext();
240 String path = portletApplication.getRealPath("/WEB-INF");
241 String contextPath = path + "/";
242 try
243 {
244
245 rewriteController = getController(contextPath);
246 }
247 catch (Exception e)
248 {
249
250 String msg = "WebContentPortlet failed to create rewriter controller.";
251 log.error(msg,e);
252 throw new PortletException(e.getMessage());
253 }
254 }
255
256
257 response.setContentType("text/html");
258 byte[] content = doWebContent(currentPage.getUrl(), currentPage.getParams(), currentPage.isPost(), request, response);
259
260
261
262 PrintWriter writer = response.getWriter();
263 writer.print("<block>");
264 if (history.hasPreviousPage())
265 {
266 PortletURL prevAction = response.createActionURL() ;
267 prevAction.setParameter(BROWSER_ACTION_PARAM, BROWSER_ACTION_PREVIOUS_PAGE);
268 writer.print(" [<a href=\"" + prevAction.toString() +"\">Previous Page</a>] ");
269 }
270 PortletURL refreshAction = response.createActionURL() ;
271 refreshAction.setParameter(BROWSER_ACTION_PARAM, BROWSER_ACTION_REFRESH_PAGE);
272 writer.print(" [<a href=\"" + refreshAction.toString() +"\">Refresh Page</a>] ");
273 if (history.hasNextPage())
274 {
275 PortletURL nextAction = response.createActionURL() ;
276 nextAction.setParameter(BROWSER_ACTION_PARAM, BROWSER_ACTION_NEXT_PAGE);
277 writer.print(" [<a href=\"" + nextAction.toString() +"\">Next Page</a>] ");
278 }
279 writer.print("</block><hr/>");
280
281
282 ByteArrayInputStream bais = new ByteArrayInputStream(content);
283 drain(new InputStreamReader(bais, WebContentPortlet.defaultEncoding), writer);
284 bais.close();
285
286
287 history.visitPage(currentPage);
288 PortletMessaging.publish(request, HISTORY, history);
289 }
290
291 public void doEdit(RenderRequest request, RenderResponse response) throws PortletException, IOException
292 {
293 response.setContentType("text/html");
294 doPreferencesEdit(request, response);
295 }
296
297
298
299
300 protected byte[] doWebContent(String sourceAttr, Map sourceParams, boolean isPost, RenderRequest request, RenderResponse response)
301 throws PortletException
302 {
303 HttpMethod httpMethod = null ;
304
305 try
306 {
307
308 PortletURL action = response.createActionURL();
309 ((WebContentRewriter) rewriter).setActionURL(action);
310 URL baseURL = new URL(sourceAttr);
311 rewriter.setBaseUrl(baseURL.toString());
312
313
314 if (baseURL.getProtocol().equals("file"))
315 {
316 Reader reader = new InputStreamReader((InputStream)baseURL.getContent());
317 StringWriter writer = new StringWriter();
318 rewriter.rewrite(rewriteController.createParserAdaptor("text/html"), reader, writer);
319 writer.flush();
320 return writer.toString().getBytes();
321 }
322
323
324
325 HttpClient httpClient = getHttpClient(request) ;
326 httpMethod = getHttpMethod(httpClient, getURLSource(sourceAttr, sourceParams, request, response), sourceParams, isPost, request);
327 byte[] result = doPreemptiveAuthentication(httpClient, httpMethod, request, response);
328
329
330 if (result == null) {
331 return doHttpWebContent(httpClient, httpMethod, 0, request, response);
332 } else {
333 return result;
334 }
335 }
336 catch (PortletException pex)
337 {
338
339 throw pex;
340 }
341 catch (Exception ex)
342 {
343 String msg = "Exception while rewritting HTML content" ;
344 log.error(msg,ex);
345 throw new PortletException(msg+", Error: "+ex.getMessage());
346 }
347 finally
348 {
349
350 if (httpMethod != null)
351 httpMethod.releaseConnection();
352 }
353 }
354
355 protected byte[] doHttpWebContent(HttpClient httpClient, HttpMethod httpMethod, int retryCount, RenderRequest request, RenderResponse response)
356 throws PortletException
357 {
358 try
359 {
360
361
362
363
364 httpClient.executeMethod(httpMethod);
365
366
367 rewriter.setBaseUrl( rewriter.getBaseRelativeUrl( httpMethod.getPath() )) ;
368
369
370
371 Cookie[] cookies = httpClient.getState().getCookies();
372 PortletMessaging.publish(request, HTTP_STATE, cookies);
373
374
375
376
377 int responseCode = httpMethod.getStatusCode();
378 if (responseCode >= 300 && responseCode <= 399)
379 {
380
381 Header locationHeader = httpMethod.getResponseHeader("location");
382 String redirectLocation = locationHeader != null ? locationHeader.getValue() : null ;
383 if (redirectLocation != null)
384 {
385
386
387
388 return doWebContent( redirectLocation, new HashMap(), false, request, response ) ;
389 }
390 else
391 {
392
393 throw new PortletException("Redirection code: "+responseCode+", but with no redirectionLocation set.");
394 }
395 }
396 else if ( responseCode >= 400 )
397 {
398 if ( responseCode == 401 )
399 {
400 if (httpMethod.getHostAuthState().isAuthRequested() && retryCount++ < 1 && doRequestedAuthentication( httpClient, httpMethod, request, response))
401 {
402
403 return doHttpWebContent(httpClient, httpMethod, retryCount, request, response);
404 }
405 else
406 {
407
408 throw new PortletException("Site requested authorization, but we are unable to provide credentials");
409 }
410 }
411 else if (retryCount++ < 3)
412 {
413 log.info("WebContentPortlet.doHttpWebContent() - retrying: "+httpMethod.getPath()+", response code: "+responseCode);
414
415
416 return doHttpWebContent(httpClient, httpMethod, retryCount, request, response);
417 }
418 else
419 {
420
421 throw new PortletException("Failure reading: "+httpMethod.getPath()+", response code: "+responseCode);
422 }
423 }
424
425
426
427
428 BufferedInputStream bis = new BufferedInputStream(httpMethod.getResponseBodyAsStream());
429 String encoding = ((HttpMethodBase)httpMethod).getResponseCharSet();
430 if (encoding == null)
431 encoding = getContentCharSet(bis);
432 Reader htmlReader = new InputStreamReader(bis, encoding);
433
434
435 if (encoding == null)
436 encoding = WebContentPortlet.defaultEncoding ;
437 ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
438 Writer htmlWriter = new OutputStreamWriter(byteOutputStream, encoding);
439
440
441 rewriter.rewrite(rewriteController.createParserAdaptor("text/html"), htmlReader, htmlWriter);
442 htmlWriter.flush();
443
444
445
446
447 return byteOutputStream.toByteArray();
448 }
449 catch (UnsupportedEncodingException ueex)
450 {
451 throw new PortletException("Encoding " + defaultEncoding + " not supported. Error: " + ueex.getMessage());
452 }
453 catch (RewriterException rwe)
454 {
455 throw new PortletException("Failed to rewrite HTML page. Error: " + rwe.getMessage());
456 }
457 catch (Exception e)
458 {
459 throw new PortletException("Exception while rewritting HTML page. Error: " + e.getMessage());
460 }
461 }
462
463 protected String getURLSource(String source, Map params, RenderRequest request, RenderResponse response)
464 {
465 return source;
466 }
467
468 protected byte[] doPreemptiveAuthentication(HttpClient clent,HttpMethod method, RenderRequest request, RenderResponse response)
469 {
470
471 return null ;
472 }
473
474 protected boolean doRequestedAuthentication(HttpClient clent,HttpMethod method, RenderRequest request, RenderResponse response)
475 {
476
477 return false ;
478 }
479
480
481
482
483 private RewriterController getController(String contextPath) throws Exception
484 {
485 Class[] rewriterClasses = new Class[]
486 { WebContentRewriter.class, WebContentRewriter.class};
487
488 Class[] adaptorClasses = new Class[]
489 { NekoParserAdaptor.class, SaxParserAdaptor.class};
490 RewriterController rwc = new JetspeedRewriterController(contextPath + "conf/rewriter-rules-mapping.xml", Arrays
491 .asList(rewriterClasses), Arrays.asList(adaptorClasses));
492
493 FileReader reader = new FileReader(contextPath + "conf/default-rewriter-rules.xml");
494
495 Ruleset ruleset = rwc.loadRuleset(reader);
496 reader.close();
497 rewriter = rwc.createRewriter(ruleset);
498 return rwc;
499 }
500
501 protected HttpClient getHttpClient(RenderRequest request) throws IOException
502 {
503
504 HttpClient client = new HttpClient();
505
506
507 Cookie[] cookies = (Cookie[])PortletMessaging.receive(request, HTTP_STATE);
508 if (cookies != null)
509 {
510
511 client.getState().addCookies(cookies);
512
513
514
515 }
516
517 return client ;
518 }
519
520 protected HttpMethodBase getHttpMethod(HttpClient client, String uri, Map params, boolean isPost, RenderRequest request) throws IOException
521 {
522 HttpMethodBase httpMethod = null;
523 String useragentProperty = request.getProperty("User-Agent");
524 if (!isPost)
525 {
526
527
528
529 httpMethod = new GetMethod(uri);
530 if (params != null && !params.isEmpty())
531 {
532 ArrayList pairs = new ArrayList();
533 Iterator iter = params.entrySet().iterator();
534 while (iter.hasNext())
535 {
536 Map.Entry entry = (Map.Entry)iter.next() ;
537 String name = (String)entry.getKey() ;
538 String[] values = (String [])entry.getValue() ;
539 if (values != null)
540 for (int i = 0,limit = values.length; i < limit; i++)
541 {
542
543 pairs.add(new NameValuePair(name, values[i]));
544 }
545 }
546 httpMethod.setQueryString((NameValuePair[])pairs.toArray(new NameValuePair[pairs.size()]));
547 }
548
549
550 httpMethod.setFollowRedirects(true);
551 }
552 else
553 {
554
555
556
557 PostMethod postMethod = (PostMethod)( httpMethod = new PostMethod(uri)) ;
558 if (params != null && !params.isEmpty())
559 {
560 Iterator iter = params.entrySet().iterator();
561 while (iter.hasNext())
562 {
563 Map.Entry entry = (Map.Entry)iter.next();
564 String name = (String)entry.getKey();
565 String[] values = (String[])entry.getValue();
566 if (values != null)
567 for (int i=0,limit=values.length; i<limit; i++)
568 {
569
570
571 postMethod.addParameter(name, values[i]);
572 }
573 }
574 }
575 }
576
577
578 httpMethod.addRequestHeader( "User-Agent", useragentProperty );
579
580
581
582
583
584 return httpMethod ;
585 }
586
587
588 static final int BLOCK_SIZE = 4096;
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613 private void drain(Reader r, Writer w) throws IOException
614 {
615 char[] bytes = new char[BLOCK_SIZE];
616 try
617 {
618 int length = r.read(bytes);
619 while (length != -1)
620 {
621 if (length != 0)
622 {
623 w.write(bytes, 0, length);
624 }
625 length = r.read(bytes);
626 }
627 }
628 finally
629 {
630 bytes = null;
631 }
632
633 }
634
635
636
637
638
639
640
641
642
643
644 private String getContentCharSet(InputStream is) throws IOException
645 {
646 if (!is.markSupported())
647 {
648 return null;
649 }
650
651 byte[] buf = new byte[BLOCK_SIZE];
652 try
653 {
654 is.mark(BLOCK_SIZE);
655 is.read(buf, 0, BLOCK_SIZE);
656 String content = new String(buf, "ISO-8859-1");
657 String lowerCaseContent = content.toLowerCase();
658 int startIndex = lowerCaseContent.indexOf("<head");
659 if (startIndex == -1)
660 {
661 startIndex = 0;
662 }
663 int endIndex = lowerCaseContent.indexOf("</head");
664 if (endIndex == -1)
665 {
666 endIndex = content.length();
667 }
668 content = content.substring(startIndex, endIndex);
669
670 StringTokenizer st = new StringTokenizer(content, "<>");
671 while (st.hasMoreTokens())
672 {
673 String element = st.nextToken();
674 String lowerCaseElement = element.toLowerCase();
675 if (lowerCaseElement.startsWith("meta") && lowerCaseElement.indexOf("content-type") > 0)
676 {
677 StringTokenizer est = new StringTokenizer(element, " =\"\';");
678 while (est.hasMoreTokens())
679 {
680 if (est.nextToken().equalsIgnoreCase("charset"))
681 {
682 if (est.hasMoreTokens())
683 {
684 return est.nextToken();
685 }
686 }
687 }
688 }
689 }
690 }
691 catch (IOException e)
692 {
693 }
694 finally
695 {
696 is.reset();
697 }
698
699 return null;
700 }
701 }