View Javadoc

1   /* 
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *     http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.jetspeed.portlet;
18  
19  import java.io.BufferedInputStream;
20  import java.io.ByteArrayInputStream;
21  import java.io.ByteArrayOutputStream;
22  import java.io.FileReader;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.InputStreamReader;
26  import java.io.OutputStreamWriter;
27  import java.io.PrintWriter;
28  import java.io.Reader;
29  import java.io.StringWriter;
30  import java.io.UnsupportedEncodingException;
31  import java.io.Writer;
32  import java.net.URL;
33  import java.util.ArrayList;
34  import java.util.Arrays;
35  import java.util.HashMap;
36  import java.util.Iterator;
37  import java.util.Map;
38  import java.util.StringTokenizer;
39  
40  import javax.portlet.ActionRequest;
41  import javax.portlet.ActionResponse;
42  import javax.portlet.PortletConfig;
43  import javax.portlet.PortletContext;
44  import javax.portlet.PortletException;
45  import javax.portlet.PortletMode;
46  import javax.portlet.PortletURL;
47  import javax.portlet.RenderRequest;
48  import javax.portlet.RenderResponse;
49  
50  import org.apache.commons.httpclient.Cookie;
51  import org.apache.commons.httpclient.Header;
52  import org.apache.commons.httpclient.HttpClient;
53  import org.apache.commons.httpclient.HttpMethod;
54  import org.apache.commons.httpclient.HttpMethodBase;
55  import org.apache.commons.httpclient.NameValuePair;
56  import org.apache.commons.httpclient.methods.GetMethod;
57  import org.apache.commons.httpclient.methods.PostMethod;
58  import org.apache.commons.logging.Log;
59  import org.apache.commons.logging.LogFactory;
60  import org.apache.jetspeed.portlet.webcontent.WebContentHistoryList;
61  import org.apache.jetspeed.portlet.webcontent.WebContentHistoryPage;
62  import org.apache.jetspeed.rewriter.JetspeedRewriterController;
63  import org.apache.jetspeed.rewriter.RewriterController;
64  import org.apache.jetspeed.rewriter.RewriterException;
65  import org.apache.jetspeed.rewriter.RulesetRewriter;
66  import org.apache.jetspeed.rewriter.WebContentRewriter;
67  import org.apache.jetspeed.rewriter.html.neko.NekoParserAdaptor;
68  import org.apache.jetspeed.rewriter.rules.Ruleset;
69  import org.apache.jetspeed.rewriter.xml.SaxParserAdaptor;
70  import org.apache.portals.bridges.velocity.GenericVelocityPortlet;
71  import org.apache.portals.messaging.PortletMessaging;
72  
73  
74  /***
75   * WebContentPortlet
76   * 
77   * TODO: Preferences, cache stream instead of URL *
78   * 
79   * @author <a href="mailto:rogerrutr@apache.org">Roger Ruttimann </a>
80   * @version $Id: WebContentPortlet.java 517719 2007-03-13 15:05:48Z ate $
81   */
82  
83  public class WebContentPortlet extends GenericVelocityPortlet
84  {
85  
86      /***
87       * WebContentPortlet Allows navigation inside the portlet and caches the
88       * latest URL
89       */
90  
91      /***
92       * Configuration constants.
93       */
94      public static final String VIEW_SOURCE_PARAM = "viewSource";
95      public static final String EDIT_SOURCE_PARAM = "editSource";
96      
97      // ...browser action buttons
98      public static final String BROWSER_ACTION_PARAM = "wcBrowserAction"; 
99      public static final String BROWSER_ACTION_PREVIOUS_PAGE = "previousPage"; 
100     public static final String BROWSER_ACTION_REFRESH_PAGE = "refreshPage"; 
101     public static final String BROWSER_ACTION_NEXT_PAGE = "nextPage"; 
102 
103     /***
104      * Action Parameter
105      */
106 
107     // WebContent session data 
108 
109     public static final String HISTORY = "webcontent.history";
110     public static final String HTTP_STATE = "webcontent.http.state";
111     
112     // Class Data
113     
114     protected final static Log log = LogFactory.getLog(WebContentPortlet.class);
115     public final static String defaultEncoding = "UTF-8";
116 
117     // Data Members
118     
119     private RulesetRewriter rewriter = null;
120     private RewriterController rewriteController = null;
121 
122     
123     public WebContentPortlet()
124     {
125         super();
126     }
127 
128     /***
129      * Initialize portlet configuration.
130      */
131     public void init(PortletConfig config) throws PortletException
132     {
133         super.init(config);
134     }
135 
136     /***
137      * processAction() Checks action initiated by the WebContent portlet which
138      * means that a user has clicked on an URL
139      * 
140      * @param actionRequest
141      * @param actionResponse
142      * @throws PortletException
143      * @throws IOException
144      */
145     public void processAction(ActionRequest actionRequest, ActionResponse actionResponse) throws PortletException,
146             IOException
147     {
148         // check to see if it is a meta-navigation command
149         String browserAction = actionRequest.getParameter(BROWSER_ACTION_PARAM);
150         if (browserAction != null)
151         {
152             if (!browserAction.equalsIgnoreCase(BROWSER_ACTION_REFRESH_PAGE))
153             {
154                 // for Refresh, there is nothing special to do - current history page will be re-displayed
155                 WebContentHistoryList history = (WebContentHistoryList)PortletMessaging.receive(actionRequest, HISTORY);
156                 
157                 if (browserAction.equalsIgnoreCase(BROWSER_ACTION_PREVIOUS_PAGE))
158                 {
159                     if (history.hasPreviousPage())
160                         history.getPreviousPage();
161                 }
162                 else if (browserAction.equalsIgnoreCase(BROWSER_ACTION_NEXT_PAGE))
163                 {
164                     if (history.hasNextPage())
165                         history.getNextPage();
166                 }
167             }
168             
169             return ;   // proceed to doView() with adjusted history
170         }
171         
172         // Check if an action parameter was defined        
173         String webContentURL = actionRequest.getParameter(WebContentRewriter.ACTION_PARAMETER_URL);
174         String webContentMethod = actionRequest.getParameter(WebContentRewriter.ACTION_PARAMETER_METHOD);
175         Map webContentParams = new HashMap(actionRequest.getParameterMap()) ;
176         
177         // defaults
178         if (webContentMethod == null) webContentMethod = "" ;   // default to GET
179         
180         // parameter map includes the URL (as ACTION_PARAMETER_URL), but all actual params as well
181         webContentParams.remove(WebContentRewriter.ACTION_PARAMETER_URL);
182         webContentParams.remove(WebContentRewriter.ACTION_PARAMETER_METHOD);
183         
184         if (webContentURL == null || actionRequest.getPortletMode() == PortletMode.EDIT)
185         {
186             processPreferencesAction(actionRequest, actionResponse);            
187             webContentURL = actionRequest.getPreferences().getValue("SRC", "http://portals.apache.org");
188 
189             // parameters are for the EDIT mode form, and should not be propagated to the subsequent GET in doView
190             webContentParams.clear();
191         }
192 
193         /*
194          * If the webContentParameter is not empty attach the URL to the session
195          */
196         if (webContentURL != null && webContentURL.length() > 0)
197         {
198             // new page visit - make it the current page in the history
199             WebContentHistoryList history = (WebContentHistoryList)PortletMessaging.receive(actionRequest, HISTORY);
200             if (history == null)
201                 history = new WebContentHistoryList();
202             history.visitPage(new WebContentHistoryPage(webContentURL,webContentParams,webContentMethod));
203             PortletMessaging.publish(actionRequest, HISTORY, history);
204         }
205     }
206 
207     /***
208      * doView Renders the URL in the following order 1) SESSION_PARAMETER
209      * 2)cached version 3) defined for preference SRC
210      */
211     public void doView(RenderRequest request, RenderResponse response) throws PortletException, IOException
212     {
213         String viewPage = (String)request.getAttribute(PARAM_VIEW_PAGE);
214         if (viewPage != null)
215         {
216             super.doView(request, response);
217             return;
218         }
219         
220         // view the current page in the history
221         WebContentHistoryList history = (WebContentHistoryList)PortletMessaging.receive(request, HISTORY);
222         if (history == null)
223             history = new WebContentHistoryList();
224         WebContentHistoryPage currentPage = history.getCurrentPage();
225         if (currentPage == null)
226         {
227             String sourceURL = request.getPreferences().getValue("SRC", "");
228             if (sourceURL == null)
229             {
230                 // BOZO - switch to edit mode automatically here, instead of throwing exception!
231                 throw new PortletException("WebContent source not specified. Go to edit mode and specify an URL.");
232             }
233             currentPage = new WebContentHistoryPage(sourceURL);
234         }
235 
236         // Initialize the controller if it's not already done
237         if (rewriteController == null)
238         {
239             PortletContext portletApplication = getPortletContext(); 
240             String path = portletApplication.getRealPath("/WEB-INF");
241             String contextPath = path + "/";
242             try
243             {
244                 // Create rewriter adaptor
245                 rewriteController = getController(contextPath);
246             }
247             catch (Exception e)
248             {
249                 // Failed to create rewriter controller
250                 String msg = "WebContentPortlet failed to create rewriter controller.";
251                 log.error(msg,e);
252                 throw new PortletException(e.getMessage());
253             }
254         }
255 
256         // get content from current page
257         response.setContentType("text/html");
258         byte[] content = doWebContent(currentPage.getUrl(), currentPage.getParams(), currentPage.isPost(), request, response);
259         // System.out.println("Rewritten content is\n..."+new String(content));
260         
261         // write the meta-control navigation header
262         PrintWriter writer = response.getWriter();
263         writer.print("<block>");
264         if (history.hasPreviousPage())
265         {
266             PortletURL prevAction = response.createActionURL() ;
267             prevAction.setParameter(BROWSER_ACTION_PARAM, BROWSER_ACTION_PREVIOUS_PAGE);
268             writer.print(" [<a href=\"" + prevAction.toString() +"\">Previous Page</a>] ");
269         }
270         PortletURL refreshAction = response.createActionURL() ;
271         refreshAction.setParameter(BROWSER_ACTION_PARAM, BROWSER_ACTION_REFRESH_PAGE);
272         writer.print(" [<a href=\"" + refreshAction.toString() +"\">Refresh Page</a>] ");
273         if (history.hasNextPage())
274         {
275             PortletURL nextAction = response.createActionURL() ;
276             nextAction.setParameter(BROWSER_ACTION_PARAM, BROWSER_ACTION_NEXT_PAGE);
277             writer.print(" [<a href=\"" + nextAction.toString() +"\">Next Page</a>] ");
278         }
279         writer.print("</block><hr/>");
280 
281         // drain the stream to the portlet window
282         ByteArrayInputStream bais = new ByteArrayInputStream(content);
283         drain(new InputStreamReader(bais, WebContentPortlet.defaultEncoding), writer);
284         bais.close();
285         
286         // done, cache results in the history and save the history
287         history.visitPage(currentPage);
288         PortletMessaging.publish(request, HISTORY, history);
289     }
290 
291     public void doEdit(RenderRequest request, RenderResponse response) throws PortletException, IOException
292     {
293         response.setContentType("text/html");
294         doPreferencesEdit(request, response);
295     }
296         
297     /*
298      * Privaye helpers for generating WebContent
299      */
300     protected byte[] doWebContent(String sourceAttr, Map sourceParams, boolean isPost, RenderRequest request, RenderResponse response)
301         throws PortletException
302     {
303         HttpMethod httpMethod = null ;
304         
305         try
306         {
307             // Set the action and base URLs in the rewriter
308             PortletURL action = response.createActionURL();
309             ((WebContentRewriter) rewriter).setActionURL(action);
310             URL baseURL = new URL(sourceAttr);
311             rewriter.setBaseUrl(baseURL.toString());
312             
313             // ...file URLs may be used for testing
314             if (baseURL.getProtocol().equals("file"))
315             {
316                 Reader reader = new InputStreamReader((InputStream)baseURL.getContent());
317                 StringWriter writer = new StringWriter();
318                 rewriter.rewrite(rewriteController.createParserAdaptor("text/html"), reader, writer);
319                 writer.flush();
320                 return writer.toString().getBytes();
321             }
322             // else fall through to normal case (http/https)...
323             
324             // ...set up URL and HttpClient stuff
325             HttpClient httpClient = getHttpClient(request) ;
326             httpMethod = getHttpMethod(httpClient, getURLSource(sourceAttr, sourceParams, request, response), sourceParams, isPost, request);
327             byte[] result = doPreemptiveAuthentication(httpClient, httpMethod, request, response);
328             
329             // ...get, cache, and return the content
330             if (result == null) {
331             	return doHttpWebContent(httpClient, httpMethod, 0, request, response);
332             } else {
333             	return result;
334             }
335         }
336         catch (PortletException pex)
337         {
338             // already reported
339             throw pex;
340         }
341         catch (Exception ex)
342         {
343             String msg = "Exception while rewritting HTML content" ;
344             log.error(msg,ex);
345             throw new PortletException(msg+", Error: "+ex.getMessage());
346         }
347         finally
348         {
349             // release the http connection
350             if (httpMethod != null)
351                 httpMethod.releaseConnection();
352         }
353     }
354 
355     protected byte[] doHttpWebContent(HttpClient httpClient, HttpMethod httpMethod, int retryCount, RenderRequest request, RenderResponse response)
356             throws PortletException
357     {
358         try
359         {
360             // Get the input stream from the provided httpClient/httpMethod
361             // System.out.println("WebContentPortlet.doHttpWebContent() - from path: "+httpMethod.getPath());
362             
363             // ...set up URL and HttpClient stuff
364             httpClient.executeMethod(httpMethod);
365             
366             // ...reset base URL with fully resolved path (e.g. if a directory, path will end with a /, which it may not have in the call to this method)
367             rewriter.setBaseUrl( rewriter.getBaseRelativeUrl( httpMethod.getPath() )) ;
368             // System.out.println("...reset base URL from final path: "+httpMethod.getPath());
369             
370             // ...save updated state
371             Cookie[] cookies = httpClient.getState().getCookies();
372             PortletMessaging.publish(request, HTTP_STATE, cookies);
373             // System.out.println("...saving: "+(cookies != null ? cookies.length : 0)+", cookies...");
374             //    for(int i=0,limit = cookies != null ? cookies.length : 0; i<limit; i++) System.out.println("...cookie["+i+"] is: "+cookies[i]);
375 
376             // ...check for manual redirects
377             int responseCode = httpMethod.getStatusCode();
378             if (responseCode >= 300 && responseCode <= 399)
379             {
380                 // redirection that could not be handled automatically!!! (probably from a POST)
381                 Header locationHeader = httpMethod.getResponseHeader("location");
382                 String redirectLocation = locationHeader != null ? locationHeader.getValue() : null ;
383                 if (redirectLocation != null)
384                 {
385                     // System.out.println("WebContentPortlet.doHttpWebContent() >>>handling redirect to: "+redirectLocation+"<<<");
386                     
387                     // one more time (assume most params are already encoded & new URL is using GET protocol!)
388                     return doWebContent( redirectLocation, new HashMap(), false, request, response ) ;
389                 }
390                 else
391                 {
392                     // The response is a redirect, but did not provide the new location for the resource.
393                     throw new PortletException("Redirection code: "+responseCode+", but with no redirectionLocation set.");
394                 }
395             }
396             else if ( responseCode >= 400 )
397             {
398                 if ( responseCode == 401 )
399                 {
400                     if (httpMethod.getHostAuthState().isAuthRequested() && retryCount++ < 1 && doRequestedAuthentication( httpClient, httpMethod, request, response))
401                     {
402                         // try again, now that we are authorizied
403                         return doHttpWebContent(httpClient, httpMethod, retryCount, request, response);
404                     }
405                     else
406                     {
407                         // could not authorize
408                         throw new PortletException("Site requested authorization, but we are unable to provide credentials");
409                     }
410                 }
411                 else if (retryCount++ < 3)
412                 {
413                     log.info("WebContentPortlet.doHttpWebContent() - retrying: "+httpMethod.getPath()+", response code: "+responseCode);
414                     
415                     // retry
416                     return doHttpWebContent(httpClient, httpMethod, retryCount, request, response);
417                 }
418                 else
419                 {
420                     // bad
421                     throw new PortletException("Failure reading: "+httpMethod.getPath()+", response code: "+responseCode);
422                 }
423             }
424             
425             // System.out.println("...response code: "+responseCode+", fetching content as stream and rewriting.");
426             
427             // ...ok - *now* create the input stream and reader
428             BufferedInputStream bis = new BufferedInputStream(httpMethod.getResponseBodyAsStream());
429             String encoding = ((HttpMethodBase)httpMethod).getResponseCharSet();
430             if (encoding == null)
431                 encoding = getContentCharSet(bis);
432             Reader htmlReader = new InputStreamReader(bis, encoding);
433             
434             // get the output buffer
435             if (encoding == null)
436                 encoding = WebContentPortlet.defaultEncoding ;
437             ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
438             Writer htmlWriter = new OutputStreamWriter(byteOutputStream, encoding);
439 
440             // rewrite and flush output
441             rewriter.rewrite(rewriteController.createParserAdaptor("text/html"), htmlReader, htmlWriter);
442             htmlWriter.flush();
443 
444             // Page has been rewritten
445             // TODO: Write it to cache
446             //System.out.println(new String(byteOutputStream.toByteArray()));
447             return byteOutputStream.toByteArray();
448         }
449         catch (UnsupportedEncodingException ueex)
450         {
451             throw new PortletException("Encoding " + defaultEncoding + " not supported. Error: " + ueex.getMessage());
452         }
453         catch (RewriterException rwe)
454         {
455             throw new PortletException("Failed to rewrite HTML page. Error: " + rwe.getMessage());
456         }
457         catch (Exception e)
458         {
459             throw new PortletException("Exception while rewritting HTML page. Error: " + e.getMessage());
460         }
461     }
462     
463     protected String getURLSource(String source, Map params, RenderRequest request, RenderResponse response)
464     {
465         return source;    
466     }
467     
468     protected byte[] doPreemptiveAuthentication(HttpClient clent,HttpMethod method, RenderRequest request, RenderResponse response)
469     {
470         // derived class responsibilty - return true, if credentials have been set
471         return null ;
472     }
473     
474     protected boolean doRequestedAuthentication(HttpClient clent,HttpMethod method, RenderRequest request, RenderResponse response)
475     {
476         // derived class responsibilty - return true, if credentials have been set
477         return false ;
478     }
479 
480     /*
481      * Generate a rewrite controller using the basic rules file
482      */
483     private RewriterController getController(String contextPath) throws Exception
484     {
485         Class[] rewriterClasses = new Class[]
486         { WebContentRewriter.class, WebContentRewriter.class};
487         
488         Class[] adaptorClasses = new Class[]
489         { NekoParserAdaptor.class, SaxParserAdaptor.class};
490         RewriterController rwc = new JetspeedRewriterController(contextPath + "conf/rewriter-rules-mapping.xml", Arrays
491                 .asList(rewriterClasses), Arrays.asList(adaptorClasses));
492 
493         FileReader reader = new FileReader(contextPath + "conf/default-rewriter-rules.xml");
494 
495         Ruleset ruleset = rwc.loadRuleset(reader);
496         reader.close();
497         rewriter = rwc.createRewriter(ruleset);
498         return rwc;
499     }
500 
501     protected HttpClient getHttpClient(RenderRequest request) throws IOException
502     {
503         // derived class hook (e.g. to set up Basic Authentication)
504         HttpClient client = new HttpClient();
505         
506         // reuse existing state, if we have been here before
507         Cookie[] cookies = (Cookie[])PortletMessaging.receive(request, HTTP_STATE);
508         if (cookies != null)
509         {
510             // ...so far, just saving cookies - may need a more complex Serializable object here
511             client.getState().addCookies(cookies);
512 
513             // System.out.println("WebContentPortlet.getHttpClient() - reusing: "+cookies.length+", cookies...");
514             //    for(int i=0,limit = cookies.length; i<limit; i++) System.out.println("...cookie["+i+"] is: "+cookies[i]);
515         }
516  
517         return client ;
518     }
519     
520     protected HttpMethodBase getHttpMethod(HttpClient client, String uri, Map params, boolean isPost, RenderRequest request) throws IOException
521     {
522         HttpMethodBase httpMethod = null;
523         String useragentProperty = request.getProperty("User-Agent");
524         if (!isPost)
525         {
526             // System.out.println("WebContentPortlet.getHttpMethod() - HTTP GET from URL: "+uri);
527             
528             // http GET
529             httpMethod = new GetMethod(uri);
530             if (params != null && !params.isEmpty())
531             {
532                 ArrayList pairs = new ArrayList();
533                 Iterator iter = params.entrySet().iterator();
534                 while (iter.hasNext())
535                 {
536                     Map.Entry entry = (Map.Entry)iter.next() ;
537                     String name = (String)entry.getKey() ;
538                     String[] values = (String [])entry.getValue() ;
539                     if (values != null)
540                         for (int i = 0,limit = values.length; i < limit; i++)
541                         {
542                             // System.out.println("...adding >>>GET parameter: "+name+", with value: "+values[i]+"<<<");
543                             pairs.add(new NameValuePair(name, values[i]));
544                         }
545                 }
546                 httpMethod.setQueryString((NameValuePair[])pairs.toArray(new NameValuePair[pairs.size()]));
547             }
548             
549             // automatically follow redirects (NOTE: not supported in POST - will throw exeception if you ask for it, then sees a redirect!!)
550             httpMethod.setFollowRedirects(true);
551         }
552         else
553         {
554             // System.out.println("WebContentPortlet.getHttpMethod() - HTTP POST to URL: "+uri);
555             
556             // http POST
557             PostMethod postMethod = (PostMethod)( httpMethod = new PostMethod(uri)) ; 
558             if (params != null && !params.isEmpty())
559             {
560                 Iterator iter = params.entrySet().iterator();
561                 while (iter.hasNext())
562                 {
563                     Map.Entry entry = (Map.Entry)iter.next();
564                     String name = (String)entry.getKey(); 
565                     String[] values = (String[])entry.getValue();
566                     if (values != null)
567                         for (int i=0,limit=values.length; i<limit; i++)
568                         {
569                             // System.out.println("...adding >>>POST parameter: "+name+", with value: "+values[i]+"<<<");
570                             
571                             postMethod.addParameter(name, values[i]);
572                         }
573                 }   
574             }
575         }
576         
577         // propagate User-Agent, so target site does not think we are a D.O.S. attack
578         httpMethod.addRequestHeader( "User-Agent", useragentProperty );
579         
580         // BOZO - DON'T do this.   default policy seems to be more flexible!!!
581         //httpMethod.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
582         
583         // ...ready to use!
584         return httpMethod ;
585     }
586 
587 
588     static final int BLOCK_SIZE = 4096;
589 
590     /*
591     private void drain(InputStream reader, OutputStream writer) throws IOException
592     {
593         byte[] bytes = new byte[BLOCK_SIZE];
594         try
595         {
596             int length = reader.read(bytes);
597             while (length != -1)
598             {
599                 if (length != 0)
600                 {
601                     writer.write(bytes, 0, length);
602                 }
603                 length = reader.read(bytes);
604             }
605         }
606         finally
607         {
608             bytes = null;
609         }
610     }
611     */
612 
613     private void drain(Reader r, Writer w) throws IOException
614     {
615         char[] bytes = new char[BLOCK_SIZE];
616         try
617         {
618             int length = r.read(bytes);
619             while (length != -1)
620             {
621                 if (length != 0)
622                 {
623                     w.write(bytes, 0, length);
624                 }
625                 length = r.read(bytes);
626             }
627         }
628         finally
629         {
630             bytes = null;
631         }
632 
633     }
634 
635     /*
636     private void drain(Reader r, OutputStream os) throws IOException
637     {
638         Writer w = new OutputStreamWriter(os);
639         drain(r, w);
640         w.flush();
641     }
642     */
643 
644     private String getContentCharSet(InputStream is) throws IOException
645     {
646         if (!is.markSupported())
647         {
648             return null;
649         }
650 
651         byte[] buf = new byte[BLOCK_SIZE];
652         try
653         {
654             is.mark(BLOCK_SIZE);
655             is.read(buf, 0, BLOCK_SIZE);
656             String content = new String(buf, "ISO-8859-1");
657             String lowerCaseContent = content.toLowerCase();
658             int startIndex = lowerCaseContent.indexOf("<head");
659             if (startIndex == -1)
660             {
661                 startIndex = 0;
662             }
663             int endIndex = lowerCaseContent.indexOf("</head");
664             if (endIndex == -1)
665             {
666                 endIndex = content.length();
667             }
668             content = content.substring(startIndex, endIndex);
669 
670             StringTokenizer st = new StringTokenizer(content, "<>");
671             while (st.hasMoreTokens())
672             {
673                 String element = st.nextToken();
674                 String lowerCaseElement = element.toLowerCase();
675                 if (lowerCaseElement.startsWith("meta") && lowerCaseElement.indexOf("content-type") > 0)
676                 {
677                     StringTokenizer est = new StringTokenizer(element, " =\"\';");
678                     while (est.hasMoreTokens())
679                     {
680                         if (est.nextToken().equalsIgnoreCase("charset"))
681                         {
682                             if (est.hasMoreTokens())
683                             {
684                                 return est.nextToken();
685                             }
686                         }
687                     }
688                 }
689             }
690         }
691         catch (IOException e)
692         {
693         }
694         finally
695         {
696             is.reset();
697         }
698 
699         return null;
700     }
701 }