/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.help.internal.search;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.util.StringTokenizer;
import org.apache.lucene.demo.html.HTMLParser;
import org.eclipse.help.internal.base.HelpBasePlugin;
import org.eclipse.help.internal.search.ASCIIReader;

/**
 * Parses an HTML help document for indexing.
 *
 * <p>Opening a document is a two-pass operation: the beginning of the document
 * is first scanned for a {@code <meta http-equiv="content-type" content="...">}
 * declaration to discover its character encoding, then the document is reopened
 * and handed to an {@link HTMLParser} through a reader that uses that encoding
 * (or the platform default encoding when none is declared or supported).
 *
 * <p>Typical use: {@link #openDocument(URL)}, then any of {@link #getTitle()},
 * {@link #getSummary()}, {@link #getContentReader()}, and finally
 * {@link #closeDocument()}.
 */
public class HTMLDocParser {
    // Passed to ASCIIReader as its second argument when sniffing the encoding
    // (presumably the maximum number of characters examined -- confirm against ASCIIReader).
    private static final int MAX_OFFSET = 2048;

    // Element, attribute and attribute-value names recognised by the charset scanner.
    final String ELEMENT_META = "META";
    final String ELEMENT_BODY = "body";
    final String ELEMENT_HEAD = "head";
    final String ATTRIBUTE_HTTP = "http-equiv";
    final String ATTRIBUTE_HTTP_VALUE = "content-type";
    final String ATTRIBUTE_CONTENT = "content";

    // States of the element-level scanner.
    final int STATE_ELEMENT_START = 0;
    final int STATE_ELEMENT_AFTER_LT = 1;
    final int STATE_ELEMENT_AFTER_LT_SLASH = 2;
    final int STATE_ELEMENT_META = 3;

    // States for recognising http-equiv="content-type" inside a META element.
    final int STATE_HTTP_START = 0;
    final int STATE_HTTP_AFTER_NAME = 1;
    final int STATE_HTTP_AFTER_EQ = 2;
    final int STATE_HTTP_DONE = 3;

    // States for recognising content="..." inside a META element.
    final int STATE_CONTENT_START = 0;
    final int STATE_CONTENT_AFTER_NAME = 1;
    final int STATE_CONTENT_AFTER_EQ = 2;
    final int STATE_CONTENT_DONE = 3;

    private HTMLParser htmlParser;
    private InputStream inputStream = null;

    /**
     * Opens the document at {@code url} and prepares it for parsing.
     *
     * <p>Any stream still held from a previous call is closed first so that it
     * is not leaked.
     *
     * @param url location of the HTML document
     * @throws IOException if the document cannot be opened
     */
    public void openDocument(URL url) throws IOException {
        // Do not leak the stream of a document that was never closed.
        closeQuietly(this.inputStream);
        this.inputStream = null;

        // First pass: sniff the declared encoding, always releasing the stream.
        String encoding;
        InputStream sniffStream = url.openStream();
        try {
            encoding = this.getCharsetFromHTML(sniffStream);
        }
        finally {
            closeQuietly(sniffStream);
        }

        // Second pass: reopen the document for the real parse.
        this.inputStream = url.openStream();
        Reader reader = null;
        if (encoding != null) {
            try {
                reader = new InputStreamReader(this.inputStream, encoding);
            }
            catch (UnsupportedEncodingException uee) {
                if (HelpBasePlugin.DEBUG_SEARCH) {
                    System.out.println(this.getClass().getName() + " JVM does not support encoding " + encoding + " specified in document " + url.getPath() + ". Default encoding will be used during indexing.");
                }
            }
        } else {
            if (HelpBasePlugin.DEBUG_SEARCH) {
                System.out.println(this.getClass().getName() + " Encoding not found in document " + url.getPath() + ". Default encoding will be used during indexing.");
            }
        }
        if (reader == null) {
            // No usable declared encoding: fall back to the platform default.
            reader = new InputStreamReader(this.inputStream);
        }
        this.htmlParser = new HTMLParser(reader);
    }

    /**
     * Closes the stream of the currently open document, if any. Errors while
     * closing are ignored; calling this method repeatedly is harmless.
     */
    public void closeDocument() {
        closeQuietly(this.inputStream);
        this.inputStream = null;
    }

    /**
     * Returns the title reported by the HTML parser.
     *
     * @return the title, or an empty string if the calling thread was
     *         interrupted while waiting for it (the interrupt flag is restored)
     * @throws IOException if reading the document fails
     * @throws NullPointerException if no document has been opened
     */
    public String getTitle() throws IOException {
        HTMLParser parser = this.requireParser();
        try {
            return parser.getTitle();
        }
        catch (InterruptedException ie) {
            // Preserve the interruption for callers further up the stack.
            Thread.currentThread().interrupt();
            return "";
        }
    }

    /**
     * Returns the summary reported by the HTML parser.
     *
     * @return the summary, or an empty string if the calling thread was
     *         interrupted while waiting for it (the interrupt flag is restored)
     * @throws IOException if reading the document fails
     * @throws NullPointerException if no document has been opened
     */
    public String getSummary() throws IOException {
        HTMLParser parser = this.requireParser();
        try {
            return parser.getSummary();
        }
        catch (InterruptedException ie) {
            // Preserve the interruption for callers further up the stack.
            Thread.currentThread().interrupt();
            return "";
        }
    }

    /**
     * Returns the reader supplied by the HTML parser for the document content.
     *
     * @return the parser's content reader
     * @throws IOException if the parser fails to provide the reader
     * @throws NullPointerException if no document has been opened
     */
    public Reader getContentReader() throws IOException {
        return this.requireParser().getReader();
    }

    /**
     * Scans the start of an HTML stream for a charset declared in a
     * {@code META http-equiv="content-type"} element.
     *
     * <p>The reader wrapped around {@code is} is closed before returning.
     *
     * @param is stream positioned at the beginning of the document
     * @return the declared charset name, or {@code null} if none was found
     */
    public String getCharsetFromHTML(InputStream is) {
        Reader asciiReader = new ASCIIReader(is, MAX_OFFSET);
        try {
            StreamTokenizer tokenizer = new StreamTokenizer(asciiReader);
            tokenizer.lowerCaseMode(false);
            // Treat apostrophes and slashes as plain one-character tokens
            // instead of StreamTokenizer's default quote/comment handling.
            tokenizer.ordinaryChar('\'');
            tokenizer.ordinaryChar('/');
            return this.getCharsetFromHTMLTokens(tokenizer);
        }
        finally {
            try {
                asciiReader.close();
            }
            catch (IOException ignored) {
                // Best effort: the charset (if any) has already been determined.
            }
        }
    }

    /**
     * Runs the charset-detection state machine over a token stream.
     *
     * <p>Scanning stops, returning {@code null}, at {@code <body}, at
     * {@code </head}, at end of input, or on an I/O error. Within a META
     * element, both {@code http-equiv} with value {@code content-type} and a
     * double-quoted {@code content} value must be seen; the content value is
     * then passed to {@link #getCharsetFromHTTP(String)}.
     *
     * @param tokenizer tokenizer over the document text
     * @return the declared charset name, or {@code null} if none was found
     */
    public String getCharsetFromHTMLTokens(StreamTokenizer tokenizer) {
        String contentValue = null;
        int stateContent = STATE_CONTENT_START;
        int stateElement = STATE_ELEMENT_START;
        int stateHttp = STATE_HTTP_START;
        try {
            for (int token = tokenizer.nextToken(); token != StreamTokenizer.TT_EOF; token = tokenizer.nextToken()) {
                switch (stateElement) {
                    case STATE_ELEMENT_START: {
                        if (token == '<') {
                            stateElement = STATE_ELEMENT_AFTER_LT;
                        }
                        break;
                    }
                    case STATE_ELEMENT_AFTER_LT: {
                        if (token == StreamTokenizer.TT_WORD) {
                            if (ELEMENT_META.equalsIgnoreCase(tokenizer.sval)) {
                                // Entering a META element: start attribute tracking afresh.
                                stateElement = STATE_ELEMENT_META;
                                stateHttp = STATE_HTTP_START;
                                stateContent = STATE_CONTENT_START;
                                contentValue = null;
                            } else if (ELEMENT_BODY.equalsIgnoreCase(tokenizer.sval)) {
                                // Body reached without finding a declaration.
                                return null;
                            } else {
                                stateElement = STATE_ELEMENT_START;
                            }
                        } else if (token == '/') {
                            stateElement = STATE_ELEMENT_AFTER_LT_SLASH;
                        } else {
                            stateElement = STATE_ELEMENT_START;
                        }
                        break;
                    }
                    case STATE_ELEMENT_AFTER_LT_SLASH: {
                        if (token == StreamTokenizer.TT_WORD && ELEMENT_HEAD.equalsIgnoreCase(tokenizer.sval)) {
                            // End of head reached without finding a declaration.
                            return null;
                        }
                        stateElement = STATE_ELEMENT_START;
                        break;
                    }
                    default: {
                        // STATE_ELEMENT_META: inside a META element, track its attributes.
                        boolean resetIncomplete = false;
                        switch (token) {
                            case '>': {
                                stateElement = STATE_ELEMENT_START;
                                break;
                            }
                            case StreamTokenizer.TT_WORD: {
                                if (ATTRIBUTE_HTTP.equalsIgnoreCase(tokenizer.sval)) {
                                    stateHttp = STATE_HTTP_AFTER_NAME;
                                } else if (ATTRIBUTE_CONTENT.equalsIgnoreCase(tokenizer.sval)) {
                                    stateContent = STATE_CONTENT_AFTER_NAME;
                                } else if (stateHttp == STATE_HTTP_AFTER_EQ && ATTRIBUTE_HTTP_VALUE.equalsIgnoreCase(tokenizer.sval)) {
                                    // Unquoted http-equiv value.
                                    stateHttp = STATE_HTTP_DONE;
                                } else {
                                    resetIncomplete = true;
                                }
                                break;
                            }
                            case '=': {
                                if (stateHttp == STATE_HTTP_AFTER_NAME) {
                                    stateHttp = STATE_HTTP_AFTER_EQ;
                                } else if (stateContent == STATE_CONTENT_AFTER_NAME) {
                                    stateContent = STATE_CONTENT_AFTER_EQ;
                                } else {
                                    resetIncomplete = true;
                                }
                                break;
                            }
                            case '"': {
                                // Double-quoted string: its text is in tokenizer.sval.
                                if (stateHttp == STATE_HTTP_AFTER_EQ) {
                                    if (ATTRIBUTE_HTTP_VALUE.equalsIgnoreCase(tokenizer.sval)) {
                                        stateHttp = STATE_HTTP_DONE;
                                    }
                                    // A different http-equiv value leaves the state unchanged.
                                } else if (stateContent == STATE_CONTENT_AFTER_EQ) {
                                    stateContent = STATE_CONTENT_DONE;
                                    contentValue = tokenizer.sval;
                                } else {
                                    resetIncomplete = true;
                                }
                                break;
                            }
                            default: {
                                resetIncomplete = true;
                                break;
                            }
                        }
                        if (resetIncomplete) {
                            // Unexpected token: abandon half-recognised attributes
                            // but keep any attribute that was already completed.
                            if (stateHttp != STATE_HTTP_DONE) {
                                stateHttp = STATE_HTTP_START;
                            }
                            if (stateContent != STATE_CONTENT_DONE) {
                                stateContent = STATE_CONTENT_START;
                            }
                        }
                        break;
                    }
                }
                if (contentValue != null && stateHttp == STATE_HTTP_DONE && stateContent == STATE_CONTENT_DONE) {
                    return this.getCharsetFromHTTP(contentValue);
                }
            }
        }
        catch (IOException ioe) {
            // Unreadable input is treated the same as "no charset declared".
            return null;
        }
        return null;
    }

    /**
     * Extracts the charset parameter from a content-type value such as
     * {@code "text/html; charset=utf-8"}.
     *
     * <p>The parameter name is matched case-insensitively and independently of
     * the default locale. A value wrapped in matching single or double quotes
     * is returned without the quotes.
     *
     * @param contentValue the content-type value; may be {@code null}
     * @return the first non-empty charset value, or {@code null} if there is none
     */
    public String getCharsetFromHTTP(String contentValue) {
        if (contentValue == null) {
            return null;
        }
        final String charsetKey = "charset=";
        StringTokenizer parameters = new StringTokenizer(contentValue, ";");
        while (parameters.hasMoreTokens()) {
            String parameter = parameters.nextToken().trim();
            if (!parameter.regionMatches(true, 0, charsetKey, 0, charsetKey.length())) {
                continue;
            }
            String charset = stripMatchingQuotes(parameter.substring(charsetKey.length()).trim());
            if (charset.length() > 0) {
                return charset;
            }
        }
        return null;
    }

    /** Returns the open parser, or throws if {@link #openDocument(URL)} has not succeeded yet. */
    private HTMLParser requireParser() {
        if (this.htmlParser == null) {
            throw new NullPointerException("No document is open; call openDocument(URL) first");
        }
        return this.htmlParser;
    }

    /** Removes one pair of matching surrounding quotes (single or double) from {@code value}. */
    private static String stripMatchingQuotes(String value) {
        int last = value.length() - 1;
        if (last >= 1) {
            char first = value.charAt(0);
            if ((first == '"' || first == '\'') && value.charAt(last) == first) {
                return value.substring(1, last).trim();
            }
        }
        return value;
    }

    /** Closes {@code stream} if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        }
        catch (IOException ignored) {
            // Best effort: nothing useful can be done if closing fails.
        }
    }
}

