/*
 * Decompiled with CFR 0.152.
 */
package it.unimi.di.law.bubing.util;

import com.google.common.base.Charsets;
import it.unimi.di.law.bubing.util.BURL;
import it.unimi.di.law.warc.filters.URIResponse;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.lang.MutableString;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.net.URI;
import java.util.Arrays;
import java.util.Set;
import org.apache.commons.io.input.BOMInputStream;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static utilities that turn the content of a <code>robots.txt</code> file into a
 * <em>robots filter</em> and test URLs against it.
 *
 * <p>A robots filter is a {@code char[][]} holding the disallowed path prefixes in
 * lexicographic order, with no entry being a prefix of another (see
 * {@link #toSortedPrefixFreeCharArrays(Set)}). {@link #apply(char[][], URI)} relies on
 * both properties to answer with a single binary search.
 */
public class URLRespectsRobots {
    private static final Logger LOGGER = LoggerFactory.getLogger(URLRespectsRobots.class);

    /** The maximum number of filter entries rendered by {@link #toString(char[][])}. */
    public static final int MAX_TO_STRING_ROBOTS = 30;

    /** A filter without any disallowed prefix: {@link #apply(char[][], URI)} accepts every URL. */
    public static final char[][] EMPTY_ROBOTS_FILTER = new char[0][];

    private URLRespectsRobots() {
    }

    /**
     * Converts a set of strings into a sorted, prefix-free array of character arrays.
     *
     * <p>The strings are sorted in natural order; then every string that starts with a
     * previously kept string is discarded, so only the shortest prefixes survive.
     *
     * @param set the strings to convert (not modified).
     * @return the surviving strings, sorted, each as a {@code char[]}; empty if {@code set} is empty.
     */
    public static char[][] toSortedPrefixFreeCharArrays(Set<String> set) {
        final String[] sorted = set.toArray(new String[set.size()]);
        Arrays.sort(sorted);

        // Compact in place: sorted[0..kept) holds the strings retained so far.
        int kept = 0;
        if (sorted.length != 0) {
            kept = 1;
            for (int i = 1; i < sorted.length; i++) {
                // After sorting, a string extending some kept prefix necessarily extends the last kept one.
                if (!sorted[i].startsWith(sorted[kept - 1])) {
                    sorted[kept++] = sorted[i];
                }
            }
        }

        final char[][] result = new char[kept][];
        for (int i = 0; i < kept; i++) {
            result[i] = sorted[i].toCharArray();
        }
        return result;
    }

    /**
     * Consumes tokens up to and including the next end of line, or up to end of file.
     *
     * @param st the tokenizer to advance.
     * @param token the last token already read from {@code st}; nothing is read if it is
     * already {@link StreamTokenizer#TT_EOL} or {@link StreamTokenizer#TT_EOF}.
     */
    private static void skipRestOfLine(StreamTokenizer st, int token) throws IOException {
        while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF) {
            token = st.nextToken();
        }
    }

    /**
     * Parses the content of a <code>robots.txt</code> file into a robots filter.
     *
     * <p>Only <code>User-agent:</code> and <code>Disallow:</code> lines are interpreted
     * (case-insensitively). A blank line ends the current record. A record applies to us when
     * {@code userAgent} starts, ignoring case, with the token following <code>User-agent:</code>.
     * If at least one such record was seen, only the prefixes collected from those records are
     * returned; otherwise the prefixes collected from <code>User-agent: *</code> records are used.
     * An empty <code>Disallow:</code> clears the prefixes collected so far for the current target,
     * and a single trailing <code>*</code> on a disallowed path is stripped.
     *
     * <p>Comment lines and lines starting with an unrecognised directive are skipped in their
     * entirety, so that they do not terminate the record they appear in.
     *
     * @param content the content of the <code>robots.txt</code> file.
     * @param userAgent the user agent whose rules must be extracted.
     * @return the sorted, prefix-free array of disallowed prefixes for {@code userAgent}.
     * @throws IOException if reading {@code content} fails.
     */
    public static char[][] parseRobotsReader(Reader content, String userAgent) throws IOException {
        // Prefixes disallowed by records addressed specifically to userAgent.
        final Set<String> set = new ObjectOpenHashSet<String>();
        // Prefixes disallowed by records addressed to "*".
        final Set<String> setStar = new ObjectOpenHashSet<String>();
        // True while the record being read is addressed to userAgent.
        boolean doesMatter = false;
        // True if at least one record addressed to userAgent has been seen.
        boolean specific = false;
        // True if at least one record addressed to "*" has been seen.
        boolean generic = false;
        // True while the last user-agent line seen was addressed to "*".
        boolean starSection = false;

        final StreamTokenizer st = new StreamTokenizer(new FastBufferedReader(content));
        st.resetSyntax();
        st.eolIsSignificant(true); // EOLs delimit lines and records.
        st.wordChars(33, 255); // Every printable character may be part of a token...
        st.whitespaceChars(0, 32);
        st.ordinaryChar('#'); // ...except '#', which is returned on its own so that comments can be skipped.
        st.lowerCaseMode(false);

        int lineFirstToken;
        while ((lineFirstToken = st.nextToken()) != StreamTokenizer.TT_EOF) {
            switch (lineFirstToken) {
                case StreamTokenizer.TT_EOL: {
                    // A blank line: the current record is over.
                    doesMatter = false;
                    break;
                }
                case StreamTokenizer.TT_NUMBER:
                case '#': {
                    // A comment: ignore everything up to the end of the line.
                    skipRestOfLine(st, lineFirstToken);
                    break;
                }
                case StreamTokenizer.TT_WORD: {
                    final String directive = st.sval;
                    int token = lineFirstToken;
                    if (directive.equalsIgnoreCase("user-agent:")) {
                        token = st.nextToken();
                        if (token == StreamTokenizer.TT_WORD) {
                            if (StringUtils.startsWithIgnoreCase(userAgent, st.sval)) {
                                doesMatter = true;
                                specific = true;
                                starSection = false;
                            } else if (st.sval.equals("*")) {
                                starSection = true;
                                generic = true;
                            } else {
                                starSection = false;
                            }
                        }
                    } else if (directive.equalsIgnoreCase("disallow:")) {
                        token = st.nextToken();
                        if (token == StreamTokenizer.TT_EOL) {
                            // An empty Disallow: discards what was collected so far for the current target.
                            if (doesMatter) {
                                set.clear();
                            } else if (starSection) {
                                setStar.clear();
                            }
                        } else if (token == StreamTokenizer.TT_WORD) {
                            String disallowed = st.sval;
                            // Entries are matched as prefixes, so a trailing wildcard is redundant.
                            if (disallowed.endsWith("*")) {
                                disallowed = disallowed.substring(0, disallowed.length() - 1);
                            }
                            if (doesMatter) {
                                set.add(disallowed);
                            } else if (starSection) {
                                setStar.add(disallowed);
                            }
                        }
                    } else if (LOGGER.isTraceEnabled()) {
                        LOGGER.trace("Line first token {} ununderstandable in robots.txt", directive);
                    }
                    // Discard the remainder of the line. This matters for unrecognised directives too:
                    // were their EOL left in the stream, it would be read as a blank line and would
                    // wrongly close the current record.
                    skipRestOfLine(st, token);
                    break;
                }
                default: {
                    if (LOGGER.isTraceEnabled()) {
                        LOGGER.trace("Found unknown token type {} in robots.txt", Integer.valueOf(lineFirstToken));
                    }
                    break;
                }
            }
        }

        // Records addressed to us win; "*" records are used only when no specific record exists.
        return toSortedPrefixFreeCharArrays(specific || !generic ? set : setStar);
    }

    /**
     * Builds a robots filter from the response obtained when fetching <code>robots.txt</code>.
     *
     * <p>A 4xx or 5xx status yields {@link #EMPTY_ROBOTS_FILTER}; a status that is neither
     * 2xx nor 3xx (nor 4xx/5xx) yields {@code null}. Otherwise the entity content is decoded
     * as UTF-8, after skipping a leading byte-order mark if one is detected, and passed to
     * {@link #parseRobotsReader(Reader, String)}.
     *
     * @param robotsResponse the response to the <code>robots.txt</code> request.
     * @param userAgent the user agent whose rules must be extracted.
     * @return the robots filter, {@link #EMPTY_ROBOTS_FILTER}, or {@code null} as described above.
     * @throws IOException if reading the response content fails.
     */
    public static char[][] parseRobotsResponse(URIResponse robotsResponse, String userAgent) throws IOException {
        final int status = robotsResponse.response().getStatusLine().getStatusCode();
        final int statusClass = status / 100;
        if (statusClass != 2) {
            LOGGER.info("Got status {} while fetching robots: URL was {}", Integer.valueOf(status), robotsResponse.uri());
        }
        if (statusClass == 4 || statusClass == 5) {
            return EMPTY_ROBOTS_FILTER;
        }
        if (statusClass != 2 && statusClass != 3) {
            return null;
        }

        // The stream is created in "include" mode, so the detected BOM (if any) is skipped by hand.
        BOMInputStream bomInputStream = new BOMInputStream(robotsResponse.response().getEntity().getContent(), true);
        int bomLength = bomInputStream.hasBOM() ? bomInputStream.getBOM().length() : 0;
        bomInputStream.skip((long)bomLength);

        final char[][] result = parseRobotsReader(new InputStreamReader(bomInputStream, Charsets.UTF_8), userAgent);
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Robots for {} successfully got with status {}: {}", new Object[]{robotsResponse.uri(), status, URLRespectsRobots.toString(result)});
        }
        return result;
    }

    /**
     * Compares a character array with a string, character by character, then by length.
     *
     * @return a negative, zero or positive value if {@code left} is respectively smaller than,
     * equal to or larger than {@code right}.
     */
    private static int compare(char[] left, String right) {
        final int common = Math.min(left.length, right.length());
        for (int i = 0; i < common; i++) {
            final int diff = left[i] - right.charAt(i);
            if (diff != 0) {
                return diff;
            }
        }
        return left.length - right.length();
    }

    /**
     * Checks that a string does <em>not</em> start with a given prefix.
     *
     * @return {@code true} if {@code prefix} is longer than {@code s} or differs from the
     * beginning of {@code s}; {@code false} if {@code s} starts with {@code prefix}.
     */
    private static boolean doesNotStartsWith(String s, char[] prefix) {
        if (prefix.length > s.length()) {
            return true;
        }
        for (int i = prefix.length - 1; i >= 0; i--) {
            if (s.charAt(i) != prefix[i]) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether a URL passes a robots filter.
     *
     * <p>The path and query of {@code url} are looked up in {@code robotsFilter} by binary
     * search. An exact match means the URL is disallowed; otherwise only the entry immediately
     * preceding the insertion point can be a prefix of the URL, so that is the only one checked.
     *
     * @param robotsFilter a sorted, prefix-free robots filter.
     * @param url the URL to check.
     * @return {@code true} if no entry of {@code robotsFilter} is a prefix of the path and
     * query of {@code url}; {@code false} otherwise.
     */
    public static boolean apply(char[][] robotsFilter, URI url) {
        if (robotsFilter.length == 0) {
            return true;
        }
        final String pathQuery = BURL.pathAndQuery(url);
        int from = 0;
        int to = robotsFilter.length - 1;
        while (from <= to) {
            final int mid = (from + to) >>> 1;
            final int cmp = compare(robotsFilter[mid], pathQuery);
            if (cmp < 0) {
                from = mid + 1;
            } else if (cmp > 0) {
                to = mid - 1;
            } else {
                return false; // The path is itself a disallowed prefix.
            }
        }
        // Every entry is larger than the path: none can be a prefix of it.
        return from == 0 || doesNotStartsWith(pathQuery, robotsFilter[from - 1]);
    }

    /**
     * Renders a robots filter as a bracketed, comma-separated list of quoted entries.
     *
     * <p>At most {@link #MAX_TO_STRING_ROBOTS} entries are printed; if there are more,
     * <code>,...</code> is appended before the closing bracket.
     *
     * @param robotsFilter a robots filter, or {@code null}.
     * @return the textual form of the filter; <code>[]</code> if {@code robotsFilter} is {@code null}.
     */
    public static String toString(char[][] robotsFilter) {
        if (robotsFilter == null) {
            return "[]";
        }
        final StringBuilder builder = new StringBuilder().append('[');
        final int shown = Math.min(robotsFilter.length, MAX_TO_STRING_ROBOTS);
        for (int i = 0; i < shown; i++) {
            if (i != 0) {
                builder.append(",");
            }
            builder.append('"').append(robotsFilter[i]).append('"');
        }
        if (shown != robotsFilter.length) {
            builder.append(",...");
        }
        return builder.append(']').toString();
    }

    /**
     * Command-line tester: parses the robots file named by {@code arg[0]} (read as UTF-8) for
     * the user agent {@code arg[1]}, prints the resulting filter entries on standard error,
     * then reads URLs from standard input, one per line, and prints for each the result of
     * {@link #apply(char[][], URI)} followed by a tab and the URL.
     *
     * @param arg the robots file name and the user agent.
     * @throws IOException if reading the file or standard input fails.
     */
    public static void main(String[] arg) throws IOException {
        char[][] robotsResult;
        // Decode as UTF-8, like parseRobotsResponse() does, instead of the platform default charset.
        final Reader robots = new InputStreamReader(new FileInputStream(arg[0]), Charsets.UTF_8);
        try {
            robotsResult = parseRobotsReader(robots, arg[1]);
        } finally {
            robots.close();
        }
        for (char[] a : robotsResult) {
            System.err.println(new String(a));
        }

        final FastBufferedReader in = new FastBufferedReader(new InputStreamReader(System.in, Charsets.US_ASCII));
        try {
            final MutableString s = new MutableString();
            while (in.readLine(s) != null) {
                final URI uri = BURL.parse(s);
                System.out.println(apply(robotsResult, uri) + "\t" + uri);
            }
        } finally {
            in.close();
        }
    }
}

