/*
 * Decompiled with CFR 0.152.
 */
package org.semanticweb.yars.nx.clean;

import java.io.PrintStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;
import org.semanticweb.yars.nx.DateTimeLiteral;
import org.semanticweb.yars.nx.Literal;
import org.semanticweb.yars.nx.Node;
import org.semanticweb.yars.nx.Resource;
import org.semanticweb.yars.nx.parser.ParseException;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Static helpers that clean up parsed statements: URIs are normalised, literals are
 * stripped of control characters and markup, and (optionally) plain literals that look
 * like dates are turned into {@link DateTimeLiteral}s.
 *
 * <p>The date formats below are shared {@link SimpleDateFormat} instances, which are not
 * synchronized; calls that perform date detection ({@code datatype == true}) must
 * therefore not run concurrently.
 */
public class Cleaner {
    /**
     * Format used to re-serialise a date recognised by one of {@link #_formats}.
     * Pinned to an English locale so the output does not vary with the platform default.
     */
    static SimpleDateFormat _iso = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.ENGLISH);

    /**
     * Candidate input formats, tried in order by {@link #normaliseLiteral(Literal, boolean)}.
     * Month and day names are matched in English regardless of the platform default locale.
     */
    static SimpleDateFormat[] _formats = new SimpleDateFormat[]{
        new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.ENGLISH),
        new SimpleDateFormat("dd-MMM-yy", Locale.ENGLISH),
        new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss", Locale.ENGLISH),
        new SimpleDateFormat("EEE', 'dd' 'MMM' 'yyyy' 'HH:mm:ss' 'Z", Locale.ENGLISH),
        new SimpleDateFormat("MM/dd/yy", Locale.ENGLISH)
    };

    /** Non-greedy match of anything between angle brackets; compiled once instead of per call. */
    private static final Pattern HTML_TAG = Pattern.compile("\\<.*?\\>");

    /**
     * Cleans every statement delivered by {@code in} and prints the surviving ones to
     * {@code out}, one per line, each node rendered with {@code toN3()} followed by a
     * space and the line terminated by {@code "."}.
     *
     * <p>A statement is dropped (and reported on {@code System.err}) when its arity does
     * not match {@code length} or when any of its nodes cannot be cleaned. Totals are
     * written to {@code System.err} at the end.
     *
     * @param in       source of statements
     * @param out      destination for cleaned statements
     * @param length   required number of nodes per statement, or {@code -1} to accept any
     * @param datatype passed through to {@link #clean(Node, boolean)}
     */
    public static void clean(Iterator<Node[]> in, PrintStream out, int length, boolean datatype) {
        int drop = 0;
        long linecount = 0L;
        while (in.hasNext()) {
            Node[] line = in.next();
            ++linecount;
            if (length != -1 && length != line.length) {
                System.err.println(linecount + ": doesn't have " + length + " elements but " + line.length);
                ++drop;
                continue;
            }
            Node[] clean = new Node[line.length];
            boolean write = true;
            for (int i = 0; i < line.length; ++i) {
                try {
                    clean[i] = Cleaner.clean(line[i], datatype);
                } catch (Exception e) {
                    // Keep going so every bad node of the statement is reported.
                    write = false;
                    System.err.println(linecount + ": cannot parse entry " + line[i] + " " + e.getMessage());
                }
            }
            if (!write) {
                // Count the statement once, however many of its nodes failed.
                ++drop;
                continue;
            }
            for (Node n : clean) {
                out.print(n.toN3() + " ");
            }
            out.println(".");
        }
        System.err.println("Processed  " + linecount + " statements");
        System.err.println("Dropped " + drop + " statements");
    }

    /**
     * Cleans a single node.
     *
     * <ul>
     *   <li>{@link Resource}: rebuilt from {@link #normaliseURI(String)}.</li>
     *   <li>{@link Literal}: control characters are replaced by spaces, angle-bracket
     *       markup is removed, the text is trimmed and escaped with
     *       {@code Literal.escapeForNx}; language tag and datatype are carried over.
     *       A literal with neither is additionally passed through
     *       {@link #normaliseLiteral(Literal, boolean)}.</li>
     *   <li>Anything else is returned unchanged.</li>
     * </ul>
     *
     * @param raw      node to clean
     * @param datatype whether plain literals should undergo date detection
     * @return the cleaned node
     * @throws URISyntaxException    if a resource URI is rejected, or if a literal is
     *                               empty after cleaning
     * @throws MalformedURLException if an http(s) resource cannot be turned into a URL
     */
    public static Node clean(Node raw, boolean datatype) throws URISyntaxException, MalformedURLException {
        if (raw instanceof Resource) {
            return new Resource(Cleaner.normaliseURI(raw.toString()));
        }
        if (!(raw instanceof Literal)) {
            return raw;
        }
        Literal l = (Literal) raw;
        String data = Cleaner.stripHTML(Cleaner.removeControlChars(l.toString())).trim();
        if (data.length() <= 0) {
            throw new URISyntaxException("", "dropping emtpy literals");
        }
        data = Literal.escapeForNx(data);

        boolean hasDatatype = l.getDatatype() != null;
        boolean hasLanguage = l.getLanguageTag() != null;
        if (!hasDatatype && !hasLanguage) {
            return Cleaner.normaliseLiteral(new Literal(data), datatype);
        }
        if (hasDatatype && hasLanguage) {
            return new Literal(data, l.getLanguageTag(), l.getDatatype());
        }
        if (hasDatatype) {
            return new Literal(data, l.getDatatype());
        }
        return new Literal(data, l.getLanguageTag());
    }

    /**
     * Trims a literal and, when {@code datatype} is set and the literal carries neither
     * language tag nor datatype, tries to recognise it as a date.
     *
     * <p>Detection first asks {@code DateTimeLiteral.parseISO8601}; failing that, each
     * entry of {@link #_formats} is tried in order and the first match is re-serialised
     * with {@link #_iso}. If nothing matches, {@code l} itself is returned.
     *
     * <p>NOTE(review): the formats are used as-is, i.e. with {@link SimpleDateFormat}'s
     * default lenient parsing, and {@code parse(String)} does not require the whole
     * string to be consumed - confirm this looseness is intended.
     *
     * @param l        literal to normalise
     * @param datatype whether to attempt date detection
     * @return a trimmed copy, a {@link DateTimeLiteral}, or {@code l} unchanged
     * @throws URISyntaxException declared for compatibility; not thrown by this method itself
     */
    public static Literal normaliseLiteral(Literal l, boolean datatype) throws URISyntaxException {
        String data = l.getData().trim();
        if (l.getLanguageTag() != null) {
            return new Literal(data, l.getLanguageTag());
        }
        if (l.getDatatype() != null || !datatype) {
            // With detection off the datatype is null here; the same two-argument
            // constructor call as before is kept so the resulting literal is unchanged.
            return new Literal(data, l.getDatatype());
        }

        try {
            DateTimeLiteral.parseISO8601(data);
            return new DateTimeLiteral(data);
        } catch (Exception notIso) {
            // Not ISO 8601 - fall through to the looser formats. Errors are no longer swallowed.
        }

        for (SimpleDateFormat f : _formats) {
            try {
                Date date = f.parse(data);
                return new DateTimeLiteral(_iso.format(date));
            } catch (Exception noMatch) {
                // This format (or the re-serialised value) was rejected - try the next one.
            }
        }
        return l;
    }

    /**
     * Normalises a URI string.
     *
     * <p>Spaces are escaped as {@code %20} and the URI is parsed and
     * {@linkplain URI#normalize() normalised}. Opaque URIs are returned in that escaped
     * form without further changes. For hierarchical URIs:
     * <ul>
     *   <li>scheme and host are lower-cased;</li>
     *   <li>an empty path becomes {@code "/"};</li>
     *   <li>a trailing {@code index.html}, {@code index.htm}, {@code index.asp},
     *       {@code index.jsp}, {@code index.php}, {@code default.asp} or
     *       {@code default.aspx} (the last two case-insensitively) is removed;</li>
     *   <li>the scheme's default port is removed (80 for http, 443 for https); any other
     *       scheme/port combination is kept;</li>
     *   <li>if the authority cannot be parsed into a host, it is kept as given instead
     *       of being dropped.</li>
     * </ul>
     *
     * <p>NOTE(review): components are read in decoded form and re-quoted by the
     * {@link URI} constructor, so an escaped delimiter such as {@code %2F} in the path
     * or {@code %26} in the query may come back unescaped - confirm whether that matters
     * for callers.
     *
     * @param uri URI to normalise
     * @return the normalised URI string
     * @throws URISyntaxException    if {@code uri} cannot be parsed or has no scheme
     * @throws MalformedURLException if the scheme starts with {@code http} but the URI
     *                               cannot be converted to a URL
     */
    public static String normaliseURI(String uri) throws URISyntaxException, MalformedURLException {
        URI raw = new URI(uri.replace(" ", "%20")).normalize();
        if (raw.isOpaque()) {
            // An opaque URI is absolute by definition, so a scheme is always present.
            // Return the escaped form rather than the input, which may contain spaces.
            return raw.toString();
        }

        String scheme = raw.getScheme();
        if (scheme == null) {
            throw new URISyntaxException(uri, "dropping uris without scheme");
        }
        scheme = scheme.toLowerCase(Locale.ENGLISH);
        if (scheme.startsWith("http")) {
            // Called only for its validation side effect.
            raw.toURL();
        }

        String path = raw.getPath();
        if (path != null) {
            if (path.equals("")) {
                path = "/";
            } else if (isDirectoryIndex(path)) {
                path = path.substring(0, path.lastIndexOf('/') + 1);
            }
        }

        int port = raw.getPort();
        if ((port == 80 && "http".equals(scheme)) || (port == 443 && "https".equals(scheme))) {
            port = -1;
        }

        String host = raw.getHost();
        URI u;
        if (host != null) {
            u = new URI(scheme, raw.getUserInfo(), host.toLowerCase(Locale.ENGLISH), port, path,
                    raw.getQuery(), raw.getFragment());
        } else {
            // No parsable host (e.g. a name containing '_'): carry the authority over
            // verbatim; the host-based constructor would silently omit it.
            u = new URI(scheme, raw.getAuthority(), path, raw.getQuery(), raw.getFragment());
        }
        return u.toString();
    }

    /** Tells whether {@code path} ends in one of the directory-index file names that get stripped. */
    private static boolean isDirectoryIndex(String path) {
        String lower = path.toLowerCase(Locale.ENGLISH);
        return path.endsWith("/index.html")
                || path.endsWith("/index.htm")
                || path.endsWith("/index.asp")
                || lower.endsWith("/default.asp")
                || lower.endsWith("/default.aspx")
                || path.endsWith("/index.jsp")
                || path.endsWith("/index.php");
    }

    /** Replaces every character in the range U+0000 to U+001F with a single space. */
    private static String removeControlChars(String lit) {
        StringBuilder result = new StringBuilder(lit.length());
        for (int i = 0; i < lit.length(); ++i) {
            char c = lit.charAt(i);
            result.append(c < 0x20 ? ' ' : c);
        }
        return result.toString();
    }

    /** Removes every {@code <...>} span (shortest match) from the literal. */
    private static String stripHTML(String literal) {
        return HTML_TAG.matcher(literal).replaceAll("");
    }
}

