/*
 * Decompiled with CFR 0.152.
 */
package com.fuwenchao.utils.templateParser;

import com.fuwenchao.utils.JacksonUtils;
import com.fuwenchao.utils.httpclient.MyCrawler;
import com.fuwenchao.utils.templateParser.DomCFGTree;
import com.fuwenchao.utils.templateParser.DomConfig;
import com.fuwenchao.utils.templateParser.DomSearch;
import com.fuwenchao.utils.templateParser.DomTemplate;
import com.fuwenchao.utils.templateParser.Template;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.html.dom.HTMLDocumentImpl;
import org.cyberneko.html.parsers.DOMFragmentParser;
import org.w3c.dom.DocumentFragment;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

public class DomParser {
    /** Logger for this class. */
    private static final Log LOG = LogFactory.getLog(DomParser.class);

    /** Logger obtained under the name {@code com.bfd.parse.TemplateLog}; not referenced elsewhere in this class. */
    private static final Log TLOG = LogFactory.getLog("com.bfd.parse.TemplateLog");

    /** Matches a run of one or more decimal digits; not referenced elsewhere in this class. */
    private static final Pattern cntFilter = Pattern.compile("\\d+");

    /**
     * Captures the digits enclosed between the characters U+5171 and U+9875
     * (presumably a "N pages in total" label - confirm against usage); not referenced elsewhere in this class.
     */
    private static final Pattern totalNumReges = Pattern.compile("\u5171(\\d+)\u9875");

    /** Name of the thread that was current when this instance was created. */
    private String workName = Thread.currentThread().getName();

    /** Search helper created in the constructor; performs node matching and holds the parse result. */
    private DomSearch domSearch;

    /**
     * Creates a parser with its own {@link DomSearch}, constructed with the
     * worker name captured from the creating thread.
     */
    public DomParser() {
        domSearch = new DomSearch(workName);
    }

    /**
     * Converts a list of property maps into a list of beans.
     *
     * <p>For every {@link Map} element of {@code data}, {@code bean} is cloned and the
     * clone is populated from the map. The conversion is best-effort: an element that
     * is not a map, or whose clone/populate step throws, is logged and skipped, so the
     * result may be shorter than the input.
     *
     * @param bean prototype bean that is cloned once per element
     * @param data expected to be a {@link List} of maps; any other value yields an empty list
     * @param <T>  bean type
     * @return a new, mutable list of populated clones; never {@code null}
     */
    @SuppressWarnings("unchecked")
    public <T> List<T> transDataToListObjs(T bean, Object data) {
        List<T> result = new ArrayList<T>();
        if (!(data instanceof List)) {
            return result;
        }
        for (Object element : (List<?>) data) {
            if (!(element instanceof Map)) {
                // Skip instead of letting a ClassCastException abort the whole conversion.
                LOG.warn("transDataToObj skipped non-map element: " + element);
                continue;
            }
            try {
                // cloneBean returns an instance of the prototype's class, so the cast is safe.
                T clone = (T) BeanUtils.cloneBean(bean);
                BeanUtils.populate(clone, (Map<String, Object>) element);
                result.add(clone);
            } catch (Exception e) {
                LOG.warn("transDataToObj exception. ", e);
            }
        }
        return result;
    }

    /**
     * Tells whether the template set contains at least one configuration tree
     * whose type equals {@code type}, ignoring case.
     *
     * @param tpl  template set to inspect; {@code null} yields {@code false}
     * @param type type name to look for
     * @return {@code true} if a tree of the given type exists, {@code false} otherwise
     */
    private boolean checkDomTemplate(DomTemplate tpl, String type) {
        if (tpl != null) {
            // An empty template list simply falls through to the final return.
            for (DomCFGTree candidate : tpl.getTemplates()) {
                if (candidate.getType().equalsIgnoreCase(type)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Runs the template search over a parsed document and returns the extracted fields.
     *
     * <p>The outcome is logged at info level either way. On failure the log line carries
     * the search tree dump, cut to 190 characters when it is longer than 200.
     *
     * @param type    page type passed to the search
     * @param bid     business id; used only in log output here
     * @param url     page URL passed to the search and logged
     * @param doc     parsed document fragment to search
     * @param tpl     template set to match against
     * @param charset charset name passed to the search
     * @return the parse result map, or {@code null} when the search reports no match
     */
    public Map<String, Object> parseData(String type, String bid, String url, DocumentFragment doc, DomTemplate tpl, String charset) {
        if (this.domSearch.searchNode(doc, tpl, type, url, charset)) {
            Map<String, Object> parsed = this.domSearch.getParseResult();
            int itemCount = parsed.keySet().size();
            LOG.info(this.workName + " parse ok, got " + itemCount + " items, template idx=" + this.domSearch.getMatchTmplIDX() + ", bid=" + bid + ", type=" + type + ", url=" + url);
            return parsed;
        }

        String treeDump = this.domSearch.getTreeString();
        String shown;
        if (treeDump.length() > 200) {
            shown = treeDump.substring(0, 190);
        } else {
            shown = treeDump;
        }
        LOG.info(this.workName + " NO PARSE RESULT, bid=" + bid + ", type=" + type + ", url=" + url + ", Err:\n" + shown);
        return null;
    }

    /**
     * Exposes the search helper owned by this parser.
     *
     * @return the {@link DomSearch} created in the constructor
     */
    public DomSearch getDomSearch() {
        return domSearch;
    }

    /**
     * Looks up the template set registered for a business id in the shared {@link DomConfig}.
     *
     * @param bid business id
     * @return whatever {@code DomConfig.getByBizid} returns for {@code bid}
     */
    public DomTemplate getDomTemplate(String bid) {
        DomConfig config = DomConfig.getInstance();
        return config.getByBizid(bid);
    }

    /**
     * Looks up the configuration trees registered for a business id and page type
     * in the shared {@link DomConfig}.
     *
     * @param bid  business id
     * @param type page type
     * @return whatever {@code DomConfig.get} returns for the pair
     */
    public List<DomCFGTree> getDomTemplate(String bid, String type) {
        DomConfig config = DomConfig.getInstance();
        return config.get(bid, type);
    }

    /**
     * Parses raw HTML bytes into a document fragment.
     *
     * <p>The bytes are decoded with {@code charset}, every U+001A (SUB) character is
     * replaced by a space, and the text is re-encoded with the same charset before
     * being handed to {@link #parse2Html(InputSource, String)}.
     *
     * @param data    HTML bytes
     * @param charset charset name used for decoding, re-encoding and parsing
     * @return the parsed fragment
     * @throws Exception if the charset is unsupported or parsing fails
     */
    public static DocumentFragment parse2Html(byte[] data, String charset) throws Exception {
        String decoded = new String(data, charset);
        String sanitized = decoded.replace('\u001a', ' ');
        ByteArrayInputStream stream = new ByteArrayInputStream(sanitized.getBytes(charset));
        InputSource source = new InputSource(stream);
        return DomParser.parse2Html(source, charset);
    }

    /**
     * Parses HTML from an input source into a single document fragment using the
     * NekoHTML fragment parser.
     *
     * <p>The parser is configured with {@code encoding} as its default encoding and
     * told to ignore any charset specified inside the document. A failure to apply
     * the configuration is logged and parsing proceeds with whatever settings took
     * effect.
     *
     * <p>The input is parsed repeatedly: the first pass is always appended, and
     * further passes are appended until one yields a fragment without child nodes.
     *
     * @param input    source to read the HTML from
     * @param encoding charset name used as the parser's default encoding
     * @return a fragment holding the nodes of all parse passes
     * @throws Exception if a parse pass fails
     */
    public static DocumentFragment parse2Html(InputSource input, String encoding) throws Exception {
        DOMFragmentParser parser = new DOMFragmentParser();
        try {
            parser.setFeature("http://cyberneko.org/html/features/augmentations", true);
            parser.setProperty("http://cyberneko.org/html/properties/default-encoding", encoding);
            parser.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset", true);
            parser.setFeature("http://cyberneko.org/html/features/balance-tags/ignore-outside-content", false);
            parser.setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true);
            parser.setFeature("http://cyberneko.org/html/features/report-errors", false);
        } catch (SAXException e) {
            // Best-effort configuration: report through the class logger rather than stderr.
            LOG.warn("Failed to configure HTML fragment parser", e);
        }

        HTMLDocumentImpl doc = new HTMLDocumentImpl();
        doc.setErrorChecking(false);
        DocumentFragment res = doc.createDocumentFragment();

        // First pass is appended unconditionally.
        DocumentFragment frag = doc.createDocumentFragment();
        parser.parse(input, frag);
        res.appendChild(frag);

        // Keep parsing the same input until a pass produces nothing.
        for (;;) {
            frag = doc.createDocumentFragment();
            parser.parse(input, frag);
            if (!frag.hasChildNodes()) {
                break;
            }
            res.appendChild(frag);
        }
        return res;
    }

    /**
     * Parses an HTML string into a document fragment.
     *
     * <p>The string is encoded with {@code encoding} before parsing, so the bytes
     * match the encoding the parser is told to decode with. Encoding with the
     * platform default charset would corrupt non-ASCII text whenever the two differ.
     *
     * @param data     HTML text
     * @param encoding charset name used both to encode {@code data} and as the parser's
     *                 default encoding; when {@code null} the platform default charset
     *                 is used for encoding
     * @return the parsed fragment
     * @throws Exception if the charset is unsupported or parsing fails
     */
    public static DocumentFragment parse2Html(String data, String encoding) throws Exception {
        byte[] raw = encoding != null ? data.getBytes(encoding) : data.getBytes();
        return DomParser.parse2Html(new InputSource(new ByteArrayInputStream(raw)), encoding);
    }

    /**
     * Non-throwing variant of byte parsing: wraps the bytes in an input source tagged
     * with {@code encoding} and delegates to {@link #parse2Html(InputSource, String)}.
     *
     * @param bytes    document bytes
     * @param encoding charset name set on the input source and passed to the parser
     * @return the parsed fragment, or {@code null} if any exception occurred (it is logged)
     */
    public static DocumentFragment parse2Xml(byte[] bytes, String encoding) {
        DocumentFragment parsed = null;
        try {
            InputSource source = new InputSource(new ByteArrayInputStream(bytes));
            source.setEncoding(encoding);
            parsed = DomParser.parse2Html(source, encoding);
        } catch (Exception e) {
            LOG.warn("Parsing to XML Exception, Err", e);
        }
        return parsed;
    }

    /**
     * Non-throwing variant of string parsing: encodes the text, wraps it in an input
     * source tagged with {@code encoding} and delegates to
     * {@link #parse2Html(InputSource, String)}.
     *
     * <p>The text is encoded with {@code encoding} so the bytes agree with the encoding
     * declared on the input source. Encoding with the platform default charset would
     * corrupt non-ASCII text whenever the two differ.
     *
     * @param content  document text
     * @param encoding charset name used to encode {@code content}, set on the input source
     *                 and passed to the parser; when {@code null} the platform default
     *                 charset is used for encoding
     * @return the parsed fragment, or {@code null} if any exception occurred (it is logged)
     */
    public static DocumentFragment parse2Xml(String content, String encoding) {
        try {
            byte[] raw = encoding != null ? content.getBytes(encoding) : content.getBytes();
            InputSource input = new InputSource(new ByteArrayInputStream(raw));
            input.setEncoding(encoding);
            return DomParser.parse2Html(input, encoding);
        } catch (Exception e) {
            LOG.warn("Parsing to XML Exception, Err", e);
            return null;
        }
    }

    /**
     * Builds the template used by the manual test driver: a fresh {@link Template}
     * with its active flag set to 1 and nothing else configured.
     *
     * @return the new template
     */
    private static Template getTestTemplate() {
        Template template = new Template();
        template.setActive(1);
        return template;
    }

    /**
     * Manual test driver: loads a page, parses it, runs the test template against it
     * as page type {@code "item"} and prints the result to standard output.
     *
     * <p>If the page cannot be loaded or parsed, the failure is logged and the method
     * returns without running the template search.
     *
     * @param uri       file path or URL of the page, depending on {@code isFromNet}
     * @param cid       id passed to {@code parseData} as its business id
     * @param encoding  charset name used for loading and parsing
     * @param dns       value passed to {@code parseData} as its url argument
     * @param isFromNet {@code true} to fetch {@code uri} over the network, {@code false} to read it as a file
     */
    private static void testTemplateByHtml(String uri, String cid, String encoding, String dns, boolean isFromNet) {
        DomParser domParser = new DomParser();
        DomTemplate tpl = new DomTemplate(DomParser.getTestTemplate());

        String html = isFromNet ? DomParser.getHtmlFromNet(uri, encoding) : DomParser.getHtmlFromFile(uri, encoding);
        if (html == null) {
            LOG.warn("No HTML could be loaded from " + uri);
            return;
        }

        DocumentFragment doc;
        try {
            doc = DomParser.parse2Html(html, encoding);
        } catch (Exception e) {
            // Without a document there is nothing to search; stop here.
            LOG.warn("Failed to parse HTML loaded from " + uri, e);
            return;
        }

        // NOTE(review): parseData returns null when nothing matched; confirm compressMap accepts null.
        Map<String, Object> rs = domParser.parseData("item", cid, dns, doc, tpl, encoding);
        System.out.println(JacksonUtils.compressMap(rs));
    }

    /**
     * Reads a whole file and decodes it with the given charset.
     *
     * <p>The stream is always closed. A read failure is logged and the buffer is
     * decoded as far as it was filled, so a missing file yields an empty string.
     *
     * @param fname    path of the file to read
     * @param encoding charset name used to decode the file content
     * @return the decoded content, or {@code null} if {@code encoding} is not supported
     */
    private static String getHtmlFromFile(String fname, String encoding) {
        File file = new File(fname);
        byte[] bytes = new byte[(int) file.length()];
        try {
            DataInputStream in = new DataInputStream(new FileInputStream(file));
            try {
                in.readFully(bytes);
            } finally {
                // Release the file handle whether or not the read succeeded.
                in.close();
            }
        } catch (Exception e) {
            LOG.warn("Failed to read file " + fname, e);
        }

        try {
            return new String(bytes, encoding);
        } catch (UnsupportedEncodingException e) {
            LOG.warn("Unsupported encoding " + encoding + " for file " + fname, e);
            return null;
        }
    }

    /**
     * Fetches a page with a new {@link MyCrawler} and returns the element at index 1
     * of the crawler's response array, after echoing it to standard output.
     *
     * @param url      address to fetch
     * @param encoding not used by this method
     * @return the fetched text
     */
    private static String getHtmlFromNet(String url, String encoding) {
        // NOTE(review): index 1 is presumably the response body - confirm against MyCrawler.get.
        String html = new MyCrawler().get(url)[1];
        System.out.println("html:" + html);
        return html;
    }

    /**
     * Manual entry point: runs the test template once against a local HTML file and
     * once against a live URL, then exits the JVM with status 0.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String encoding = "utf-8";
        final String dns = "taobao.com";
        final String cid = "Mtaobao";
        final String url = "http://item.taobao.com/item.htm?id=38368494151";

        // The instance itself is unused; it is still constructed, as the original code did.
        DomParser domParser = new DomParser();

        DomParser.testTemplateByHtml("d:\\testhtml\\yhd.html", cid, encoding, dns, false);
        DomParser.testTemplateByHtml(url, cid, encoding, dns, true);
        System.exit(0);
    }

    /**
     * Builds a two-entry task envelope: {@code "taskdata"} holds a map of fixed task
     * attributes plus the given url, iid and cid, and {@code "spiderdata"} holds the
     * supplied spider data as-is.
     *
     * @param spiderData value stored under {@code "spiderdata"}
     * @param url        value stored under {@code "url"} in the task data
     * @param iid        value stored under {@code "iid"} in the task data
     * @param cid        value stored under {@code "cid"} in the task data
     * @return the new envelope map
     */
    private static Map<String, Object> initTaskMap(Map<String, Object> spiderData, String url, String iid, String cid) {
        Map<String, Object> task = new HashMap<String, Object>();
        // Caller-supplied values.
        task.put("url", url);
        task.put("iid", iid);
        task.put("cid", cid);
        // Fixed attributes.
        task.put("type", "item");
        task.put("purl", "");
        task.put("datatype", "html");
        task.put("ajaxdatatype", "1");
        task.put("projname", "ItemMonitor");
        task.put("cate", "cate");
        task.put("parsetype", Integer.valueOf(0));

        Map<String, Object> envelope = new HashMap<String, Object>();
        envelope.put("taskdata", task);
        envelope.put("spiderdata", spiderData);
        return envelope;
    }

    private class ParseRS {
        private int parseCode = -1;
        private Map<String, Object> rs = null;

        private ParseRS() {
        }

        public int getParseCode() {
            return this.parseCode;
        }

        public void setParseCode(int parseCode) {
            this.parseCode = parseCode;
        }

        public Map<String, Object> getRs() {
            return this.rs;
        }

        public void setRs(Map<String, Object> rs) {
            this.rs = rs;
        }
    }
}

