/*
 * Decompiled with CFR 0.152.
 */
package com.bfd.crawler.utils.htmlcleaner;

import com.bfd.crawler.utils.JsonUtils;
import com.bfd.crawler.utils.crawler.httpclient43.crawler.httpclient.Crawl4HttpClient;
import com.bfd.crawler.utils.htmlcleaner.SelfNode;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.htmlcleaner.BaseToken;
import org.htmlcleaner.CommentNode;
import org.htmlcleaner.ContentNode;
import org.htmlcleaner.EndTagToken;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XPatherException;

/**
 * Static helpers around HtmlCleaner {@link TagNode} trees: XPath lookups,
 * text/attribute extraction, link collection, node removal, and a few
 * diagnostic dump methods that print to standard output.
 *
 * <p>The lookup methods are null-tolerant: a {@code null} node, an XPath that
 * fails to evaluate, or an XPath whose result is not a {@link TagNode} yields
 * the documented fallback value instead of an exception.
 */
public class HtmlCleanerUtil {
    private static final Log LOG = LogFactory.getLog(HtmlCleanerUtil.class);

    /**
     * Prints the tag children of {@code node} recursively to standard output,
     * one line per tag: {@code level} tab characters, the tag name, then the
     * tag's attributes rendered by {@code JsonUtils.toJSONString}.
     *
     * @param node  the node whose descendants are printed; {@code null} prints nothing
     * @param level the indentation depth (number of tabs) for the direct children
     */
    public static void getAllNodePathByTagNode(TagNode node, int level) {
        if (node == null) {
            return;
        }
        // The indent is the same for every child at this level, so build it once.
        StringBuilder indent = new StringBuilder();
        for (int i = 0; i < level; ++i) {
            indent.append("\t");
        }
        List<? extends BaseToken> children = node.getAllChildren();
        for (BaseToken token : children) {
            if (!(token instanceof TagNode)) {
                continue;
            }
            TagNode child = (TagNode) token;
            StringBuilder line = new StringBuilder();
            line.append(indent);
            line.append(child.getName());
            line.append(JsonUtils.toJSONString(child.getAttributes()));
            System.out.println(line);
            HtmlCleanerUtil.getAllNodePathByTagNode(child, level + 1);
        }
    }

    /**
     * Returns the first node matched by {@code xpath} below {@code node}.
     *
     * @param node  the node the XPath is evaluated against
     * @param xpath the XPath expression
     * @return the first match, or {@code null} when {@code node} is {@code null},
     *         the expression cannot be evaluated, nothing matches, or the first
     *         match is not a {@link TagNode}
     */
    public static TagNode getNodeByXpath(TagNode node, String xpath) {
        Object[] matches = evaluate(node, xpath);
        if (matches == null) {
            return null;
        }
        if (matches.length == 0) {
            LOG.info(xpath + " not get node will return null");
            return null;
        }
        if (!(matches[0] instanceof TagNode)) {
            LOG.warn(xpath + " did not select a tag node, will return null");
            return null;
        }
        return (TagNode) matches[0];
    }

    /**
     * Returns every {@link TagNode} matched by {@code xpath} below {@code node}.
     *
     * @param node  the node the XPath is evaluated against
     * @param xpath the XPath expression
     * @return the matched tag nodes in result order; never {@code null}, empty when
     *         nothing matches or the expression cannot be evaluated. Results that
     *         are not tag nodes are skipped.
     */
    public static List<TagNode> getNodesByXpath(TagNode node, String xpath) {
        ArrayList<TagNode> nodes = new ArrayList<TagNode>();
        Object[] matches = evaluate(node, xpath);
        if (matches == null) {
            return nodes;
        }
        if (matches.length == 0) {
            LOG.info(xpath + " not get node will return empty list");
            return nodes;
        }
        for (Object match : matches) {
            if (match instanceof TagNode) {
                nodes.add((TagNode) match);
            }
        }
        return nodes;
    }

    /**
     * Reads a value from the first node matched by {@code xpath}.
     *
     * @param node         the node the XPath is evaluated against
     * @param xpath        the XPath expression
     * @param defaultValue returned when no value can be obtained
     * @param attrName     the attribute to read; the special name {@code "text"}
     *                     (case-insensitive) reads the node's text instead
     * @return the trimmed text or attribute value, or {@code defaultValue} when
     *         nothing matches, the first match is not a tag node, or the
     *         attribute is absent
     */
    public static String getValueByXpath(TagNode node, String xpath, String defaultValue, String attrName) {
        TagNode tag = firstTagNode(evaluate(node, xpath));
        if (tag == null) {
            return defaultValue;
        }
        if ("text".equalsIgnoreCase(attrName)) {
            return tag.getText().toString().trim();
        }
        // getAttributeByName yields null for a missing attribute; fall back instead of NPE-ing on trim().
        String value = attrName == null ? null : tag.getAttributeByName(attrName);
        return value == null ? defaultValue : value.trim();
    }

    /**
     * Returns the concatenated non-blank content children of the first node
     * matched by {@code xpath}; see {@link #getContentByTagNode(TagNode)}.
     *
     * @param node         the node the XPath is evaluated against
     * @param xpath        the XPath expression
     * @param defaultValue returned when nothing usable matches
     * @param attrName     not used by this method; kept for signature compatibility
     * @return the collected content, or {@code defaultValue} when nothing matches
     *         or the first match is not a tag node
     */
    public static String getContentByXpath(TagNode node, String xpath, String defaultValue, String attrName) {
        TagNode tagNode = firstTagNode(evaluate(node, xpath));
        if (tagNode == null) {
            return defaultValue;
        }
        return HtmlCleanerUtil.getContentByTagNode(tagNode);
    }

    /**
     * Collects the {@code href} attribute of every {@code <a>} element in {@code html}.
     *
     * @param url  not used by this method; hrefs are returned exactly as written
     *             in the document (no trimming, no resolution against a base URL)
     * @param html the HTML text to parse
     * @return the non-blank href values in document order; never {@code null},
     *         empty when {@code html} is {@code null}
     */
    public static List<String> getAllHref(String url, String html) {
        ArrayList<String> hrefs = new ArrayList<String>();
        if (html == null) {
            return hrefs;
        }
        TagNode root = new HtmlCleaner().clean(html);
        Object[] anchors = evaluate(root, "//a");
        if (anchors == null) {
            return hrefs;
        }
        for (Object anchor : anchors) {
            if (!(anchor instanceof TagNode)) {
                continue;
            }
            String href = ((TagNode) anchor).getAttributeByName("href");
            if (href == null || href.trim().length() == 0) {
                continue;
            }
            hrefs.add(href);
        }
        return hrefs;
    }

    /**
     * Removes from the tree every tag node matched by the XPath {@code tagType}.
     *
     * @param root    the node the XPath is evaluated against
     * @param tagType an XPath expression selecting the nodes to remove
     */
    public static void removeByTag(TagNode root, String tagType) {
        Object[] matches = evaluate(root, tagType);
        if (matches == null) {
            return;
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("scripts size is " + matches.length);
        }
        for (Object match : matches) {
            if (match instanceof TagNode) {
                ((TagNode) match).removeFromTree();
            }
        }
    }

    /**
     * Not implemented: this method currently does nothing.
     *
     * @param allNodes ignored
     */
    public static void getMaxLengthXpath(List<SelfNode> allNodes) {
    }

    /**
     * Not implemented: this method currently does nothing.
     *
     * @param allNodes ignored
     */
    public static void getMaxCommonXpath(List<SelfNode> allNodes) {
    }

    /**
     * Concatenates the content of the {@link ContentNode} children of
     * {@code tagNode}, skipping children whose content is blank. Content that is
     * kept is appended untrimmed. Only the children returned by
     * {@code getAllChildren()} are inspected; nested tags are not descended into.
     *
     * @param tagNode the node whose content children are collected
     * @return the concatenated content; empty when {@code tagNode} is {@code null}
     *         or has no non-blank content children
     */
    public static String getContentByTagNode(TagNode tagNode) {
        StringBuilder sb = new StringBuilder();
        if (tagNode == null) {
            return sb.toString();
        }
        List<? extends BaseToken> children = tagNode.getAllChildren();
        for (BaseToken child : children) {
            if (!(child instanceof ContentNode)) {
                continue;
            }
            String text = ((ContentNode) child).getContent();
            if (text.trim().length() == 0) {
                continue;
            }
            sb.append(text);
        }
        return sb.toString();
    }

    /**
     * Prints to standard output the XPath of every node in {@code allNodes}
     * whose non-blank content contains {@code content}.
     *
     * @param allNodes the candidate nodes; {@code null} prints nothing
     * @param content  the text to search for; {@code null} or empty prints nothing
     */
    public static void getXpathByContent(List<SelfNode> allNodes, String content) {
        if (allNodes == null || content == null || content.length() == 0) {
            return;
        }
        for (SelfNode candidate : allNodes) {
            if (candidate == null) {
                continue;
            }
            String text = candidate.getContent();
            if (text == null || text.trim().length() == 0) {
                continue;
            }
            // >= 0 so that a match at the very start of the text counts too.
            if (text.indexOf(content) >= 0) {
                System.out.println(content + " xpath is :" + candidate.getXpath());
            }
        }
    }

    /**
     * Walks the tag descendants of {@code node} depth-first and appends a
     * {@link SelfNode} for each one to {@code allNodes}. Each entry carries the
     * tag node, its own content (see {@link #getContentByTagNode(TagNode)}), its
     * {@code class} and {@code id} attributes, and an XPath built as
     * {@code parentXpath/name[k]}, where {@code k} is the 1-based position of
     * the tag among same-named siblings.
     *
     * @param node     the starting node; nothing happens when it or its tag node is {@code null}
     * @param allNodes the list that receives the discovered nodes
     */
    public static void getAllNodeXpath(SelfNode node, List<SelfNode> allNodes) {
        if (node == null || node.getTagNode() == null) {
            return;
        }
        List<? extends BaseToken> children = node.getTagNode().getAllChildren();
        // Per-parent counter of how many siblings with a given tag name were seen so far.
        HashMap<String, Integer> siblingCounts = new HashMap<String, Integer>();
        for (BaseToken token : children) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("baseToken:" + token.getRow());
            }
            if (!(token instanceof TagNode)) {
                logSkippedToken(token);
                continue;
            }
            TagNode child = (TagNode) token;
            String name = child.getName();
            Integer seen = siblingCounts.get(name);
            Integer position = seen == null ? 1 : seen + 1;
            siblingCounts.put(name, position);

            SelfNode selfNode = new SelfNode();
            selfNode.setTagNode(child);
            selfNode.setXpath(node.getXpath() + "/" + name + "[" + position + "]");
            selfNode.setContent(HtmlCleanerUtil.getContentByTagNode(child));
            selfNode.setClassStr(child.getAttributeByName("class"));
            selfNode.setId(child.getAttributeByName("id"));
            allNodes.add(selfNode);
            HtmlCleanerUtil.getAllNodeXpath(selfNode, allNodes);
        }
    }

    /**
     * Prints a one-level dump of the children of {@code node} to standard
     * output: position and class of every child token, followed by a
     * type-specific detail line.
     *
     * @param node the node whose children are dumped; {@code null} prints nothing
     */
    public static void getChilds(TagNode node) {
        if (node == null) {
            return;
        }
        System.out.println("-----------child----------------");
        List<? extends BaseToken> children = node.getAllChildren();
        for (BaseToken child : children) {
            System.out.println(child.getCol() + ":" + child.getRow() + ":" + child.getClass());
            if (child instanceof ContentNode) {
                System.out.println("content:" + ((ContentNode) child).getContent());
            }
            if (child instanceof EndTagToken) {
                System.out.println("endtagtoken:" + ((EndTagToken) child).getName());
            }
            if (child instanceof CommentNode) {
                System.out.println("comment:" + ((CommentNode) child).getCommentedContent());
            }
            if (child instanceof TagNode) {
                TagNode tag = (TagNode) child;
                System.out.println("TagNode:" + tag.getAttributeByName("id") + ":" + tag.getAttributeByName("class"));
            }
        }
    }

    /**
     * Manual entry point left over from development: fetches one page and
     * does nothing with the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String url = "http://news.cnblogs.com/";
        String html = Crawl4HttpClient.getHtmlByDefaultHeader(url)[0];
    }

    /**
     * Evaluates {@code xpath} against {@code node}.
     *
     * @return the raw results, or {@code null} when an argument is {@code null}
     *         or the expression cannot be evaluated (the failure is logged)
     */
    private static Object[] evaluate(TagNode node, String xpath) {
        if (node == null || xpath == null) {
            return null;
        }
        try {
            return node.evaluateXPath(xpath);
        }
        catch (XPatherException e) {
            LOG.error("failed to evaluate xpath " + xpath, e);
            return null;
        }
    }

    /**
     * Returns the first XPath result when it is a {@link TagNode}, otherwise {@code null}.
     */
    private static TagNode firstTagNode(Object[] matches) {
        if (matches == null || matches.length == 0 || !(matches[0] instanceof TagNode)) {
            return null;
        }
        return (TagNode) matches[0];
    }

    /**
     * Writes debug-level details about a non-tag child that the XPath walk skips.
     */
    private static void logSkippedToken(BaseToken token) {
        if (!LOG.isDebugEnabled()) {
            return;
        }
        LOG.debug("has filter!class is " + token.getClass().getName());
        if (token instanceof ContentNode) {
            LOG.debug("content:" + ((ContentNode) token).getContent());
        }
        if (token instanceof EndTagToken) {
            LOG.debug("endtagtoken:" + ((EndTagToken) token).getName());
        }
        if (token instanceof CommentNode) {
            LOG.debug("comment:" + ((CommentNode) token).getCommentedContent());
        }
    }
}

