/*
 * Decompiled with CFR 0.152.
 */
package com.bfd.crawler.utils.templateparser;

import com.bfd.crawler.utils.DataUtil;
import com.bfd.crawler.utils.TextUtil;
import com.bfd.crawler.utils.templateparser.DomCFGBlock;
import com.bfd.crawler.utils.templateparser.DomCFGField;
import com.bfd.crawler.utils.templateparser.DomCFGTree;
import com.bfd.crawler.utils.templateparser.DomTemplate;
import com.bfd.crawler.utils.templateparser.Template;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

public class DomSearch {
    /** Class-wide logger. */
    private static final Log LOG = LogFactory.getLog(DomSearch.class);
    /** Tree print mode; {@code setPrintMode} only ever stores 0, 1 or 2 (any other value becomes 1). */
    private int printMode;
    /** Value returned by {@code getMatchTmplIDX()}; NOTE(review): no assignment is visible in this part of the file. */
    private int idxMatched;
    /** Print flag; set directly by {@code setPrintFlag} and forced to true by the other print setters. */
    private boolean printTreeWhileFailed;
    // NOTE(review): not referenced in the visible part of the file.
    private boolean matchIndex;
    // NOTE(review): not referenced in the visible part of the file.
    private static final int iMaxChildNum = 200;
    /** Reset to -1 by the constructors and by {@code searchNode(DocumentFragment, String)}. */
    private int lowestMatchLevel;
    /** Print depth limit; defaults to 5 and is only changed by {@code setMaxPrintLevel}. */
    private int maxPrintLevel;
    /** Buffer whose contents are returned by {@code getTreeString()}; emptied by {@code reInit()}. */
    private StringBuffer treePrintBuffer;
    /** Result of the most recent template parse, exposed through {@code getParseResult()}. */
    private Map<String, Object> resultMap;
    /** One map (keys "imgtag", "img", "rawimg") per image collected from content nodes. */
    private List<Map<String, String>> imgtasks;
    /** The same image records as {@code imgtasks}, indexed both by raw link and by image tag. */
    private Map<String, Map<String, String>> contentimgs;
    /** Number of content images collected so far; collection stops once it reaches 30. */
    private int imgIndex = 0;
    /** Document fragment currently being searched or parsed. */
    private DocumentFragment doc;
    // Only used in log messages in the visible code; NOTE(review): no assignment is visible here.
    private String cid;
    /** Page type passed to {@code executeTemplateParse}. */
    private String type;
    /** Set by {@code setType} when the type ends with "info"; gates content-image collection. */
    private boolean typeIsInfo = false;
    /** Set to true when an image-list block has been parsed. */
    private boolean hasImgs = false;
    /** URL of the page being parsed; used in log messages. */
    private String url;
    /** Charset passed to {@code executeTemplateParse}; cleared by {@code reInit()}. */
    private String charset;
    /** Log prefix; only the {@code DocumentFragment} constructor assigns it (the current thread's name). */
    private String workerName;
    /** Matches a common image file extension not followed by a letter; flag 2 is {@code Pattern.CASE_INSENSITIVE}. */
    private static final Pattern imgSuffixCompiler = Pattern.compile("\\.(BMP|JPG|JPEG|PNG|GIF)(?![a-z])", 2);
    /** Matches a quote or whitespace character; attribute values containing one are not treated as image links. */
    private static final Pattern imgExcept = Pattern.compile("['\"\\s]");
    // Captures the URL of a CSS "background:url(...)" declaration.
    // NOTE(review): the trailing \s+ means a declaration that ends right after ")" does not match — confirm this is intended.
    private static final Pattern backgroundImg = Pattern.compile("background:url\\(([^\"]+)\\)\\s+");
    /** Lazily created by {@code getTransformer()}. */
    private Transformer transformer;

    /**
     * Returns the transformer used to serialize DOM nodes, creating and configuring it on first use.
     *
     * <p>The transformer is configured for HTML output without indentation. Failures are logged,
     * not thrown.
     *
     * @return the cached transformer; {@code null} if the factory could not create one
     */
    public Transformer getTransformer() {
        if (this.transformer == null) {
            try {
                this.transformer = TransformerFactory.newInstance().newTransformer();
                this.transformer.setOutputProperty("indent", "no");
                this.transformer.setOutputProperty("cdata-section-elements", "no");
                this.transformer.setOutputProperty("method", "html");
            }
            catch (Exception e) {
                // Hand the exception over as the Throwable argument so the stack trace is logged as well.
                LOG.warn((Object)"failed to create or configure the DOM transformer", (Throwable)e);
            }
        }
        return this.transformer;
    }

    /**
     * Creates a searcher that is not yet bound to a document: empty result containers,
     * printing enabled, print mode 1 and a print depth of 5.
     */
    public DomSearch() {
        // Scalar defaults.
        this.doc = null;
        this.imgIndex = 0;
        this.printMode = 1;
        this.maxPrintLevel = 5;
        this.lowestMatchLevel = -1;
        this.printTreeWhileFailed = true;
        // Containers that reInit() clears between template attempts.
        this.treePrintBuffer = new StringBuffer(32768);
        this.resultMap = new HashMap<String, Object>();
        this.imgtasks = new ArrayList<Map<String, String>>();
        this.contentimgs = new HashMap<String, Map<String, String>>();
    }

    /**
     * Creates a searcher bound to the given document fragment.
     *
     * <p>All containers that {@code reInit()} clears are initialised here; leaving the image
     * containers unset would make the first template parse fail with a NullPointerException.
     *
     * @param doc the document fragment to search
     */
    public DomSearch(DocumentFragment doc) {
        this.resultMap = new HashMap<String, Object>();
        this.imgtasks = new ArrayList<Map<String, String>>();
        this.contentimgs = new HashMap<String, Map<String, String>>();
        this.treePrintBuffer = new StringBuffer(32768);
        this.printTreeWhileFailed = true;
        this.lowestMatchLevel = -1;
        this.maxPrintLevel = 5;
        this.doc = doc;
        this.printMode = 1;
        this.imgIndex = 0;
        // Used as a prefix in log messages.
        this.workerName = Thread.currentThread().getName();
    }

    /**
     * Resets the per-attempt state (results, collected images, charset, image flags and the
     * tree print buffer) before another template is tried.
     */
    private void reInit() {
        // Scalars first.
        this.imgIndex = 0;
        this.hasImgs = false;
        this.charset = null;
        // Then empty the containers in place so existing references stay valid.
        this.resultMap.clear();
        this.imgtasks.clear();
        this.contentimgs.clear();
        this.treePrintBuffer.setLength(0);
    }

    /**
     * @return {@code true} if the last parse handled an image-list block
     */
    public boolean hasImgs() {
        boolean found = this.hasImgs;
        return found;
    }

    /**
     * @return the live result map of the most recent parse (not a copy)
     */
    public Map<String, Object> getParseResult() {
        Map<String, Object> current = this.resultMap;
        return current;
    }

    /**
     * @return the stored matched-template index
     */
    public int getMatchTmplIDX() {
        int matched = this.idxMatched;
        return matched;
    }

    /**
     * Sets the print mode and turns printing on.
     *
     * @param mode 0 or 2 are stored as given; every other value is stored as 1
     */
    public void setPrintMode(int mode) {
        if (mode == 0 || mode == 2) {
            this.printMode = mode;
        } else {
            this.printMode = 1;
        }
        this.printTreeWhileFailed = true;
    }

    /**
     * Sets the maximum print depth and turns printing on.
     *
     * @param level accepted when it is in 1..29 or exactly 99; other values leave the depth unchanged
     */
    public void setMaxPrintLevel(int level) {
        boolean inNormalRange = level > 0 && level < 30;
        boolean isSentinel = level == 99;
        if (inNormalRange || isSentinel) {
            this.maxPrintLevel = level;
        }
        this.printTreeWhileFailed = true;
    }

    /**
     * Enables or disables the print flag.
     *
     * @param bPrint the new flag value
     */
    public void setPrintFlag(boolean bPrint) {
        printTreeWhileFailed = bPrint;
    }

    /**
     * Checks whether a URL matches a regular expression in full.
     *
     * @param url     the URL to test; {@code null} never matches a non-empty pattern
     * @param pattern the regular expression; {@code null} or empty accepts every URL
     * @return {@code true} if no pattern is given or the whole URL matches it
     * @throws java.util.regex.PatternSyntaxException if {@code pattern} is not a valid expression
     */
    public boolean urlIsOK(String url, String pattern) {
        if (pattern == null || pattern.length() < 1) {
            return true;
        }
        if (url == null) {
            // Pattern.matches would throw a NullPointerException; a missing URL simply does not match.
            return false;
        }
        return Pattern.matches(pattern, url);
    }

    /**
     * @return the current contents of the tree print buffer
     */
    public String getTreeString() {
        String printed = this.treePrintBuffer.toString();
        return printed;
    }

    /**
     * Looks up a node by path, starting from the document's body.
     *
     * <p>The start node is {@code html/body} if present, otherwise a top-level {@code body},
     * otherwise the fragment itself.
     *
     * @param doc  the fragment to search; it also becomes this searcher's current document
     * @param path the path expression to resolve below the start node
     * @return the matching node, or {@code null} if nothing matches
     */
    public Node searchNode(DocumentFragment doc, String path) {
        this.lowestMatchLevel = -1;
        this.doc = doc;
        Node start = this.searchNode((Node)doc, "html@@@0/body@@@0");
        if (start == null) {
            start = this.searchNode((Node)doc, "body@@@0");
            if (start == null) {
                LOG.info((Object)("There is no root element, path=" + path));
                start = doc;
            }
        }
        Node found = this.searchNode(start, path);
        if (found == null) {
            LOG.info((Object)("There is no parse result, path=" + path));
        }
        return found;
    }

    /** Decimal numeric character reference such as {@code &#8364;}. */
    private static final Pattern NUMERIC_ENTITY_PATTERN = Pattern.compile("&#(\\d+);");

    /**
     * Extracts the data of a node: the image link for image nodes, otherwise the node's text.
     *
     * <p>For text, the content is gathered recursively, decimal numeric character references
     * are decoded, then either {@code exRegex} is applied or whitespace is normalised, and
     * finally HTML tags are removed.
     *
     * @param node       the node to read
     * @param exRegex    optional extraction regex; when non-empty its captured groups form the text
     * @param defaultVal value used by the regex step when the expression is invalid
     * @param isImg      treat the node as an image even if it is not an {@code img} element
     * @param collectImg whether images found inside the content are recorded
     * @param attName    attribute to read the image link from first; may be empty
     * @return the extracted text or image link
     */
    private String getNodeData(Node node, String exRegex, String defaultVal, boolean isImg, boolean collectImg, String attName) {
        if (node.getNodeName().equalsIgnoreCase("img") || isImg) {
            return this.getImgNodeData(node, attName, false);
        }
        StringBuffer sb = new StringBuffer();
        this.recurseGetContent(node, sb, collectImg);
        String txt = sb.toString();
        if (txt.indexOf("&amp;#") >= 0 || txt.indexOf("&#") >= 0) {
            txt = DomSearch.decodeNumericEntities(txt.replace("&amp;", "&"));
        }
        if (exRegex != null && exRegex.length() > 0) {
            txt = this.extraHandle(txt, exRegex, defaultVal);
        } else {
            // trimBadChar recognises UTF-8 byte sequences, so the round trip must not depend on
            // the platform default charset.
            byte[] utf8 = txt.getBytes(java.nio.charset.StandardCharsets.UTF_8);
            txt = new String(this.trimBadChar(utf8), java.nio.charset.StandardCharsets.UTF_8);
        }
        return TextUtil.removeAllHtmlTags(txt);
    }

    /**
     * Replaces every decimal numeric character reference with the character it denotes.
     * References that are not valid Unicode code points (or overflow an int) are kept as text.
     */
    private static String decodeNumericEntities(String text) {
        Matcher matcher = NUMERIC_ENTITY_PATTERN.matcher(text);
        StringBuilder decoded = new StringBuilder(text.length());
        int copiedUpTo = 0;
        while (matcher.find()) {
            decoded.append(text, copiedUpTo, matcher.start());
            int codePoint = -1;
            try {
                codePoint = Integer.parseInt(matcher.group(1));
            }
            catch (NumberFormatException ignored) {
                // Too many digits for an int: not a real character, keep the reference verbatim.
            }
            if (Character.isValidCodePoint(codePoint)) {
                // appendCodePoint emits a surrogate pair for code points above U+FFFF.
                decoded.appendCodePoint(codePoint);
            } else {
                decoded.append(matcher.group());
            }
            copiedUpTo = matcher.end();
        }
        decoded.append(text, copiedUpTo, text.length());
        return decoded.toString();
    }

    /**
     * Collapses whitespace runs in a byte string, compacting the array in place.
     *
     * <p>Bytes 0..32 count as whitespace, as do the two-byte sequences C2 A0 / C2 A1 and the
     * three-byte sequence E3 80 80. Each run is reduced to a single space, or a single tab if
     * the run contains a tab byte. Leading ASCII whitespace is dropped and one trailing
     * space/tab is removed.
     * NOTE(review): the multi-byte checks look like UTF-8 encodings (C2 A0 = no-break space,
     * E3 80 80 = ideographic space) — confirm the caller always passes UTF-8. C2 A1 would be
     * U+00A1 in UTF-8, which is not a space — verify that it is meant to be dropped.
     *
     * @param data the bytes to clean; the array is overwritten
     * @return a new array holding the cleaned prefix of {@code data}
     */
    private byte[] trimBadChar(byte[] data) {
        // j is the write position; ff is true while the last byte written was whitespace.
        int j = 0;
        boolean ff = false;
        // spc and cspc are only incremented, never read.
        int spc = 0;
        int cspc = 0;
        for (int i = 0; i < data.length; ++i) {
            if (data[i] >= 0 && data[i] <= 32) {
                ++spc;
                // Nothing written yet: skip leading ASCII whitespace.
                if (j < 1) continue;
                if (ff) {
                    // Already inside a whitespace run: a tab upgrades the single byte kept for the run.
                    if (data[i] != 9 || data[j - 1] == 9) continue;
                    data[j - 1] = 9;
                    continue;
                }
                // First whitespace byte of a run: keep a tab as a tab, anything else as a space.
                data[j++] = data[i] == 9 ? 9 : 32;
                ff = true;
                continue;
            }
            if ((data[i] & 0xFF) == 194 && i < data.length - 1 && ((data[i + 1] & 0xFF) == 160 || (data[i + 1] & 0xFF) == 161)) {
                // Unlike the ASCII branch, this writes a space even when nothing has been written yet.
                if (!ff) {
                    data[j++] = 32;
                    ff = true;
                }
                // Skip the second byte of the sequence.
                ++i;
                ++cspc;
                continue;
            }
            if ((data[i] & 0xFF) == 227 && i < data.length - 2 && (data[i + 1] & 0xFF) == 128 && (data[i + 2] & 0xFF) == 128) {
                if (!ff) {
                    data[j++] = 32;
                    ff = true;
                }
                // Skip the remaining two bytes of the sequence.
                i += 2;
                ++cspc;
                continue;
            }
            // Ordinary byte: copy it and end any whitespace run.
            data[j++] = data[i];
            ff = false;
        }
        // Runs are collapsed to one byte, so at most one trailing whitespace byte remains.
        if (j > 0 && (data[j - 1] == 32 || data[j - 1] == 9)) {
            --j;
        }
        return Arrays.copyOfRange(data, 0, j);
    }

    /**
     * Extracts node data without a preferred image attribute.
     *
     * @param node       the node to read
     * @param exRegex    optional extraction regex
     * @param defaultVal value used when the regex is invalid
     * @param isImg      treat the node as an image
     * @param collectImg whether images inside the content are recorded
     * @return the extracted text or image link
     */
    public String getNodeData(Node node, String exRegex, String defaultVal, boolean isImg, boolean collectImg) {
        final String noAttribute = "";
        return this.getNodeData(node, exRegex, defaultVal, isImg, collectImg, noAttribute);
    }

    /**
     * Extracts node data without recording images found inside the content.
     *
     * @param node       the node to read
     * @param exRegex    optional extraction regex
     * @param defaultVal value used when the regex is invalid
     * @param isImg      treat the node as an image
     * @return the extracted text or image link
     */
    public String getNodeData(Node node, String exRegex, String defaultVal, boolean isImg) {
        final boolean collectImg = false;
        return this.getNodeData(node, exRegex, defaultVal, isImg, collectImg);
    }

    /**
     * Extracts node data with neither forced image handling nor image collection.
     *
     * @param node       the node to read
     * @param exRegex    optional extraction regex
     * @param defaultVal value used when the regex is invalid
     * @return the extracted text (or the image link if the node is an {@code img} element)
     */
    public String getNodeData(Node node, String exRegex, String defaultVal) {
        final boolean isImg = false;
        final boolean collectImg = false;
        return this.getNodeData(node, exRegex, defaultVal, isImg, collectImg);
    }

    /**
     * Returns the image link of a node with no preferred attribute and no raw-link collector.
     */
    private String getImgNodeData(Node node) {
        final String noAttribute = "";
        final StringBuilder noCollector = null;
        return this.getImgNodeData(node, noAttribute, noCollector, false);
    }

    /**
     * Returns the image link of a node, reading attribute {@code att} first, without a
     * raw-link collector.
     */
    private String getImgNodeData(Node node, String att, boolean FilteImg) {
        final StringBuilder noCollector = null;
        return this.getImgNodeData(node, att, noCollector, FilteImg);
    }

    /**
     * Returns the image link of a node with no preferred attribute, also appending the link
     * to {@code rawImg}.
     */
    private String getImgNodeData(Node node, StringBuilder rawImg, boolean FilteImg) {
        final String noAttribute = "";
        return this.getImgNodeData(node, noAttribute, rawImg, FilteImg);
    }

    /**
     * Determines the image link of a node.
     *
     * <p>Lookup order: the attribute named by {@code att} (if given), then {@code src}, then
     * any other attribute whose value looks like an image file, then a background image in
     * the {@code style} attribute. The link is rejected unless it starts with "http" or has
     * a colon after index 8, and also when it is longer than 300 characters.
     *
     * @param node     the node to inspect; nodes without attributes yield no link
     * @param att      attribute to try first; may be empty or {@code null}
     * @param rawImg   if non-null, receives the accepted link
     * @param FilteImg not used by this method
     * @return the accepted link, or an empty string if none was found or it was rejected
     */
    private String getImgNodeData(Node node, String att, StringBuilder rawImg, boolean FilteImg) {
        String srcLink = "";
        if (StringUtils.isNotEmpty((String)att)) {
            srcLink = this.filteImgUrl(DomSearch.getNodeAttr(node, att, ""), att);
        }
        if (StringUtils.isEmpty((String)srcLink)) {
            String otherAttName = "";
            String otherLink = "";
            String styleValue = "";
            // getAttributes() is null for anything that is not an element.
            NamedNodeMap nmap = node.getAttributes();
            int attrCount = nmap == null ? 0 : nmap.getLength();
            for (int i = 0; i < attrCount; ++i) {
                Node nd = nmap.item(i);
                String attName = nd.getNodeName().trim();
                String attValue = nd.getNodeValue() == null ? "" : nd.getNodeValue().trim();
                if (attName.equalsIgnoreCase("src")) {
                    srcLink = attValue;
                    continue;
                }
                if (attName.equalsIgnoreCase("style")) {
                    styleValue = attValue;
                    continue;
                }
                // Skip script handlers, alt text, values with quotes/whitespace and non-image values.
                if ("onerror".equalsIgnoreCase(attName) || imgExcept.matcher(attValue).find() || "alt".equalsIgnoreCase(attName) || !imgSuffixCompiler.matcher(attValue).find()) continue;
                otherLink = attValue;
                otherAttName = attName;
            }
            if (StringUtils.isNotEmpty((String)srcLink)) {
                LOG.info((Object)("got img via srcLink, cid=" + this.cid + ", type=" + this.type + ", url=" + this.url + ", attName=src, imglink=" + srcLink));
            } else if (StringUtils.isNotEmpty((String)otherLink)) {
                srcLink = otherLink;
                LOG.info((Object)("got img via otherLink, cid=" + this.cid + ", type=" + this.type + ", url=" + this.url + ", attName=" + otherAttName + ", imglink=" + otherLink));
            } else if (StringUtils.isNotEmpty((String)(srcLink = this.filteImgUrl(styleValue, "style")))) {
                LOG.info((Object)("got img via style, cid=" + this.cid + ", type=" + this.type + ", url=" + this.url + ", imglink=" + srcLink));
            }
        }
        if (StringUtils.isEmpty((String)srcLink)) {
            return "";
        }
        if (srcLink.indexOf(":") <= 8 && !srcLink.startsWith("http") || srcLink.length() > 300) {
            return "";
        }
        if (rawImg != null) {
            rawImg.append(srcLink);
        }
        return srcLink;
    }

    /**
     * Normalises an attribute value into an image link.
     *
     * @param link the attribute value
     * @param att  the attribute it came from
     * @return for a non-empty {@code style} value, the background-image URL or an empty string
     *         if none is found; in every other case {@code link} unchanged
     */
    private String filteImgUrl(String link, String att) {
        boolean isStyle = "style".equalsIgnoreCase(att);
        if (!isStyle || StringUtils.isEmpty((String)link)) {
            return link;
        }
        Matcher background = backgroundImg.matcher(link);
        return background.find() ? background.group(1) : "";
    }

    /**
     * Serializes a node to markup and passes it through {@code DataUtil.zipAndEncode}.
     *
     * @param node the node to serialize
     * @return the encoded content, or {@code null} if encoding failed
     */
    private String getZipAnEncodedRawContent(Node node) {
        String encoded = null;
        try {
            String markup = this.getRawContent(node);
            encoded = DataUtil.zipAndEncode(markup, "utf8");
        }
        catch (Exception e) {
            LOG.warn((Object)"zip and encode raw content exception,", (Throwable)e);
        }
        return encoded;
    }

    /**
     * Serializes a node (including its subtree) to markup using the shared transformer.
     *
     * @param node the node to serialize
     * @return the markup, or {@code null} if no transformer is available or serialization failed
     */
    private String getRawContent(Node node) {
        Transformer serializer = this.getTransformer();
        if (serializer == null) {
            // getTransformer() has already logged why creation failed.
            LOG.warn((Object)"no transformer available, cannot serialize node");
            return null;
        }
        try {
            StringWriter writer = new StringWriter();
            serializer.transform(new DOMSource(node), new StreamResult(writer));
            return writer.getBuffer().toString();
        }
        catch (Exception e) {
            // Pass the exception as the Throwable argument so the stack trace is logged too.
            LOG.warn((Object)"failed to serialize node to raw content", (Throwable)e);
            return null;
        }
    }

    /**
     * Appends the text content of a node and its descendants to {@code sbuf}.
     *
     * <p>{@code br}, {@code li} and {@code tr} contribute a tab, {@code td} contributes ": ";
     * {@code script}, {@code style} and comment nodes are skipped entirely. Node values that
     * end with "..." are replaced by an expanded form when one is available.
     *
     * @param node       the node to walk
     * @param sbuf       receives the text
     * @param collectImg whether {@code img} elements are recorded (at most 30 per parse)
     */
    private void recurseGetContent(Node node, StringBuffer sbuf, boolean collectImg) {
        String tag = node.getNodeName();
        if ("BR".equalsIgnoreCase(tag)) {
            // A line break has no content of its own.
            sbuf.append("\t");
            return;
        }
        if ("LI".equalsIgnoreCase(tag) || "TR".equalsIgnoreCase(tag)) {
            sbuf.append("\t");
        } else if ("TD".equalsIgnoreCase(tag)) {
            sbuf.append(": ");
        }
        for (String skipped : new String[]{"script", "style", "#comment"}) {
            if (tag.equalsIgnoreCase(skipped)) {
                return;
            }
        }
        if (collectImg && tag.equalsIgnoreCase("img") && this.imgIndex < 30) {
            this.getContentImgs(node);
        }
        String value = node.getNodeValue();
        if (value != null) {
            String expanded = value.endsWith("...") ? this.formatNodeValue(node) : "";
            sbuf.append(expanded.length() > 0 ? expanded : value);
        }
        NodeList children = node.getChildNodes();
        int idx = 0;
        while (idx < children.getLength()) {
            this.recurseGetContent(children.item(idx), sbuf, collectImg);
            ++idx;
        }
    }

    /**
     * Expands a truncated node value ("...") using the parent's {@code title} or {@code alt}
     * attribute; {@code alt} wins when both are present.
     *
     * @param node the node whose value ends with "..."
     * @return the node value with the truncated part replaced by the full attribute text, or
     *         an empty string when the parent offers no usable attribute
     */
    private String formatNodeValue(Node node) {
        Node parent = node.getParentNode();
        if (parent == null) {
            return "";
        }
        // getAttributes() is null when the parent is not an element (e.g. a document fragment).
        NamedNodeMap attrs = parent.getAttributes();
        if (attrs == null || attrs.getLength() == 0) {
            return "";
        }
        String fullStr = "";
        Node title = attrs.getNamedItem("title");
        if (title != null) {
            fullStr = title.getNodeValue();
        }
        Node alt = attrs.getNamedItem("alt");
        if (alt != null) {
            fullStr = alt.getNodeValue();
        }
        if (fullStr == null || fullStr.length() == 0) {
            return "";
        }
        String value = node.getNodeValue();
        String commonStr = TextUtil.getMaxCommonStr(value, fullStr);
        return value.replace(commonStr + "...", fullStr);
    }

    /**
     * Records the image of an {@code img} node, if it has a usable link.
     *
     * <p>The image gets the tag {@code img_<index>}; one record is appended to the task list
     * and two separate records are stored in the content-image index, under the raw link and
     * under the tag.
     *
     * @param node the image node
     */
    private void getContentImgs(Node node) {
        StringBuilder rawCollector = new StringBuilder();
        String img = this.getImgNodeData(node, rawCollector, true);
        if (StringUtils.isEmpty((String)img)) {
            return;
        }
        String tag = "img_" + this.imgIndex;
        String raw = rawCollector.toString();
        this.imgtasks.add(DomSearch.newImgRecord(tag, img, raw));
        this.contentimgs.put(raw, DomSearch.newImgRecord(tag, img, raw));
        this.contentimgs.put(tag, DomSearch.newImgRecord(tag, img, raw));
        ++this.imgIndex;
    }

    /** Builds one independent image record with the keys "imgtag", "img" and "rawimg". */
    private static Map<String, String> newImgRecord(String tag, String img, String raw) {
        HashMap<String, String> record = new HashMap<String, String>();
        record.put("imgtag", tag);
        record.put("img", img);
        record.put("rawimg", raw);
        return record;
    }

    /**
     * Parses a document with the first template of the given type that matches and yields
     * all of its required fields.
     *
     * <p>Templates are sorted, then tried in order; state is reset before each attempt.
     * Templates whose {@code getActive()} is 1 or whose type differs are skipped. On success
     * the result map additionally receives "imgtasks" and "contentimgs" (when images were
     * collected) and "tmpl_id".
     *
     * @param doc     the document to parse
     * @param domTmpl the template set
     * @param type    the page type to select templates by
     * @param url     the page URL, used for logging
     * @param charset the page charset
     * @return {@code true} if a template produced a complete result, {@code false} otherwise
     */
    public boolean executeTemplateParse(DocumentFragment doc, DomTemplate domTmpl, String type, String url, String charset) {
        boolean missingInput = doc == null || domTmpl == null || StringUtils.isEmpty((String)type);
        if (missingInput) {
            LOG.info((Object)(this.workerName + " there is no right template, url=" + url));
            return false;
        }
        this.url = url;
        this.charset = charset;
        this.setType(type);
        this.doc = doc;

        // Prefer html/body, then a bare body, then the fragment itself.
        Node root = this.searchNode((Node)doc, "html@@@0/body@@@0");
        if (root == null) {
            root = this.searchNode((Node)doc, "body@@@0");
        }
        if (root == null) {
            LOG.info((Object)(this.workerName + " there is no root, tmpl=" + ",type=" + type + ",url=" + url));
            root = doc;
        }

        Collections.sort(domTmpl.tmpl);
        for (int idx = 0; idx < domTmpl.getTemplateCount(); ++idx) {
            this.reInit();
            DomCFGTree candidate = domTmpl.getTemplate(idx);
            if (candidate.getActive() == 1) {
                LOG.info((Object)("templateId:" + candidate.getId() + " has deactive"));
                continue;
            }
            if (!candidate.getType().equals(type)) {
                continue;
            }
            HashMap<String, Object> parsed = new HashMap<String, Object>();
            boolean located = this.parseNode(root, candidate, candidate.getOutputField(), parsed);
            if (!located) {
                continue;
            }
            this.resultMap = parsed;
            if (!this.checkResult(candidate)) {
                continue;
            }

            LOG.info((Object)(this.workerName + " parse success\uff0cusing template id=" + candidate.getId()));
            if (this.imgtasks.size() > 0) {
                LOG.info((Object)(this.workerName + " got imgtasks=" + this.imgtasks));
                // Publish a copy: the instance list is cleared on the next parse.
                ArrayList<Map<String, String>> tasksCopy = new ArrayList<Map<String, String>>();
                tasksCopy.addAll(this.imgtasks);
                this.resultMap.put("imgtasks", tasksCopy);
            }
            if (this.contentimgs.size() > 0) {
                HashMap<String, Map<String, String>> imgsCopy = new HashMap<String, Map<String, String>>();
                imgsCopy.putAll(this.contentimgs);
                this.resultMap.put("contentimgs", imgsCopy);
            }
            this.resultMap.put("tmpl_id", candidate.getId());
            return true;
        }
        return false;
    }

    /**
     * Stores the page type and derives whether it is an "info" type.
     *
     * <p>The flag is recomputed on every call so that a reused instance does not keep
     * treating later, non-info pages as info pages.
     *
     * @param type the page type; must not be {@code null}
     */
    private void setType(String type) {
        this.type = type;
        this.typeIsInfo = type.endsWith("info");
    }

    /**
     * Verifies that every required field of the template is present in the current result.
     * A field counts as missing when it is absent, {@code null}, or an empty list.
     *
     * @param tmpl the template whose required fields are checked
     * @return {@code true} if nothing is missing (or the template requires nothing)
     */
    private boolean checkResult(DomCFGTree tmpl) {
        String[] required = tmpl.getRequiredField();
        if (required == null) {
            return true;
        }
        for (int k = 0; k < required.length; ++k) {
            String fld = required[k];
            Object value = this.resultMap.get(fld);
            boolean missing = value == null || value instanceof List && ((List)value).size() == 0;
            if (missing) {
                LOG.info((Object)(this.workerName + " template id is " + tmpl.getId() + " Missing field " + fld));
                return false;
            }
        }
        return true;
    }

    /**
     * Applies an extraction regex to a text and concatenates the captured groups of every match.
     *
     * <p>Groups that did not take part in a match are skipped. An expression without
     * capturing groups, or one that never matches, yields an empty string.
     *
     * @param txt        the text to search
     * @param regEx      the regular expression
     * @param defaultVal value returned when {@code regEx} is not a valid expression
     * @return the concatenated captures, or {@code defaultVal} for an invalid expression
     */
    private String extraHandle(String txt, String regEx, String defaultVal) {
        StringBuilder captured = new StringBuilder();
        String res;
        try {
            Matcher mch = Pattern.compile(regEx).matcher(txt);
            while (mch.find()) {
                int rn = mch.groupCount();
                for (int i = 1; i <= rn; ++i) {
                    String part = mch.group(i);
                    // group(i) is null for a group that did not participate; appending it
                    // would put the literal text "null" into the result.
                    if (part != null) {
                        captured.append(part);
                    }
                }
            }
            res = captured.toString();
        }
        catch (PatternSyntaxException x) {
            res = defaultVal;
        }
        catch (IllegalStateException x) {
            res = defaultVal;
        }
        catch (Exception x) {
            // Best effort: keep whatever was captured so far, but do not hide the failure.
            LOG.warn((Object)(this.workerName + " ExtraHandle failed, regEx=[" + regEx + "]"), (Throwable)x);
            res = captured.toString();
        }
        LOG.trace((Object)(this.workerName + " ExtraHandle: from [" + txt + "] to [" + res + "], regEx=[" + regEx + "]"));
        return res;
    }

    /**
     * Applies one template tree node to a DOM node and stores the extracted values in {@code rMap}.
     *
     * <p>Steps: resolve the template's tree path below {@code tNode}; recurse into the
     * template's children; extract the fields whose names are keys of {@code outFields};
     * then process each block (image lists go to key "imgs", other blocks to a list under
     * the block's name).
     *
     * @param tNode     the DOM node to start from
     * @param tmpl      the template tree node to apply
     * @param outFields names of the fields to output (only the keys are consulted here)
     * @param rMap      receives the extracted values
     * @return {@code false} only when the template's tree path cannot be located;
     *         {@code true} otherwise, even if child, field or block extraction failed
     */
    public boolean parseNode(Node tNode, DomCFGTree tmpl, Map<String, Boolean> outFields, Map<String, Object> rMap) {
        Node htmlNode = tNode;
        String[] rp = tmpl.getTreePath();
        // A template without a tree path is applied to tNode itself.
        if (rp != null && rp.length > 0 && (htmlNode = this.searchChildNode(tNode, rp, 0)) == null) {
            // Diagnostics are emitted at INFO level but only when DEBUG is enabled.
            if (LOG.isDebugEnabled()) {
                LOG.info((Object)(this.workerName + " curName=" + tNode.getNodeName() + ",cur_id=" + DomSearch.getNodeAttr(tNode, "id", "") + ",cur_class=" + DomSearch.getNodeAttr(tNode, "class", "") + "Failed to locate " + rp[0] + ", H=" + rp.length + ",  node children =>"));
                NodeList childNodes = tNode.getChildNodes();
                for (int i = 0; i < childNodes.getLength(); ++i) {
                    Node item = childNodes.item(i);
                    LOG.info((Object)(this.workerName + " ===> tagName=" + item.getNodeName() + ", class=" + DomSearch.getNodeAttr(item, "class", "") + ", id=" + DomSearch.getNodeAttr(item, "id", "") + ", idx=" + i));
                }
            }
            // The returned node is unused. NOTE(review): presumably called for its side effects — confirm.
            Node tmpNode = this.getLowestMismatchNode(tNode, rp, 0);
            return false;
        }
        // childOK accumulates the outcome of every sub-step but is never read or returned.
        boolean childOK = true;
        List<DomCFGTree> clds = tmpl.getChildren();
        for (int i = 0; i < clds.size(); ++i) {
            childOK &= this.parseNode(htmlNode, clds.get(i), outFields, rMap);
        }
        List<DomCFGField> flds = tmpl.getFields();
        for (int i = 0; i < flds.size(); ++i) {
            // Only fields requested through outFields are extracted.
            if (!outFields.containsKey(flds.get(i).getName())) continue;
            childOK &= this.retrieveField(htmlNode, flds.get(i), rMap);
        }
        List<DomCFGBlock> blocks = tmpl.getBlocks();
        for (DomCFGBlock domcfgBlock : blocks) {
            String name = domcfgBlock.getName();
            String[] mPATH = domcfgBlock.getTreePath();
            ArrayList<Node> nodes = new ArrayList<Node>();
            if (mPATH != null && mPATH.length > 0) {
                this.searchChildList(htmlNode, mPATH, 0, nodes);
                // A block whose path matches nothing is skipped; nothing is stored under its name.
                if (nodes.size() < 1) {
                    LOG.info((Object)(this.workerName + " Missing field: " + name));
                    continue;
                }
            }
            if (domcfgBlock.isImglist()) {
                // Image lists always go under the fixed key "imgs", not the block's name.
                rMap.put("imgs", this.retrieveImageListBlock(domcfgBlock, name, nodes));
                this.hasImgs = true;
                continue;
            }
            ArrayList list = new ArrayList();
            List<DomCFGField> fieldList = domcfgBlock.getFields();
            for (int j = 0; j < nodes.size(); ++j) {
                HashMap<String, Object> fields = new HashMap<String, Object>();
                // After the loop below, template holds the result for the LAST field (possibly null).
                Template template = null;
                for (int i = 0; i < fieldList.size(); ++i) {
                    template = this.retrieveBlockField(nodes.get(j), fieldList.get(i));
                    if (template == null) continue;
                    Object field = this.convert2Field(template);
                    fields.put(template.getNodeName(), field);
                }
                // Child template trees of the block: their results are merged into one map per node.
                List<DomCFGTree> bChilds = domcfgBlock.getChildren();
                HashMap<String, Object> nodesMap = new HashMap<String, Object>();
                for (int i = 0; i < bChilds.size(); ++i) {
                    HashMap<String, Object> map = new HashMap<String, Object>();
                    DomCFGTree tree = bChilds.get(i);
                    childOK &= this.parseNode(nodes.get(j), tree, outFields, map);
                    if (map.size() == 0) continue;
                    nodesMap.putAll(map);
                }
                if (nodesMap.size() > 0) {
                    list.add(nodesMap);
                }
                // Nested blocks are stored in the field map under the nested block's name.
                List<DomCFGBlock> tBlocks = domcfgBlock.getBlocks();
                for (int i = 0; i < tBlocks.size(); ++i) {
                    ArrayList<Object> blockList = new ArrayList<Object>();
                    DomCFGBlock block = tBlocks.get(i);
                    childOK &= this.parseBlock(nodes.get(j), block, outFields, blockList);
                    fields.put(block.getName(), blockList);
                }
                // Several entries: add the whole map. Exactly one entry: add the bare value,
                // looked up by the last field's name (which may be null if that is not the stored key).
                if (fields.size() > 1) {
                    list.add(fields);
                    continue;
                }
                if (fields.size() != 1 || template == null) continue;
                list.add(fields.get(template.getNodeName()));
            }
            rMap.put(name, list);
        }
        return true;
    }

    /**
     * Applies a nested block template: for every node matching the block's tree path, the
     * block's child templates are parsed into a fresh map that is appended to {@code blockList}.
     *
     * @param tNode     the DOM node to search below
     * @param tmpl      the block template
     * @param outFields names of the fields to output
     * @param blockList receives one result map per matched node
     * @return {@code false} if the block has a tree path that matches nothing; {@code true}
     *         otherwise (a block without a tree path matches no nodes and adds nothing)
     */
    public boolean parseBlock(Node tNode, DomCFGBlock tmpl, Map<String, Boolean> outFields, List<Object> blockList) {
        ArrayList<Node> matches = new ArrayList<Node>();
        String[] path = tmpl.getTreePath();
        boolean hasPath = path != null && path.length > 0;
        if (hasPath) {
            this.searchChildList(tNode, path, 0, matches);
            if (matches.size() == 0) {
                if (LOG.isDebugEnabled()) {
                    LOG.info((Object)(this.workerName + " Failed to locate " + path[0] + ", H=" + path.length));
                }
                // Result intentionally unused, as in the rest of the class.
                this.getLowestMismatchNode(tNode, path, 0);
                return false;
            }
        }
        for (int n = 0; n < matches.size(); ++n) {
            Node matched = matches.get(n);
            List<DomCFGTree> children = tmpl.getChildren();
            HashMap<String, Object> parsed = new HashMap<String, Object>();
            for (int c = 0; c < children.size(); ++c) {
                // The per-child outcome does not influence the return value.
                this.parseNode(matched, children.get(c), outFields, parsed);
            }
            blockList.add(parsed);
        }
        return true;
    }

    /**
     * Extracts an image-list block: one map per node, filled from the block's fields.
     *
     * <p>A field named "text" is stored under the key "imgtag" using the node's title, alt
     * or text; every other field is stored under its own name. Fields whose path cannot be
     * located are left out of the map.
     *
     * @param domcfgBlock the block template
     * @param name        the block name, used for logging
     * @param nodes       the nodes matched by the block
     * @return one map per node, in node order
     */
    private List<Object> retrieveImageListBlock(DomCFGBlock domcfgBlock, String name, ArrayList<Node> nodes) {
        ArrayList<Object> entries = new ArrayList<Object>();
        List<DomCFGField> fieldList = domcfgBlock.getFields();
        for (Node base : nodes) {
            HashMap<String, String> entry = new HashMap<String, String>();
            for (DomCFGField field : fieldList) {
                Node target = base;
                String[] path = field.getFieldPath();
                if (path != null && path.length > 0) {
                    target = this.searchChildNode(base, path, 0);
                    if (target == null) {
                        LOG.info((Object)(this.workerName + " RetrieveImageListBlock Missing field: " + name));
                        continue;
                    }
                }
                String key;
                String value;
                if ("text".equalsIgnoreCase(field.getName())) {
                    key = "imgtag";
                    value = this.getNodeTextValue(target);
                } else {
                    key = field.getName();
                    value = this.getNodeData(target, "", "", field.isImg());
                }
                entry.put(key, value);
            }
            entries.add(entry);
        }
        return entries;
    }

    /**
     * Returns a descriptive text for a node: its trimmed {@code title} attribute, else its
     * trimmed {@code alt} attribute, else its trimmed text content.
     *
     * @param node the node to describe
     * @return the first non-empty candidate, or the (possibly empty) text content
     */
    private String getNodeTextValue(Node node) {
        String title = DomSearch.getNodeAttr(node, "title", "").trim();
        if (!StringUtils.isEmpty((String)title)) {
            return title;
        }
        String alt = DomSearch.getNodeAttr(node, "alt", "").trim();
        if (!StringUtils.isEmpty((String)alt)) {
            return alt;
        }
        return this.getNodeData(node, "", "").trim();
    }

    /**
     * Converts an extracted block field into its result representation.
     *
     * @param tfld the extracted field
     * @return the plain text when the field has no link; otherwise a map with "link", "type"
     *         and "rawlink" (plus "text" for a field named "nextpage")
     */
    private Object convert2Field(Template tfld) {
        String text = tfld.getText();
        if (StringUtils.isEmpty((String)tfld.getLink())) {
            return text;
        }
        HashMap<String, String> linkInfo = new HashMap<String, String>();
        linkInfo.put("link", tfld.getLink());
        linkInfo.put("type", tfld.getLinkType());
        linkInfo.put("rawlink", tfld.getRawlink());
        boolean isNextPage = "nextpage".equalsIgnoreCase(tfld.getNodeName());
        if (isNextPage) {
            linkInfo.put("text", tfld.getText());
        }
        return linkInfo;
    }

    /**
     * Extracts one field of a block from a node.
     *
     * <p>HTML fields yield the zipped and encoded markup of the node; other fields yield the
     * node's text or image link. Link fields additionally carry the link found on the node.
     *
     * @param tNode the block node to search below
     * @param fld   the field definition
     * @return the extracted field, or {@code null} when the field's path cannot be located
     *         or an HTML field produced no content
     */
    private Template retrieveBlockField(Node tNode, DomCFGField fld) {
        Template extracted = new Template();
        String name = fld.getName();
        String[] path = fld.getFieldPath();
        Node node = tNode;
        if (path != null && path.length > 0) {
            node = this.searchChildNode(tNode, path, 0);
            if (node == null) {
                LOG.info((Object)(this.workerName + " Missing field: " + name));
                return null;
            }
        }

        String val = null;
        if (!fld.isHtml()) {
            val = this.getNodeData(node, "", "", fld.isImg(), this.typeIsInfo && fld.collectImg(), fld.attName());
        } else {
            try {
                val = this.getZipAnEncodedRawContent(node);
                if (StringUtils.isEmpty((String)val)) {
                    return null;
                }
            }
            catch (Exception e) {
                LOG.warn((Object)(this.workerName + " zip and encode rawContent error, url=" + this.url));
            }
        }

        String linkType = null;
        String rawLink = null;
        String link = null;
        if (fld.isLink()) {
            linkType = fld.getLinkType();
            rawLink = this.matchUrl(node, linkType, fld.getLinkAtt());
            link = rawLink;
        }

        extracted.setNodeName(name);
        extracted.setText(val);
        extracted.setLink(link);
        extracted.setRawlink(rawLink);
        extracted.setLinkType(linkType);

        StringBuilder message = new StringBuilder();
        message.append(this.workerName).append(" Got filed: ").append(name).append("=");
        if (fld.isLink()) {
            message.append(", link=").append(link).append(", rawlink=").append(rawLink);
        } else {
            message.append(val);
        }
        LOG.info((Object)message.toString());
        return extracted;
    }

    /**
     * Extracts one template field from a node and stores it in {@code rMap} under the field's name.
     *
     * <p>Stored shape: a list for multi-fields (the node's value followed by whatever the
     * following siblings contribute), a link map for link fields, a list of segments for
     * segmented fields, otherwise the plain value.
     *
     * @param tNode the node to search below
     * @param fld   the field definition
     * @param rMap  receives the value
     * @return {@code false} when the field's path cannot be located or an HTML field
     *         produced no content; {@code true} otherwise
     */
    private boolean retrieveField(Node tNode, DomCFGField fld, Map<String, Object> rMap) {
        String name = fld.getName();
        String[] path = fld.getFieldPath();
        Node node = tNode;
        if (path != null && path.length > 0) {
            node = this.searchChildNode(tNode, path, 0);
            if (node == null) {
                LOG.info((Object)(this.workerName + " Missing field: " + name));
                return false;
            }
        }

        String val = null;
        if (!fld.isHtml()) {
            val = this.getNodeData(node, "", "", fld.isImg(), this.typeIsInfo && fld.collectImg(), fld.attName());
        } else {
            try {
                val = this.getZipAnEncodedRawContent(node);
                if (StringUtils.isEmpty((String)val)) {
                    return false;
                }
            }
            catch (Exception e) {
                LOG.warn((Object)(this.workerName + " zip and encode rawContent error, url=" + this.url));
            }
        }

        String linkType = null;
        String rawLink = null;
        String link = null;
        if (fld.isLink()) {
            linkType = fld.getLinkType();
            rawLink = this.matchUrl(node, linkType, fld.getLinkAtt());
            link = rawLink;
        }

        if (fld.isMultiField()) {
            ArrayList<Object> values = new ArrayList<Object>();
            if (fld.isLink()) {
                HashMap<String, String> first = new HashMap<String, String>();
                first.put("text", val);
                first.put("link", link);
                first.put("rawlink", rawLink);
                first.put("type", linkType);
                values.add(first);
            } else {
                values.add(val);
            }
            // Every following sibling may contribute a further value.
            for (Node sibling = node.getNextSibling(); sibling != null; sibling = sibling.getNextSibling()) {
                this.postFun(values, sibling, fld);
            }
            rMap.put(name, values);
            return true;
        }
        if (fld.isLink()) {
            HashMap<String, String> linkInfo = new HashMap<String, String>();
            linkInfo.put("rawlink", rawLink);
            linkInfo.put("link", link);
            linkInfo.put("type", linkType);
            rMap.put(name, linkInfo);
            return true;
        }
        if (fld.needSegm()) {
            ArrayList<String> segments = new ArrayList<String>();
            segments.addAll(Arrays.asList(val.split(fld.getSegmflag())));
            rMap.put(name, segments);
            return true;
        }
        rMap.put(name, val);
        return true;
    }

    /**
     * Accepts an attribute value that, ignoring surrounding whitespace, is a single
     * whitespace-free token (optionally followed by whitespace and a trailing slash)
     * and does not start with {@code #} or {@code javascript}.
     */
    private static final Pattern LINK_URL_PATTERN = Pattern.compile("^\\s*(?!(#|javascript))[^\\s]+\\s*/?$", Pattern.DOTALL);

    /**
     * Extracts a link from an anchor element.
     *
     * <p>The attribute is read from the first child of {@code node} when that child is an
     * {@code a} element, otherwise from {@code node} itself when it is an {@code a} element.
     * Surrounding whitespace is stripped from the matched value and any remaining blanks are
     * encoded as {@code %20}.
     *
     * @param node     node to inspect; must not be {@code null}
     * @param linkType link type of the field; not used by this method
     * @param linkatt  name of the attribute holding the link
     * @return the cleaned link, or {@code null} when no anchor/attribute is found or the
     *         value is rejected by the pattern (e.g. {@code #...} or {@code javascript...})
     */
    public String matchUrl(Node node, String linkType, String linkatt) {
        Node urlNode = null;
        Node firstChild = node.getFirstChild();
        if (firstChild != null && firstChild.getNodeName().equalsIgnoreCase("a")) {
            urlNode = firstChild.getAttributes().getNamedItem(linkatt);
        } else if (node.getNodeName().equalsIgnoreCase("a")) {
            urlNode = node.getAttributes().getNamedItem(linkatt);
        }
        if (urlNode == null) {
            return null;
        }
        Matcher matcher = LINK_URL_PATTERN.matcher(urlNode.getNodeValue());
        if (!matcher.find()) {
            return null;
        }
        // The pattern tolerates leading/trailing whitespace; strip it first so it is not
        // turned into a bogus "%20" prefix/suffix (or left behind as a tab/newline).
        return matcher.group().trim().replace(" ", "%20");
    }

    /**
     * Appends the value of a sibling node to {@code list} when that node matches the last
     * segment of the field's path (node name and class; the id part is not compared).
     *
     * <p>Nodes without children, fields with an empty path and non-matching nodes leave the
     * list untouched. For link fields a map with the keys {@code text}, {@code rawlink},
     * {@code link} and {@code type} is appended; otherwise the node text itself.
     *
     * @param list accumulator that receives the extracted value
     * @param node candidate sibling node, may be {@code null}
     * @param fld  field configuration
     * @return the same {@code list} instance
     */
    private List<Object> postFun(List<Object> list, Node node, DomCFGField fld) {
        if (node == null || node.getFirstChild() == null) {
            return list;
        }
        String[] fieldPath = fld.getFieldPath();
        if (fieldPath.length <= 0) {
            return list;
        }
        String[] segment = fieldPath[fieldPath.length - 1].split("@");
        if (!this.matchName(node, segment[0], null, segment[2])) {
            return list;
        }
        String text = this.getNodeData(node, "", "");
        if (!fld.isLink()) {
            list.add(text);
            return list;
        }
        String kind = fld.getLinkType();
        String href = this.matchUrl(node, kind, fld.getLinkAtt());
        HashMap<String, String> entry = new HashMap<String, String>();
        entry.put("text", text);
        entry.put("rawlink", href);
        entry.put("link", href);
        entry.put("type", kind);
        list.add(entry);
        return list;
    }

    /**
     * Reads an attribute value from a DOM node.
     *
     * @param node       node to read from, may be {@code null}
     * @param attname    attribute name
     * @param strDefault value returned when the node is {@code null}, has no attribute map,
     *                   or does not carry the attribute
     * @return the attribute value, or {@code strDefault}
     */
    public static String getNodeAttr(Node node, String attname, String strDefault) {
        NamedNodeMap attributes = node == null ? null : node.getAttributes();
        if (attributes == null) {
            return strDefault;
        }
        Node attribute = attributes.getNamedItem(attname);
        return attribute == null ? strDefault : attribute.getNodeValue();
    }

    /**
     * Compares a node's (trimmed) name with {@code name}, ignoring case.
     *
     * @return {@code false} when {@code name} is {@code null} or differs from the node name
     */
    private boolean matchName(Node node, String name) {
        if (name == null) {
            return false;
        }
        String actual = node.getNodeName().trim();
        return name.equalsIgnoreCase(actual);
    }

    /**
     * Checks a node against a name plus an optional id or class constraint.
     *
     * <p>The node's id and class are logged first. The node name must match; then, if a
     * non-empty {@code id} is given, only the id decides (class is not consulted); otherwise
     * a non-empty {@code nodeClass} is checked through {@link #isClassMatch}. With neither
     * constraint the name match alone suffices.
     *
     * @param node      node under test
     * @param name      expected node name
     * @param id        expected id, or {@code null}/empty for "any"
     * @param nodeClass expected class expression, or {@code null}/empty for "any"
     * @return whether the node satisfies the constraints
     */
    private boolean matchName(Node node, String name, String id, String nodeClass) {
        String actualId = DomSearch.getNodeAttr(node, "id", "").trim();
        String actualClass = DomSearch.getNodeAttr(node, "class", "").trim();
        LOG.info("id:" + actualId + ".class:" + actualClass);
        if (!this.matchName(node, name)) {
            return false;
        }
        boolean idGiven = id != null && id.length() > 0;
        if (idGiven) {
            return id.trim().equalsIgnoreCase(actualId);
        }
        boolean classGiven = nodeClass != null && nodeClass.length() > 0;
        if (classGiven) {
            return this.isClassMatch(nodeClass.trim(), actualClass);
        }
        return true;
    }

    /**
     * Tests a node's {@code class} attribute against a template class expression.
     *
     * <p>Both arguments are whitespace-separated token lists compared case-insensitively.
     * A template token prefixed with {@code !} is a negation: the node must <em>not</em>
     * carry that class. Every other token must be present on the node; each node token can
     * satisfy only one template token.
     *
     * <p>A negated token that is absent from the node counts as satisfied. Previously it was
     * never counted, so a template containing a negation could only ever match nodes without
     * any class, which contradicted the handling of the empty-class case.
     *
     * @param tmplClass class expression from the template (expected to be trimmed)
     * @param nodeClass value of the node's {@code class} attribute, may be empty
     * @return {@code true} when every template token is satisfied
     */
    private boolean isClassMatch(String tmplClass, String nodeClass) {
        String[] wanted = tmplClass.split("\\s+");
        String[] present = nodeClass.split("\\s+");
        boolean tmplNegated = tmplClass.startsWith("!");
        if (nodeClass.trim().length() < 1) {
            // A class-less node matches only if the template asks for no positive class.
            for (String token : wanted) {
                if (!token.startsWith("!")) {
                    return false;
                }
            }
            return true;
        }
        int i = 0;
        int j = 0;
        int k = 0;
        for (i = 0; i < wanted.length; ++i) {
            boolean negated = wanted[i].startsWith("!");
            String cls = negated ? wanted[i].substring(1) : wanted[i];
            boolean found = false;
            for (j = 0; j < present.length; ++j) {
                if (cls.equalsIgnoreCase(present[j])) {
                    found = true;
                    break;
                }
            }
            if (negated) {
                if (found) {
                    if (tmplNegated) {
                        LOG.info("match failed");
                    }
                    return false;
                }
                // Forbidden class is absent: this token is satisfied.
                ++k;
            } else if (found) {
                // Consume the node token so it cannot satisfy a second template token.
                present[j] = "";
                ++k;
            }
        }
        if (k >= wanted.length) {
            if (tmplNegated && LOG.isDebugEnabled()) {
                LOG.info("match OK");
            }
            return true;
        }
        if (tmplNegated) {
            LOG.info(this.workerName + " match failed: " + i + ", " + j + "," + k);
        }
        return false;
    }

    /**
     * Resolves a {@code /}-separated path below {@code node}.
     *
     * @param node start node
     * @param path path expression; {@code null} or empty means "the start node itself"
     * @return the node found by {@code searchChildNode}, {@code node} for an empty path,
     *         or {@code null} when nothing matches
     */
    public Node searchNode(Node node, String path) {
        boolean noPath = path == null || path.length() < 1;
        if (noPath) {
            return node;
        }
        String[] segments = path.split("/");
        LOG.trace("Search Path=" + path);
        return this.searchChildNode(node, segments, 0);
    }

    /**
     * Recursively resolves {@code paths[level..]} below {@code node} and returns a single
     * matching descendant.
     *
     * <p>Each path segment has the form {@code name@id@class@index}. When id or class is
     * given, all children matching name/id/class are candidates; otherwise the child is
     * picked by name and position. If exactly one candidate leads to a full match it is
     * returned. If several do, the search falls back to the positional child
     * ({@code index}) and continues from there.
     *
     * @param node  parent whose children are examined
     * @param paths path segments
     * @param level index of the segment to resolve at this depth
     * @return the matched node, or {@code null} when the path cannot be resolved, the segment
     *         is malformed, or {@code level} is outside {@code paths}
     */
    private Node searchChildNode(Node node, String[] paths, int level) {
        // ">=" (not ">"): level == paths.length would index past the array, which happens
        // for an empty path array such as the result of "/".split("/").
        if (level >= paths.length) {
            return null;
        }
        String[] names = paths[level].split("@");
        if (names.length < 4) {
            LOG.info("bad path: " + paths[level] + ", len=" + names.length);
            return null;
        }
        int nodeIDX = 0;
        try {
            nodeIDX = Integer.parseInt(names[3]);
        }
        catch (NumberFormatException ignored) {
            // Non-numeric index part: keep the default position 0.
        }
        Node[] childs = new Node[iMaxChildNum];
        boolean hasIdOrClass = names[1].trim().length() != 0 || names[2].trim().length() != 0;
        int childNum = hasIdOrClass
                ? this.getStrictChild(node, names[0], names[1], names[2], childs)
                : this.getChildByIndex(node, names[0], childs, nodeIDX);
        if (childNum < 1) {
            LOG.trace(this.workerName + " Miss Match - 00 at level " + level + ", nodePattern:[" + paths[level] + "]");
            if (level == 0) {
                this.printErrorMessage(node, paths, level);
            }
            if (this.maxPrintLevel == 99) {
                this.printNode(node);
            }
            return null;
        }
        boolean lastLevel = level >= paths.length - 1;
        Node firstMatch = null;
        int matchCount = 0;
        for (int i = 0; i < childNum; ++i) {
            Node matched = lastLevel ? childs[i] : this.searchChildNode(childs[i], paths, level + 1);
            if (matched == null) {
                continue;
            }
            if (matchCount == 0) {
                firstMatch = matched;
            }
            ++matchCount;
        }
        if (matchCount == 0) {
            this.printNode(node);
            LOG.info(this.workerName + " Miss Match - 11 at level " + level + ", num=" + childNum + ", nodePattern:[" + paths[level] + "]");
            if (level == 0) {
                this.printErrorMessage(node, paths, level);
            }
            return null;
        }
        if (matchCount == 1) {
            return firstMatch;
        }
        // Ambiguous: several candidates matched, so disambiguate by name and position.
        childNum = this.getChildByIndex(node, names[0], childs, nodeIDX);
        if (childNum < 1) {
            return null;
        }
        if (lastLevel) {
            return childs[0];
        }
        Node resolved = this.searchChildNode(childs[0], paths, level + 1);
        if (resolved == null && level == 0) {
            LOG.trace("Miss Match - 22 at level " + level + ", num=" + childNum + ", nodePattern:[" + paths[level] + "]");
            this.printErrorMessage(node, paths, level);
        }
        return resolved;
    }

    /**
     * Recursively resolves {@code paths[level..]} below {@code node} and appends every
     * node reached at the last path level to {@code rlist}.
     *
     * <p>Segments have the form {@code name@id@class@index}. A segment ending in {@code *}
     * is a multi-match: all children matching name/id/class are followed and the index part
     * is ignored. For a non-multi segment, when more than one candidate produced results the
     * positional child ({@code index}) is searched in addition.
     *
     * @param node  parent whose children are examined
     * @param paths path segments
     * @param level index of the segment to resolve at this depth
     * @param rlist accumulator for matched nodes
     */
    private void searchChildList(Node node, String[] paths, int level, List<Node> rlist) {
        // ">=" (not ">"): level == paths.length would index past the array (empty path array).
        if (level >= paths.length) {
            return;
        }
        String[] names = paths[level].split("@");
        if (names.length < 4) {
            LOG.info(this.workerName + " bad path: " + paths[level] + ", len=" + names.length);
            return;
        }
        boolean bMulti = paths[level].endsWith("*");
        int nodeIDX = 0;
        if (!bMulti) {
            try {
                nodeIDX = Integer.parseInt(names[3]);
            }
            catch (NumberFormatException ignored) {
                // Non-numeric index part: keep the default position 0.
            }
        }
        Node[] childs = new Node[iMaxChildNum];
        boolean strict = bMulti || names[1].trim().length() != 0 || names[2].trim().length() != 0;
        int childNum = strict
                ? this.getStrictChild(node, names[0], names[1], names[2], childs)
                : this.getChildByIndex(node, names[0], childs, nodeIDX);
        if (childNum < 1) {
            LOG.trace(this.workerName + " Miss Match - 00 at level " + level + ", nodePattern:[" + paths[level] + "]");
            if (level == 0) {
                this.printErrorMessage(node, paths, level);
            }
            if (this.maxPrintLevel == 99) {
                this.printNode(node);
            }
            return;
        }
        boolean lastLevel = level >= paths.length - 1;
        int matchCount = 0;
        for (int i = 0; i < childNum; ++i) {
            if (lastLevel) {
                rlist.add(childs[i]);
                ++matchCount;
                continue;
            }
            // A candidate counts as matched when its subtree contributed at least one result.
            int sizeBefore = rlist.size();
            this.searchChildList(childs[i], paths, level + 1, rlist);
            if (rlist.size() > sizeBefore) {
                ++matchCount;
            }
        }
        if (matchCount == 0) {
            this.printNode(node);
            LOG.trace(this.workerName + " Miss Match - 11 at level " + level + ", num=" + childNum + ", nodePattern:[" + paths[level] + "]");
            if (level == 0) {
                this.printErrorMessage(node, paths, level);
            }
            return;
        }
        if (bMulti || matchCount == 1) {
            return;
        }
        // NOTE(review): the results of all candidates are already in rlist at this point, so
        // the positional search below adds to them rather than replacing them; this can yield
        // duplicates. Looks unintended - confirm before changing.
        childNum = this.getChildByIndex(node, names[0], childs, nodeIDX);
        if (childNum < 1) {
            return;
        }
        if (lastLevel) {
            rlist.add(childs[0]);
            return;
        }
        this.searchChildList(childs[0], paths, level + 1, rlist);
        if (rlist.size() < 1 && level == 0) {
            LOG.trace(this.workerName + " Miss Match - 22 at level " + level + ", num=" + childNum + ", nodePattern:[" + paths[level] + "]");
            this.printErrorMessage(node, paths, level);
        }
    }

    /**
     * Prints the deepest node that could still be reached along {@code paths}, i.e. the
     * place where the path stopped matching.
     */
    private void printErrorMessage(Node node, String[] paths, int level) {
        this.printNode(this.getLowestMismatchNode(node, paths, level));
    }

    /**
     * Walks down from {@code node} along {@code paths}, starting at segment {@code level},
     * and returns the last node that could be reached.
     *
     * <p>At each step the child is taken from the strict name/id/class lookup when that
     * yields exactly one child, otherwise from the positional lookup. The walk stops at a
     * malformed segment, when no child is found, or when the path is exhausted.
     *
     * @return the deepest reachable node (possibly {@code node} itself)
     */
    private Node getLowestMismatchNode(Node node, String[] paths, int level) {
        Node current = node;
        for (int depth = level; depth < paths.length; ++depth) {
            String[] parts = paths[depth].split("@");
            if (parts.length < 4) {
                LOG.info(this.workerName + " bad path: " + paths[depth] + ", len=" + parts.length);
                return current;
            }
            int position = 0;
            try {
                position = Integer.parseInt(parts[3]);
            }
            catch (NumberFormatException x) {
                // index part is not a number; position stays 0
            }
            Node[] found = new Node[200];
            int count = this.getStrictChild(current, parts[0], parts[1], parts[2], found);
            if (count != 1) {
                count = this.getChildByIndex(current, parts[0], found, position);
            }
            if (count == 0) {
                return current;
            }
            current = found[0];
        }
        return current;
    }

    /**
     * Finds the child of {@code parent} that is the {@code index}-th (zero-based) child
     * carrying the given node name and stores it in {@code childs[0]}.
     *
     * @return 1 when such a child exists, 0 otherwise
     */
    private int getChildByIndex(Node parent, String name, Node[] childs, int index) {
        NodeList children = parent.getChildNodes();
        int sameNameSeen = 0;
        for (int pos = 0; pos < children.getLength(); ++pos) {
            Node candidate = children.item(pos);
            if (!this.matchName(candidate, name)) {
                continue;
            }
            if (sameNameSeen >= index) {
                childs[0] = candidate;
                return 1;
            }
            ++sameNameSeen;
        }
        return 0;
    }

    /**
     * Collects the children of {@code node} that match name, id and class (see the
     * four-argument {@code matchName}) into {@code childs}, in document order.
     *
     * <p>Collection stops once {@code childs} is full or {@code iMaxChildNum} children have
     * been stored, whichever comes first.
     *
     * @param node      parent node
     * @param name      required node name
     * @param id        required id, or empty for "any"
     * @param nodeClass required class expression, or empty for "any"
     * @param childs    output array receiving the matches from index 0
     * @return number of children stored in {@code childs}
     */
    private int getStrictChild(Node node, String name, String id, String nodeClass, Node[] childs) {
        NodeList clist = node.getChildNodes();
        LOG.info("node children size is " + clist.getLength());
        // Never write past the caller's array, even if it is smaller than the global cap.
        int limit = Math.min(childs.length, iMaxChildNum);
        int cn = 0;
        for (int i = 0; i < clist.getLength() && cn < limit; ++i) {
            Node child = clist.item(i);
            if (this.matchName(child, name, id, nodeClass)) {
                childs[cn++] = child;
            }
        }
        return cn;
    }

    /**
     * Dumps the subtree rooted at {@code node} using the printer selected by
     * {@code printMode}: 1 = plain text, 0 = log output, anything else = XML.
     * The shared print buffer is created on first use and emptied on every call.
     */
    public void printNode(Node node) {
        if (this.treePrintBuffer != null) {
            this.treePrintBuffer.setLength(0);
        } else {
            this.treePrintBuffer = new StringBuffer(32768);
        }
        switch (this.printMode) {
            case 1:
                this.print2PlainText(node, 0);
                break;
            case 0:
                this.print2Console(node, 0);
                break;
            default:
                this.print2XmlData(node, 0);
                break;
        }
    }

    /**
     * Builds a one-line description of a node: its name, a {@code name@id@class@index}
     * locator (index = number of preceding siblings with the same name) and the first
     * 30 characters of its text as returned by {@code getNodeData}.
     *
     * @return the description, or an empty string for a {@code null} node
     */
    private String getNodeInfor(Node node) {
        if (node == null) {
            return "";
        }
        String tag = node.getNodeName();
        String idValue = "";
        String classValue = "";
        NamedNodeMap attributes = node.getAttributes();
        if (attributes != null && attributes.getLength() > 0) {
            Node idAttr = attributes.getNamedItem("id");
            if (idAttr != null) {
                idValue = idAttr.getNodeValue();
            }
            Node classAttr = attributes.getNamedItem("class");
            if (classAttr != null) {
                classValue = classAttr.getNodeValue();
            }
        }
        int sameNameBefore = 0;
        Node sibling = node.getPreviousSibling();
        while (sibling != null) {
            if (sibling.getNodeName().equalsIgnoreCase(tag)) {
                ++sameNameBefore;
            }
            sibling = sibling.getPreviousSibling();
        }
        String preview = this.getNodeData(node, "", "");
        if (preview.length() > 30) {
            preview = preview.substring(0, 30);
        }
        StringBuilder info = new StringBuilder(512);
        info.append("name='").append(tag).append(" ' dom='").append(tag).append("@");
        info.append(idValue).append("@").append(classValue).append("@");
        info.append(sameNameBefore).append(" '");
        info.append(" value='").append(preview).append(" '");
        return info.toString();
    }

    /**
     * Logs the subtree rooted at {@code node}, one indented line per element, down to
     * {@code maxPrintLevel}. Does nothing unless {@code printTreeWhileFailed} is set;
     * nodes whose name starts with {@code #} are skipped.
     */
    private void print2Console(Node node, int level) {
        boolean suppressed = !this.printTreeWhileFailed || level >= this.maxPrintLevel;
        if (suppressed || node.getNodeName().startsWith("#")) {
            return;
        }
        StringBuilder line = new StringBuilder();
        for (int remaining = level; remaining > 0; --remaining) {
            line.append("  ");
        }
        line.append("l=").append(level).append(", ").append(this.getNodeInfor(node));
        LOG.info(line.toString());
        if (level < this.maxPrintLevel && node.hasChildNodes()) {
            NodeList children = node.getChildNodes();
            for (int idx = 0; idx < children.getLength(); ++idx) {
                this.print2Console(children.item(idx), level + 1);
            }
        }
        if (level == 0) {
            LOG.trace("tree output END ------");
        }
    }

    /**
     * Appends the subtree rooted at {@code node} to {@code treePrintBuffer} as indented
     * plain text, one line per element, down to {@code maxPrintLevel}. Does nothing unless
     * {@code printTreeWhileFailed} is set; nodes whose name starts with {@code #} are skipped.
     */
    private void print2PlainText(Node node, int level) {
        boolean suppressed = !this.printTreeWhileFailed || level >= this.maxPrintLevel;
        if (suppressed || node.getNodeName().startsWith("#")) {
            return;
        }
        for (int remaining = level; remaining > 0; --remaining) {
            this.treePrintBuffer.append("  ");
        }
        String description = this.getNodeInfor(node);
        this.treePrintBuffer.append("l=").append(level).append(", ").append(description).append("\n");
        if (level < this.maxPrintLevel && node.hasChildNodes()) {
            NodeList children = node.getChildNodes();
            for (int idx = 0; idx < children.getLength(); ++idx) {
                this.print2PlainText(children.item(idx), level + 1);
            }
        }
        if (level == 0) {
            LOG.trace("tree output END ------");
        }
    }

    /**
     * Appends the subtree rooted at {@code node} to {@code treePrintBuffer} as XML-like
     * markup. The root element is named {@code Top} (preceded by an XML declaration),
     * deeper elements {@code level<level-1>}. Does nothing unless
     * {@code printTreeWhileFailed} is set or when {@code level} exceeds
     * {@code maxPrintLevel}; nodes whose name starts with {@code #} are skipped.
     * Attribute values are written as-is, without escaping.
     */
    private void print2XmlData(Node node, int level) {
        if (!this.printTreeWhileFailed || level > this.maxPrintLevel) {
            return;
        }
        if (node.getNodeName().startsWith("#")) {
            return;
        }
        String xmlNodeName;
        if (level == 0) {
            this.treePrintBuffer.append("<?xml version='1.0'?>\n");
            xmlNodeName = "Top";
        } else {
            xmlNodeName = "level" + (level - 1);
        }
        for (int pad = level; pad > 0; --pad) {
            this.treePrintBuffer.append(" ");
        }
        this.treePrintBuffer.append("<" + xmlNodeName + " caption='");
        this.treePrintBuffer.append(node.getNodeName() + " ' ");
        this.treePrintBuffer.append(this.getNodeInfor(node));
        this.treePrintBuffer.append(">");
        // Children are expanded unless the only child is a "#..." node (e.g. plain text).
        NodeList children = null;
        boolean expand = false;
        if (level < this.maxPrintLevel && node.hasChildNodes()) {
            children = node.getChildNodes();
            expand = children.getLength() > 1 || !children.item(0).getNodeName().startsWith("#");
        }
        if (expand) {
            this.treePrintBuffer.append("\n");
            for (int idx = 0; idx < children.getLength(); ++idx) {
                this.print2XmlData(children.item(idx), level + 1);
            }
            for (int pad = level; pad > 0; --pad) {
                this.treePrintBuffer.append(" ");
            }
        }
        this.treePrintBuffer.append("</" + xmlNodeName + ">\n");
        if (level == 0) {
            LOG.trace("tree output END ------");
        }
    }

    /**
     * Heuristic content check: the text is accepted when it contains at least three
     * different characters out of a fixed list of very common Chinese characters.
     * A rejected text is logged in full.
     *
     * <p>NOTE(review): judging by the log message this presumably detects mis-decoded
     * (wrong charset) content - confirm against the callers.
     *
     * @param data text to check; must not be {@code null}
     * @return {@code true} when at least three of the key characters occur in {@code data}
     */
    private boolean isValidContent(String data) {
        // "\u6709" used to be " \u6709" (with a leading blank), unlike every other key, so
        // that character was only counted when preceded by a space.
        String[] keys = new String[]{"\u7684", "\u4e00", "\u662f", "\u4e86", "\u6211", "\u4e0d", "\u4eba", "\u5728", "\u4ed6", "\u6709", "\u8fd9", "\u4e2a", "\u4e0a", "\u4eec", "\u6765", "\u5230", "\u65f6"};
        int hits = 0;
        for (String key : keys) {
            if (data.contains(key) && ++hits >= 3) {
                return true;
            }
        }
        LOG.info("code check failed, data: " + data);
        return false;
    }

    /**
     * Scratch entry point: replaces every {@code s} in a sample string with a tab and
     * discards the result. Produces no output.
     */
    public static void main(String[] args) {
        String sample = "\u968f\u673ass\u6389\u843ds\u5f00\u53d1sss\u6d12s\u843ds\u7684\u5206ssssss\u5757";
        String tabbed = sample.replace("s", "\t");
    }
}

