/*
 * Decompiled with CFR 0.152.
 */
package com.fuwenchao.tmptask.k17.parser;

import com.fuwenchao.tmptask.Constants;
import com.fuwenchao.tmptask.Iparser;
import com.fuwenchao.tmptask.Task;
import com.fuwenchao.tmptask.WorkQueue;
import com.fuwenchao.tmptask.k17.entity.Entity;
import com.fuwenchao.tmptask.k17.entity.Reply;
import com.fuwenchao.tmptask.k17.entity.Subject;
import com.fuwenchao.tmptask.k17.jdbc.Dboption;
import com.fuwenchao.utils.RegexPatternUtils;
import com.fuwenchao.utils.TextUtil;
import com.fuwenchao.utils.htmlcleaner.HtmlCleanerUtil;
import com.fuwenchao.utils.httpclient.MyCrawler;
import java.util.Map;
import org.apache.log4j.Logger;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XPatherException;

/**
 * Parser for a forum thread page (the sample URL in {@link #main(String[])}
 * points at bbs.17k.com).
 *
 * <p>Every {@code div} under {@code //div[@id='postlist']} whose {@code id}
 * starts with {@code post_} is treated as one post. The post on floor 1 is
 * stored as a {@link Subject}; every other post is stored as a {@link Reply}.
 * After a subject has been stored, one follow-up {@link Task} per additional
 * reply page is pushed onto the {@link WorkQueue}.
 */
public class K17ItemParser
implements Iparser {
    private static final Logger log = Logger.getLogger(K17ItemParser.class);
    private static final Dboption dbUtil = new Dboption();

    /** Floor value used when the floor number of a post cannot be parsed. */
    private static final int UNKNOWN_FLOOR = -1;

    private static final String POSTS_XPATH = "//div[@id='postlist']/div";
    private static final String FLOOR_XPATH = "//div[@class='pi']/strong[1]/a[1]/em[1]";
    private static final String TITLE_XPATH = "//div[@id='postlist']/table[1]/tbody[1]/tr[1]/td[2]/h1[1]";
    private static final String PAGE_COUNT_XPATH = "//div[@class='pg']/label[1]/span[1]";
    private static final String CONTENT_XPATH = "//div[@class='t_fsz']/table[1]/tbody[1]/tr[1]/td[1]";
    private static final String TIME_XPATH = "//div[@class='authi']/em[1]";
    private static final String AUTHOR_XPATH = "//div[@class='authi']/a";

    /**
     * Parses one thread page and persists every post found on it.
     *
     * <p>{@code script} and {@code style} elements are stripped before the
     * posts are located. A post whose floor number cannot be parsed is logged
     * and still stored as a reply with floor {@code -1}.
     *
     * @param html raw HTML of the page
     * @param task task that produced the page; its URL is used for logging,
     *             for deriving the subject id and for building follow-up tasks
     * @return always {@code null}; results are written through the database
     *         helper and the work queue instead of being returned
     */
    @Override
    public Map<String, Object> parseHtml(String html, Task task) {
        HtmlCleaner cleaner = new HtmlCleaner();
        TagNode root = cleaner.clean(html);
        HtmlCleanerUtil.removeByTag(root, "//script");
        HtmlCleanerUtil.removeByTag(root, "//style");
        try {
            Object[] posts = root.evaluateXPath(POSTS_XPATH);
            if (posts.length == 0) {
                return null;
            }
            for (Object post : posts) {
                TagNode div = (TagNode)post;
                String divId = HtmlCleanerUtil.getValueByTagnode(div, "id");
                // Defensive: the helper's result for a div without an id attribute
                // is not visible here, so a null is treated like a non-post div.
                if (divId == null || !divId.startsWith("post_")) {
                    continue;
                }
                // Read the floor text once so the error message reports exactly
                // the value that failed to parse.
                String floorText = HtmlCleanerUtil.getValueByXpath(div, FLOOR_XPATH, "", "text");
                int floor = UNKNOWN_FLOOR;
                try {
                    floor = Integer.parseInt(floorText);
                }
                catch (NumberFormatException e) {
                    log.error("url:" + task.getUrl() + ".err floor:" + floorText, e);
                }
                log.info("floor :" + floor);
                if (floor == 1) {
                    this.getSubject(root, div, task);
                } else {
                    this.getReply(div, floor, task);
                }
                log.debug("-----------------------------------------------------------");
            }
        }
        catch (XPatherException e) {
            log.error("url:" + task.getUrl() + ".xpath evaluation failed", e);
        }
        return null;
    }

    /**
     * Queues one reply task for every page after the first.
     *
     * <p>The page count is read from the pager label, falling back to
     * {@code "0"} when the label is missing. If no number can be extracted
     * from the label the problem is logged and no task is queued, so the
     * remaining posts of the current page can still be processed.
     *
     * @param root root node of the parsed page
     * @param url  URL of the current page; each occurrence of {@code "-1-"} is
     *             replaced by {@code "-<page>-"} to build the follow-up URLs
     */
    private void addReplyPageTask(TagNode root, String url) {
        String numStr = HtmlCleanerUtil.getValueByXpath(root, PAGE_COUNT_XPATH, "0", "text");
        log.info("url:" + url + ".get reply task num is :" + numStr);
        int num;
        try {
            num = Integer.parseInt(RegexPatternUtils.getSingleIdFromStr(numStr));
        }
        catch (NumberFormatException e) {
            log.error("url:" + url + ".err reply page num:" + numStr, e);
            return;
        }
        log.info("num:" + num);
        for (int page = 2; page <= num; ++page) {
            Task pageTask = new Task();
            pageTask.setUrl(url.replace("-1-", "-" + page + "-"));
            pageTask.setType(Constants.TASK_TYPE_REPLY_17k);
            WorkQueue.putTask(pageTask);
        }
    }

    /**
     * Builds the {@link Subject} from the floor-1 post, stores it and then
     * queues the remaining reply pages.
     *
     * @param root root node of the parsed page, used for the thread title and
     *             the pager
     * @param div  node of the floor-1 post
     * @param task task that produced the page
     */
    private void getSubject(TagNode root, TagNode div, Task task) {
        Subject subject = new Subject();
        String title = HtmlCleanerUtil.getValueByXpath(root, TITLE_XPATH, "", "text");
        log.info("title:" + title);
        subject.setTitle(title);
        this.getOtherInfo(div, subject, task);
        dbUtil.insert17kSubject(subject);
        this.addReplyPageTask(root, task.getUrl());
    }

    /**
     * Fills the fields shared by subjects and replies: content, creation
     * time, author name, author id, content id and subject id.
     *
     * @param div    node of the post
     * @param entity entity to populate
     * @param task   task that produced the page; the subject id is extracted
     *               from its URL
     * @throws NumberFormatException if no numeric subject id can be extracted
     *                               from the task URL
     */
    private void getOtherInfo(TagNode div, Entity entity, Task task) {
        String content = HtmlCleanerUtil.getValueByXpath(div, CONTENT_XPATH, "", "text");
        log.info("content:" + content);
        entity.setContent(content);

        String timeStr = HtmlCleanerUtil.getValueByXpath(div, TIME_XPATH, "", "text");
        // The escaped literal is the page's "posted on" label that precedes the timestamp.
        timeStr = timeStr.replace("\u53d1\u8868\u4e8e", "").trim();
        log.info("time:" + timeStr);
        entity.setCreateTime(timeStr);

        String name = HtmlCleanerUtil.getValueByXpath(div, AUTHOR_XPATH, "", "text");
        entity.setAuthorName(name);
        log.info("name:" + name);

        // The author id is taken from the author link's href.
        String userId = RegexPatternUtils.getSingleIdFromStr(
                HtmlCleanerUtil.getValueByXpath(div, AUTHOR_XPATH, "", "href"));
        entity.setAuthorId(userId);
        log.info("userId:" + userId);

        // The content id is taken from the id attribute of the content cell.
        String contentId = RegexPatternUtils.getSingleIdFromStr(
                HtmlCleanerUtil.getValueByXpath(div, CONTENT_XPATH, "", "id"));
        log.info("contentId:" + contentId);
        entity.setContentId(contentId);

        String subjectIdStr = TextUtil.getRegexGroup(RegexPatternUtils.getSubjectId, task.getUrl(), 1);
        entity.setSubjectId(Integer.parseInt(subjectIdStr));
    }

    /**
     * Builds a {@link Reply} from a post that is not on floor 1 and stores it.
     *
     * @param div      node of the post
     * @param floornum floor number of the post, or {@code -1} when it could
     *                 not be parsed
     * @param task     task that produced the page
     */
    private void getReply(TagNode div, int floornum, Task task) {
        Reply reply = new Reply();
        reply.setFloornum(floornum);
        this.getOtherInfo(div, reply, task);
        dbUtil.insert17kReply(reply);
    }

    /**
     * Manual smoke test: downloads one sample thread page and runs it through
     * the parser.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String url = "http://bbs.17k.com/thread-2736945-1-1.html";
        String html = new MyCrawler().get(url)[1];
        Task task = new Task();
        task.setUrl(url);
        new K17ItemParser().parseHtml(html, task);
    }
}

