/*
 * Decompiled with CFR 0.152.
 */
package com.bfd.crawler.utils.autoparse;

import com.bfd.crawler.utils.JsonUtils;
import com.bfd.crawler.utils.crawler.httpclient.MyCrawler;
import de.l3s.boilerpipe.BoilerpipeProcessingException;
import de.l3s.boilerpipe.document.TextDocument;
import de.l3s.boilerpipe.extractors.ArticleExtractor;
import de.l3s.boilerpipe.extractors.CommonExtractors;
import de.l3s.boilerpipe.sax.BoilerpipeSAXInput;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Extracts the title and main text of an HTML page using the boilerpipe
 * article extractor.
 */
public class AutoParse {
    /**
     * Runs the boilerpipe article extractor over the given HTML.
     *
     * <p>The call is best-effort: if the input is {@code null} or boilerpipe
     * fails to parse/process the document, the failure is reported (stack
     * trace on stderr for parser errors) and an empty map is returned
     * instead of an exception being propagated.
     *
     * @param html the HTML markup to analyse; {@code null} yields an empty map
     * @return a mutable map holding the keys {@code "title"} and
     *         {@code "content"} on success (values are whatever the
     *         boilerpipe document reports and may be {@code null}), or an
     *         empty map on failure
     */
    public static Map<String, String> getTitleAndContent(String html) {
        Map<String, String> rs = new HashMap<String, String>();
        if (html == null) {
            // Nothing to parse; behave like any other extraction failure.
            return rs;
        }
        // Encode explicitly as UTF-8 so the bytes match the encoding declared
        // on the InputSource below. The no-arg getBytes() uses the platform
        // default charset, which corrupts non-ASCII text on non-UTF-8 JVMs.
        InputSource is = new InputSource(
                new ByteArrayInputStream(html.getBytes(StandardCharsets.UTF_8)));
        is.setEncoding("utf-8");
        try {
            TextDocument doc = new BoilerpipeSAXInput(is).getTextDocument();
            ArticleExtractor extractor = CommonExtractors.ARTICLE_EXTRACTOR;
            // process() works on the document in place; title and content are
            // read back from the same TextDocument afterwards.
            extractor.process(doc);
            rs.put("title", doc.getTitle());
            rs.put("content", doc.getContent());
        }
        catch (BoilerpipeProcessingException e) {
            e.printStackTrace();
        }
        catch (SAXException e) {
            e.printStackTrace();
        }
        return rs;
    }

    /**
     * Manual smoke test: downloads a fixed article URL, extracts its title
     * and content, and prints the result as JSON on stdout.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String url = "http://www.theregister.co.uk/2012/09/18/cisco_ace_no_more/";
        // NOTE(review): element [1] of MyCrawler.get() is presumably the
        // response body - confirm against MyCrawler.
        String html = new MyCrawler().get(url)[1];
        System.out.println(JsonUtils.toJSONString(AutoParse.getTitleAndContent(html)));
    }
}

