package com.evernote.enml.converter;

import com.baidu.android.pushservice.PushConstants;
import com.baidu.frontia.api.FrontiaPersonalStorage;
import com.evernote.edam.type.Resource;
import com.evernote.enml.ENMLConstants;
import com.evernote.enml.ENMLUtil;
import com.evernote.enml.ResourceData;
import com.evernote.enml.ResourceFetcher;
import com.evernote.enml.dtd.DTDAttribute;
import com.evernote.enml.dtd.SimpleENMLDTD;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;

/* loaded from: classes.dex */
/**
 * Converts an HTML document into markup restricted to what {@link SimpleENMLDTD} allows.
 *
 * <p>{@link #convert(String, String, String)} parses the HTML with jsoup, walks the selected
 * part of the tree, rewrites or drops elements and attributes, and stores the outcome in this
 * instance. The results are then read through {@link #getTitle()}, {@link #getKeywords()},
 * {@link #getContent()} and {@link #getResources()}.
 *
 * <p>Conversion state lives in instance fields, so one instance holds the result of only the
 * most recent {@code convert} call.
 */
public class HTMLToENML {
    private static final Logger logger = Logger.getLogger(HTMLToENML.class.getName());

    /** Detects an inline {@code display:none} declaration in a (lower-cased) style attribute. */
    private static final Pattern PATTERN_DISPLAY_NONE = Pattern.compile("display\\s*:\\s*none");

    /** Matches every character that is stripped from text nodes of the converted content. */
    private static final Pattern INVALID_CONTENT_TEXT_PATTERN = Pattern.compile("[^\\x09\\x0A\\x0D\\u0020-\\uD7FF\\uE000-\\uFFFD\\x{10000}-\\x{10FFFF}]");

    /** Matches control characters and line/paragraph separators, stripped from title and keywords. */
    private static final Pattern INVALID_TITLE_TEXT_PATTERN = Pattern.compile("[\\p{Cc}\\p{Zl}\\p{Zp}]");

    /** Source tag name to replacement tag name, applied by {@link #transformSpecialTags(Element)}. */
    protected static final Map<String, String> TAG_TRANSFORM_MAP = new HashMap<String, String>();

    /** Tag names whose elements are dropped from the output together with their subtree. */
    protected static final Set<String> TAG_TO_REMOVE_SET = new HashSet<String>();

    private String content;
    private HTMLNodeHandler customizedHandler;
    private ResourceFetcher fetcher;
    private String keywords;
    private String title;
    private List<Resource> resourceList = null;

    /** Not read or written anywhere in this class; kept because it is part of the public surface. */
    public long replaceTime = 0;

    static {
        // Structural containers become plain divs.
        TAG_TRANSFORM_MAP.put("html", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put(ENMLConstants.HTML_BODY, ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("form", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("main", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("fieldset", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("iframe", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("embed", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("article", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("aside", ENMLConstants.HTML_DIV_TAG);
        // The HTML element is spelled "details"; a misspelled key would never match a real tag.
        TAG_TRANSFORM_MAP.put("details", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("footer", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("header", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("figure", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("figcaption", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("hgroup", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("nav", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("section", ENMLConstants.HTML_DIV_TAG);
        TAG_TRANSFORM_MAP.put("summary", ENMLConstants.HTML_DIV_TAG);
        // Inline containers become spans.
        TAG_TRANSFORM_MAP.put("legend", ENMLConstants.HTML_SPAN_TAG);
        TAG_TRANSFORM_MAP.put("label", ENMLConstants.HTML_SPAN_TAG);
        TAG_TRANSFORM_MAP.put("highlight", ENMLConstants.HTML_SPAN_TAG);
        TAG_TRANSFORM_MAP.put("mark", ENMLConstants.HTML_SPAN_TAG);
        // Media-like elements are handled through the image path.
        TAG_TRANSFORM_MAP.put("canvas", "img");
        // NOTE(review): this constant comes from an unrelated SDK and is presumably the literal
        // "video" substituted by the decompiler - confirm its value.
        TAG_TRANSFORM_MAP.put(FrontiaPersonalStorage.TYPE_STREAM_VIDEO, "img");

        TAG_TO_REMOVE_SET.add("script");
        TAG_TO_REMOVE_SET.add("noscript");
        TAG_TO_REMOVE_SET.add("ruby");
        TAG_TO_REMOVE_SET.add(ENMLConstants.HTML_LINK_TAG);
        // NOTE(review): an attribute-name constant is used as a tag name here; presumably it
        // equals "style" so that <style> elements are dropped - confirm.
        TAG_TO_REMOVE_SET.add(ENMLConstants.HTML_STYLE_ATTR);
    }

    /**
     * Creates a converter.
     *
     * @param resourceFetcher used to download images referenced by the document; may be
     *     {@code null}, in which case images keep an absolute {@code src} and are not downloaded
     * @param hTMLNodeHandler optional hook consulted for keywords and for every visited node;
     *     may be {@code null}
     */
    public HTMLToENML(ResourceFetcher resourceFetcher, HTMLNodeHandler hTMLNodeHandler) {
        this.fetcher = resourceFetcher;
        this.customizedHandler = hTMLNodeHandler;
    }

    /**
     * Reads the keywords from the document's meta element and stores the cleaned value in
     * {@code keywords}.
     *
     * @param document the parsed HTML document
     */
    private void extractKeywords(Document document) {
        // NOTE(review): pages conventionally publish keywords as meta[name=keywords]; this
        // selector only matches the "property" form - confirm whether that is intended.
        Elements metaElements = document.select("meta[property=keywords]");
        if (metaElements != null) {
            // NOTE(review): PushConstants.EXTRA_CONTENT is presumably the literal "content"
            // substituted by the decompiler - confirm its value.
            this.keywords = ENMLUtil.cleanString(metaElements.attr(PushConstants.EXTRA_CONTENT));
        }
    }

    /**
     * Returns {@code str} with every match of {@code pattern} deleted.
     *
     * @param str the text to clean; {@code null} and empty strings are returned unchanged
     * @param pattern the pattern describing the characters to drop
     * @return the cleaned text
     */
    private String removeInvalidChar(String str, Pattern pattern) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        return pattern.matcher(str).replaceAll("");
    }

    /**
     * Removes attributes the DTD does not allow on the element's tag, sanitizes NMTOKEN and
     * NMTOKENS values that do not conform, and adds any required attribute that is missing.
     *
     * @param simpleENMLDTD the DTD consulted for allowed and required attributes
     * @param element the element whose attributes are cleaned in place
     */
    protected void cleanAttributes(SimpleENMLDTD simpleENMLDTD, Element element) {
        Attributes attributes = element.attributes();
        String tagName = element.tagName();
        Set<String> presentKeys = new HashSet<String>();
        if (attributes != null) {
            // Removals are deferred so the attribute collection is never modified mid-iteration.
            List<String> rejectedKeys = new ArrayList<String>();
            for (Attribute attribute : attributes) {
                String key = attribute.getKey();
                String value = attribute.getValue();
                if (!simpleENMLDTD.isAttributeAllowed(tagName, key, value)) {
                    rejectedKeys.add(key);
                    continue;
                }
                DTDAttribute dtdAttribute = simpleENMLDTD.getDTDAttriute(tagName, key);
                if (dtdAttribute != null) {
                    DTDAttribute.AttributeType type = dtdAttribute.getType();
                    if (type == DTDAttribute.AttributeType.NMTOKEN) {
                        if (!DTDAttribute.NMTOKEN_PATTERN.matcher(value).matches()) {
                            attribute.setValue(value.replaceAll("[^\\d\\w\\-\\:]", "_"));
                        }
                    } else if (type == DTDAttribute.AttributeType.NMTOKENS) {
                        // Mirrors the NMTOKEN case: only values that fail the pattern are rewritten.
                        // NOTE(review): confirm that sanitizing on a failed match is the intent.
                        if (!DTDAttribute.NMTOKENS_PATTERN.matcher(value).matches()) {
                            attribute.setValue(value.replaceAll("[^\\d\\w\\-\\: ]", "_").trim());
                        }
                    }
                }
                presentKeys.add(key);
            }
            for (String rejectedKey : rejectedKeys) {
                attributes.remove(rejectedKey);
            }
        }

        Map<String, DTDAttribute> requiredAttributes = simpleENMLDTD.getRequiredDTDAttriutes(tagName);
        if (requiredAttributes == null || requiredAttributes.size() <= 0) {
            return;
        }
        for (DTDAttribute required : requiredAttributes.values()) {
            if (presentKeys.contains(required.getName())) {
                continue;
            }
            String fillValue = required.getValue();
            if (fillValue == null && required.getDefaultValueModel() == DTDAttribute.DefaultValueModel.REQUIRED) {
                // No declared value: use the first enumerated value for SET types, else a placeholder.
                fillValue = required.getType() == DTDAttribute.AttributeType.SET
                        ? (String) required.getEnumeratedValues().toArray()[0]
                        : "unknown";
            }
            element.attr(required.getName(), fillValue);
        }
    }

    /**
     * Parses and converts an HTML document, storing title, keywords, content and resources in
     * this instance. All of them are reset at the start of every call.
     *
     * @param str the HTML source; {@code null} makes the method return {@code false}
     * @param str2 optional CSS selector choosing the elements to convert; when {@code null} or
     *     empty the document body is converted
     * @param str3 base URI used to resolve relative URLs; {@code null} is treated as empty
     * @return {@code true} if content was produced, {@code false} otherwise
     */
    public boolean convert(String str, String str2, String str3) {
        if (str == null) {
            return false;
        }
        this.title = null;
        this.content = null;
        // Reset with the other results so keywords of an earlier document cannot leak through.
        this.keywords = null;
        this.resourceList = new ArrayList<Resource>();

        Document document = Jsoup.parse(str, str3 == null ? "" : str3);
        document.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
        document.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
        document.outputSettings().prettyPrint(false);

        String cleanedTitle = document.title();
        if (cleanedTitle != null) {
            cleanedTitle = ENMLUtil.cleanString(cleanedTitle);
        }
        if (cleanedTitle != null && !cleanedTitle.isEmpty()) {
            this.title = removeInvalidChar(cleanedTitle, INVALID_TITLE_TEXT_PATTERN);
        }

        // The custom handler gets the first chance at keywords; the meta element is the fallback.
        if (this.customizedHandler != null) {
            this.customizedHandler.initialize();
            this.keywords = this.customizedHandler.extractKeywords(document);
        }
        if (this.keywords == null || this.keywords.isEmpty()) {
            extractKeywords(document);
        }
        if (this.keywords != null) {
            this.keywords = removeInvalidChar(this.keywords, INVALID_TITLE_TEXT_PATTERN);
        }

        if (str2 == null || str2.isEmpty()) {
            Element body = document.body();
            if (body == null || !traverse(body)) {
                return false;
            }
            this.content = body.toString();
            return true;
        }

        Elements selected = document.select(str2);
        if (selected == null || selected.size() <= 0) {
            return false;
        }
        StringBuilder converted = new StringBuilder();
        for (Element element : selected) {
            if (traverse(element)) {
                converted.append(element.toString());
            }
        }
        if (converted.length() <= 0) {
            return false;
        }
        this.content = converted.toString();
        return true;
    }

    /** @return the converted markup of the last conversion, or {@code null} if none was produced */
    public String getContent() {
        return this.content;
    }

    /** @return the fetcher used to download image resources; may be {@code null} */
    public ResourceFetcher getFetcher() {
        return this.fetcher;
    }

    /** @return the keywords found by the last conversion; may be {@code null} or empty */
    public String getKeywords() {
        return this.keywords;
    }

    /** @return the resources collected by the last conversion, or {@code null} before the first one */
    public List<Resource> getResources() {
        return this.resourceList;
    }

    /** @return the document title found by the last conversion, or {@code null} if there was none */
    public String getTitle() {
        return this.title;
    }

    /**
     * Pre-visit step of the traversal: decides whether a node is kept and, for elements,
     * normalizes the tag name and cleans the attributes.
     *
     * @param node the node being entered
     * @param i depth of the node relative to the traversal root
     * @return {@code true} to keep the node, {@code false} to have it removed in
     *     {@link #tail(Node, int, boolean)}
     */
    protected boolean head(Node node, int i) {
        // Only elements and text survive; every other node type is dropped.
        if (!(node instanceof Element) && !(node instanceof TextNode)) {
            return false;
        }
        if (this.customizedHandler != null && !this.customizedHandler.process(node, this.fetcher)) {
            return false;
        }
        if (node instanceof Element) {
            Element element = (Element) node;
            // Locale.ROOT keeps the result independent of the device's default locale.
            element.tagName(element.tagName().toLowerCase(Locale.ROOT));
            if (!transformSpecialTags(element)) {
                return false;
            }
            SimpleENMLDTD dtd = SimpleENMLDTD.getInstance();
            if (!dtd.isElementAllowed(element.tagName())) {
                element.tagName(ENMLConstants.HTML_SPAN_TAG);
            }
            cleanAttributes(dtd, element);
        }
        return true;
    }

    /**
     * Replaces the fetcher used to download image resources.
     *
     * @param resourceFetcher the new fetcher; may be {@code null}
     */
    public void setFetcher(ResourceFetcher resourceFetcher) {
        this.fetcher = resourceFetcher;
    }

    /**
     * Post-visit step of the traversal: removes rejected nodes and strips invalid characters
     * from the text of kept text nodes.
     *
     * @param node the node being left
     * @param i depth of the node relative to the traversal root
     * @param z the value {@link #head(Node, int)} returned for this node
     */
    protected void tail(Node node, int i, boolean z) {
        if (!z) {
            node.remove();
        } else if (node instanceof TextNode) {
            TextNode textNode = (TextNode) node;
            textNode.text(removeInvalidChar(textNode.text(), INVALID_CONTENT_TEXT_PATTERN));
        }
    }

    /**
     * Applies the tag-specific rules: rejects hidden elements and removed tags, renames tags,
     * turns images into media elements backed by a fetched resource, and makes anchor URLs
     * absolute.
     *
     * @param element the element to transform in place; its tag name is expected in lower case
     * @return {@code true} to keep the element, {@code false} to drop it
     */
    protected boolean transformSpecialTags(Element element) {
        String style = element.attr(ENMLConstants.HTML_STYLE_ATTR);
        if (style != null && PATTERN_DISPLAY_NONE.matcher(style.toLowerCase(Locale.ROOT)).find()) {
            return false;
        }

        String tagName = element.tagName();
        if (TAG_TO_REMOVE_SET.contains(tagName)) {
            return false;
        }

        String replacement;
        if (tagName.equals(ENMLConstants.HTML_INPUT_TAG)) {
            String inputType = element.attr("type");
            if ("checkbox".equalsIgnoreCase(inputType)) {
                replacement = ENMLConstants.EN_TODO_TAG;
            } else if (FrontiaPersonalStorage.TYPE_STREAM_IMAGE.equalsIgnoreCase(inputType)) {
                // NOTE(review): presumably the literal "image" substituted by the decompiler - confirm.
                replacement = "img";
            } else {
                replacement = ENMLConstants.HTML_SPAN_TAG;
            }
        } else {
            replacement = TAG_TRANSFORM_MAP.get(tagName);
        }
        if (replacement != null) {
            tagName = replacement;
            element.tagName(tagName);
        }

        if (tagName.equals("img")) {
            return transformImage(element);
        }
        if (tagName.equals(ENMLConstants.HTML_ANCHOR_TAG)) {
            String href = element.absUrl(ENMLConstants.HTML_ANCHOR_HREF_ATTR);
            if (!ENMLUtil.isAcceptableURL(href)) {
                return false;
            }
            element.attr(ENMLConstants.HTML_ANCHOR_HREF_ATTR, href);
        }
        return true;
    }

    /**
     * Handles an image element: downloads the image and rewrites the element as a media
     * element, or only makes the {@code src} absolute when downloading is not possible.
     */
    private boolean transformImage(Element element) {
        String src = element.absUrl(ENMLConstants.HTML_IMG_SRC_ATTR);
        if (!ENMLUtil.isAcceptableURL(src)) {
            return false;
        }
        // Without a fetcher or a resource list nothing can be downloaded or recorded, so the
        // image stays a plain image pointing at its absolute URL.
        if (this.fetcher == null || this.resourceList == null) {
            element.attr(ENMLConstants.HTML_IMG_SRC_ATTR, src);
            return true;
        }
        if (!this.fetcher.isAllowedURL(src)) {
            logger.log(Level.INFO, "Prohibited image url " + src);
            return false;
        }

        ResourceData resourceData;
        try {
            resourceData = this.fetcher.fetchResource(src, null);
        } catch (Exception e) {
            // Best effort: a failed download drops this image instead of aborting the conversion.
            resourceData = null;
            logger.log(Level.WARNING, "Failed to get resource " + src + " for reason: ", (Throwable) e);
        }
        if (resourceData == null || resourceData.getBytes() == null || resourceData.getMime() == null) {
            return false;
        }

        Resource resource = ENMLUtil.buildResource(resourceData.getBytes(), resourceData.getMime(), resourceData.getFilename());
        element.tagName(ENMLConstants.EN_MEDIA_TAG);
        element.attr("type", resource.getMime());
        element.attr(ENMLConstants.EN_MEDIA_ATTR_HASH, ENMLUtil.bytesToHex(resource.getData().getBodyHash()));
        element.removeAttr(ENMLConstants.HTML_IMG_SRC_ATTR);
        this.resourceList.add(resource);
        return true;
    }

    /**
     * Walks the subtree rooted at {@code node} without recursion, calling
     * {@link #head(Node, int)} when a node is entered and {@link #tail(Node, int, boolean)}
     * when it is left.
     *
     * <p>The children of a node rejected by {@code head} are not visited: the whole subtree is
     * removed by {@code tail}, so visiting them would only trigger needless image downloads and
     * leave unused entries in the resource list.
     *
     * @param node the root of the subtree to convert
     * @return the value {@code head} returned for the root, i.e. whether the root was kept
     */
    protected boolean traverse(Node node) {
        // One entry per node that has been entered but not yet left.
        Deque<Boolean> keepFlags = new ArrayDeque<Boolean>();
        Node current = node;
        int depth = 0;
        boolean keep = false;
        while (current != null) {
            keep = head(current, depth);
            keepFlags.push(Boolean.valueOf(keep));
            if (keep && current.childNodeSize() > 0) {
                current = current.childNode(0);
                depth++;
                continue;
            }

            // Siblings and parents are looked up before tail() runs because tail() may detach the node.
            Node next = current.nextSibling();
            while (next == null && depth > 0) {
                Node parent = current.parentNode();
                tail(current, depth, keepFlags.pop().booleanValue());
                current = parent;
                next = current.nextSibling();
                depth--;
            }
            keep = keepFlags.pop().booleanValue();
            tail(current, depth, keep);
            if (current == node) {
                break;
            }
            current = next;
        }
        return keep;
    }
}
