From ff3bedc37eaac85276f4899c366cbe3710318a67 Mon Sep 17 00:00:00 2001
From: Dimitry Ivanov <mail@dimitryivanov.ru>
Date: Fri, 17 Aug 2018 12:53:36 +0300
Subject: [PATCH] Added 2 modules: html-parser-api and html-parser-impl

---
 html-parser-api/build.gradle                  |   31 +
 html-parser-api/src/main/AndroidManifest.xml  |    1 +
 .../java/ru/noties/markwon/html/HtmlTag.java  |   54 +
 .../markwon/html/MarkwonHtmlParser.java       |   36 +
 .../markwon/html/MarkwonHtmlParserNoOp.java   |   26 +
 html-parser-impl/build.gradle                 |   32 +
 html-parser-impl/src/main/AndroidManifest.xml |    1 +
 .../ru/noties/markwon/html/HtmlTagImpl.java   |  117 ++
 .../markwon/html/MarkwonHtmlParserImpl.java   |  396 ++++
 .../html/jsoup/UncheckedIOException.java      |   13 +
 .../markwon/html/jsoup/helper/Normalizer.java |   18 +
 .../markwon/html/jsoup/helper/Validate.java   |  112 ++
 .../markwon/html/jsoup/nodes/Attribute.java   |  202 ++
 .../markwon/html/jsoup/nodes/Attributes.java  |  444 +++++
 .../html/jsoup/nodes/DocumentType.java        |  104 +
 .../markwon/html/jsoup/nodes/Entities.java    |  351 ++++
 .../html/jsoup/nodes/EntitiesData.java        |   11 +
 .../html/jsoup/parser/CharacterReader.java    |  483 +++++
 .../markwon/html/jsoup/parser/ParseError.java |   41 +
 .../html/jsoup/parser/ParseErrorList.java     |   34 +
 .../markwon/html/jsoup/parser/Token.java      |  398 ++++
 .../markwon/html/jsoup/parser/Tokeniser.java  |  295 +++
 .../html/jsoup/parser/TokeniserState.java     | 1737 +++++++++++++++++
 settings.gradle                               |    3 +-
 24 files changed, 4939 insertions(+), 1 deletion(-)
 create mode 100644 html-parser-api/build.gradle
 create mode 100644 html-parser-api/src/main/AndroidManifest.xml
 create mode 100644 html-parser-api/src/main/java/ru/noties/markwon/html/HtmlTag.java
 create mode 100644 html-parser-api/src/main/java/ru/noties/markwon/html/MarkwonHtmlParser.java
 create mode 100644 html-parser-api/src/main/java/ru/noties/markwon/html/MarkwonHtmlParserNoOp.java
 create mode 100644 html-parser-impl/build.gradle
 create mode 100644 html-parser-impl/src/main/AndroidManifest.xml
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/HtmlTagImpl.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/MarkwonHtmlParserImpl.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/UncheckedIOException.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/helper/Normalizer.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/helper/Validate.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/Attribute.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/Attributes.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/DocumentType.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/Entities.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/EntitiesData.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/CharacterReader.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/ParseError.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/ParseErrorList.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/Token.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/Tokeniser.java
 create mode 100644 html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/TokeniserState.java

diff --git a/html-parser-api/build.gradle b/html-parser-api/build.gradle
new file mode 100644
index 00000000..53bc9eee
--- /dev/null
+++ b/html-parser-api/build.gradle
@@ -0,0 +1,31 @@
+apply plugin: 'com.android.library'
+
+android {
+
+    compileSdkVersion TARGET_SDK
+    buildToolsVersion BUILD_TOOLS
+
+    defaultConfig {
+        minSdkVersion MIN_SDK
+        targetSdkVersion TARGET_SDK
+        versionCode 1
+        versionName version
+    }
+}
+
+dependencies {
+    api SUPPORT_ANNOTATIONS
+}
+
+afterEvaluate {
+    generateReleaseBuildConfig.enabled = false
+}
+
+// todo: remove `local` check after merge with latest version (1.1.1)
+if (hasProperty('release')) {
+    if (hasProperty('local')) {
+        ext.RELEASE_REPOSITORY_URL = LOCAL_MAVEN_URL
+        ext.SNAPSHOT_REPOSITORY_URL = LOCAL_MAVEN_URL
+    }
+    apply from: 'https://raw.githubusercontent.com/noties/gradle-mvn-push/master/gradle-mvn-push-aar.gradle'
+}
diff --git a/html-parser-api/src/main/AndroidManifest.xml b/html-parser-api/src/main/AndroidManifest.xml
new file mode 100644
index 00000000..6d886e0e
--- /dev/null
+++ b/html-parser-api/src/main/AndroidManifest.xml
@@ -0,0 +1 @@
+<manifest package="ru.noties.markwon.html" />
diff --git a/html-parser-api/src/main/java/ru/noties/markwon/html/HtmlTag.java b/html-parser-api/src/main/java/ru/noties/markwon/html/HtmlTag.java
new file mode 100644
index 00000000..896f0d40
--- /dev/null
+++ b/html-parser-api/src/main/java/ru/noties/markwon/html/HtmlTag.java
@@ -0,0 +1,54 @@
+package ru.noties.markwon.html;
+
+import android.support.annotation.NonNull;
+import android.support.annotation.Nullable;
+
+import java.util.List;
+
+/**
+ * @see Inline
+ * @see Block
+ */
+public interface HtmlTag {
+
+    /**
+     * @return normalized tag name (lower-case)
+     */
+    @NonNull
+    String name();
+
+    /**
+     * @return index at which this tag starts
+     */
+    int start();
+
+    /**
+     * @return index at which this tag ends
+     */
+    int end();
+
+    /**
+     * Represents <em>really</em> inline HTML tags (unlike commonmark definitions)
+     */
+    interface Inline extends HtmlTag {
+    }
+
+    /**
+     * Represents HTML block tags. Please note that all tags that are not inline should be
+     * considered as block tags
+     */
+    interface Block extends HtmlTag {
+
+        /**
+         * @return parent {@link Block} or null if there is no parent (this block is at root level)
+         */
+        @Nullable
+        Block parent();
+
+        /**
+         * @return list of children
+         */
+        @NonNull
+        List<Block> children();
+    }
+}
diff --git a/html-parser-api/src/main/java/ru/noties/markwon/html/MarkwonHtmlParser.java b/html-parser-api/src/main/java/ru/noties/markwon/html/MarkwonHtmlParser.java
new file mode 100644
index 00000000..cc168b87
--- /dev/null
+++ b/html-parser-api/src/main/java/ru/noties/markwon/html/MarkwonHtmlParser.java
@@ -0,0 +1,36 @@
+package ru.noties.markwon.html;
+
+import android.support.annotation.NonNull;
+
+import java.util.List;
+
+public abstract class MarkwonHtmlParser {
+
+    @NonNull
+    public static MarkwonHtmlParser noOp() {
+        return new MarkwonHtmlParserNoOp();
+    }
+
+    public interface FlushAction<T> {
+        void apply(@NonNull List<T> tags);
+    }
+
+    public abstract <T extends Appendable & CharSequence> void processFragment(
+            @NonNull T output,
+            @NonNull String htmlFragment);
+
+    // clear all pending tags (if any)
+    // todo: we also can do this: if supplied value is -1 (for example) we ignore tags that are not closed
+    public abstract void flushInlineTags(
+            int documentLength,
+            @NonNull FlushAction<HtmlTag.Inline> action);
+
+    // clear all pending blocks if any
+    // todo: we also can do this: if supplied value is -1 (for example) we ignore tags that are not closed
+    public abstract void flushBlockTags(
+            int documentLength,
+            @NonNull FlushAction<HtmlTag.Block> action);
+
+    public abstract void reset();
+
+}
diff --git a/html-parser-api/src/main/java/ru/noties/markwon/html/MarkwonHtmlParserNoOp.java b/html-parser-api/src/main/java/ru/noties/markwon/html/MarkwonHtmlParserNoOp.java
new file mode 100644
index 00000000..56926d12
--- /dev/null
+++ b/html-parser-api/src/main/java/ru/noties/markwon/html/MarkwonHtmlParserNoOp.java
@@ -0,0 +1,26 @@
+package ru.noties.markwon.html;
+
+import android.support.annotation.NonNull;
+
+class MarkwonHtmlParserNoOp extends MarkwonHtmlParser {
+
+    @Override
+    public <T extends Appendable & CharSequence> void processFragment(@NonNull T output, @NonNull String htmlFragment) {
+
+    }
+
+    @Override
+    public void flushInlineTags(int documentLength, @NonNull FlushAction<HtmlTag.Inline> action) {
+
+    }
+
+    @Override
+    public void flushBlockTags(int documentLength, @NonNull FlushAction<HtmlTag.Block> action) {
+
+    }
+
+    @Override
+    public void reset() {
+
+    }
+}
diff --git a/html-parser-impl/build.gradle b/html-parser-impl/build.gradle
new file mode 100644
index 00000000..f1c8860f
--- /dev/null
+++ b/html-parser-impl/build.gradle
@@ -0,0 +1,32 @@
+apply plugin: 'com.android.library'
+
+android {
+
+    compileSdkVersion TARGET_SDK
+    buildToolsVersion BUILD_TOOLS
+
+    defaultConfig {
+        minSdkVersion MIN_SDK
+        targetSdkVersion TARGET_SDK
+        versionCode 1
+        versionName version
+    }
+}
+
+dependencies {
+    api SUPPORT_ANNOTATIONS
+    api project(':html-parser-api')
+}
+
+afterEvaluate {
+    generateReleaseBuildConfig.enabled = false
+}
+
+// todo: remove `local` check after merge with latest version (1.1.1)
+if (hasProperty('release')) {
+    if (hasProperty('local')) {
+        ext.RELEASE_REPOSITORY_URL = LOCAL_MAVEN_URL
+        ext.SNAPSHOT_REPOSITORY_URL = LOCAL_MAVEN_URL
+    }
+    apply from: 'https://raw.githubusercontent.com/noties/gradle-mvn-push/master/gradle-mvn-push-aar.gradle'
+}
diff --git a/html-parser-impl/src/main/AndroidManifest.xml b/html-parser-impl/src/main/AndroidManifest.xml
new file mode 100644
index 00000000..6d886e0e
--- /dev/null
+++ b/html-parser-impl/src/main/AndroidManifest.xml
@@ -0,0 +1 @@
+<manifest package="ru.noties.markwon.html" />
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/HtmlTagImpl.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/HtmlTagImpl.java
new file mode 100644
index 00000000..3f8083dd
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/HtmlTagImpl.java
@@ -0,0 +1,117 @@
+package ru.noties.markwon.html;
+
+import android.support.annotation.NonNull;
+import android.support.annotation.Nullable;
+
+import java.util.Collections;
+import java.util.List;
+
+abstract class HtmlTagImpl implements HtmlTag {
+
+    static final int NO_VALUE = -1;
+
+    final String name;
+    final int start;
+    int end = NO_VALUE;
+
+    protected HtmlTagImpl(@NonNull String name, int start) {
+        this.name = name;
+        this.start = start;
+    }
+
+    @NonNull
+    @Override
+    public String name() {
+        return name;
+    }
+
+    @Override
+    public int start() {
+        return start;
+    }
+
+    @Override
+    public int end() {
+        return end;
+    }
+
+    boolean isClosed() {
+        return end > NO_VALUE;
+    }
+
+    abstract void closeAt(int end);
+
+
+    static class InlineImpl extends HtmlTagImpl implements Inline {
+
+        InlineImpl(@NonNull String name, int start) {
+            super(name, start);
+        }
+
+        @Override
+        void closeAt(int end) {
+            if (!isClosed()) {
+                super.end = end;
+            }
+        }
+    }
+
+    static class BlockImpl extends HtmlTagImpl implements Block {
+
+        @NonNull
+        static BlockImpl root() {
+            //noinspection ConstantConditions
+            return new BlockImpl("", 0, null);
+        }
+
+        @NonNull
+        static BlockImpl create(@NonNull String name, int start, @NonNull BlockImpl parent) {
+            return new BlockImpl(name, start, parent);
+        }
+
+        final BlockImpl parent;
+        List<BlockImpl> children;
+
+        @SuppressWarnings("NullableProblems")
+        BlockImpl(@NonNull String name, int start, @NonNull BlockImpl parent) {
+            super(name, start);
+            this.parent = parent;
+        }
+
+        @Override
+        void closeAt(int end) {
+            if (!isClosed()) {
+                super.end = end;
+                if (children != null) {
+                    for (BlockImpl child: children) {
+                        child.closeAt(end);
+                    }
+                    children = Collections.unmodifiableList(children);
+                } else {
+                    children = Collections.emptyList();
+                }
+            }
+        }
+
+        boolean isRoot() {
+            return parent == null;
+        }
+
+        @Nullable
+        @Override
+        public Block parent() {
+            if (parent == null) {
+                throw new IllegalStateException("#parent() getter was called on the root node " +
+                        "which should not be exposed outside internal usage");
+            }
+            return parent;
+        }
+
+        @NonNull
+        @Override
+        public List<Block> children() { // never null: the field is only set on first append or in closeAt()
+            //noinspection unchecked
+            return children == null ? Collections.<Block>emptyList() : (List<Block>) (List<? extends Block>) children;
+        }
+    }
+}
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/MarkwonHtmlParserImpl.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/MarkwonHtmlParserImpl.java
new file mode 100644
index 00000000..c2813775
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/MarkwonHtmlParserImpl.java
@@ -0,0 +1,396 @@
+package ru.noties.markwon.html;
+
+import android.support.annotation.NonNull;
+import android.support.annotation.Nullable;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import ru.noties.markwon.html.HtmlTag.Block;
+import ru.noties.markwon.html.HtmlTag.Inline;
+import ru.noties.markwon.html.HtmlTagImpl.BlockImpl;
+import ru.noties.markwon.html.HtmlTagImpl.InlineImpl;
+import ru.noties.markwon.html.jsoup.parser.CharacterReader;
+import ru.noties.markwon.html.jsoup.parser.ParseErrorList;
+import ru.noties.markwon.html.jsoup.parser.Token;
+import ru.noties.markwon.html.jsoup.parser.Tokeniser;
+
+public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
+
+    @NonNull
+    public static MarkwonHtmlParserImpl create() {
+        return new MarkwonHtmlParserImpl();
+    }
+
+    // https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements
+    private static final Set<String> INLINE_TAGS;
+
+    private static final Set<String> VOID_TAGS;
+
+    // these are the tags that are considered _block_ ones
+    // this parser will ensure that these blocks are started on a new line
+    // other tags that are NOT inline are considered as block tags, but won't have new line
+    // inserted before them
+    // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
+    private static final Set<String> BLOCK_TAGS;
+
+    private static final String TAG_PARAGRAPH = "p";
+    private static final String TAG_LIST_ITEM = "li";
+
+    // todo: make it configurable
+    private static final String IMG_REPLACEMENT = "\uFFFC";
+
+    static {
+        INLINE_TAGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
+                "a", "abbr", "acronym",
+                "b", "bdo", "big", "br", "button",
+                "cite", "code",
+                "dfn",
+                "em",
+                "i", "img", "input",
+                "kbd",
+                "label",
+                "map",
+                "object",
+                "q",
+                "samp", "script", "select", "small", "span", "strong", "sub", "sup",
+                "textarea", "time", "tt",
+                "var"
+        )));
+        VOID_TAGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
+                "area",
+                "base", "br",
+                "col",
+                "embed",
+                "hr",
+                "img", "input",
+                "keygen",
+                "link",
+                "meta",
+                "param",
+                "source",
+                "track",
+                "wbr"
+        )));
+        BLOCK_TAGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
+                "address", "article", "aside",
+                "blockquote",
+                "canvas",
+                "dd", "div", "dl", "dt",
+                "fieldset", "figcaption", "figure", "footer", "form",
+                "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr",
+                "li",
+                "main",
+                "nav", "noscript",
+                "ol", "output",
+                "p", "pre",
+                "section",
+                "table", "tfoot",
+                "ul",
+                "video"
+        )));
+    }
+
+    private final List<InlineImpl> inlineTags = new ArrayList<>(0);
+
+    private BlockImpl currentBlock = BlockImpl.root();
+
+
+    @Override
+    public <T extends Appendable & CharSequence> void processFragment(
+            @NonNull T output,
+            @NonNull String htmlFragment) {
+
+        // todo: maybe there is a way to reuse tokeniser...
+        final Tokeniser tokeniser = new Tokeniser(new CharacterReader(htmlFragment), ParseErrorList.noTracking());
+
+        while (true) {
+
+            final Token token = tokeniser.read();
+            final Token.TokenType tokenType = token.type;
+
+            if (Token.TokenType.EOF == tokenType) {
+                break;
+            }
+
+            switch (tokenType) {
+
+                case StartTag: {
+
+                    final Token.StartTag startTag = (Token.StartTag) token;
+
+                    if (isInlineTag(startTag.normalName)) {
+                        processInlineTagStart(output, startTag);
+                    } else {
+                        processBlockTagStart(output, startTag);
+                    }
+                }
+                break;
+
+                case EndTag: {
+
+                    final Token.EndTag endTag = (Token.EndTag) token;
+
+                    if (isInlineTag(endTag.normalName)) {
+                        processInlineTagEnd(output, endTag);
+                    } else {
+                        processBlockTagEnd(output, endTag);
+                    }
+                }
+                break;
+
+                case Character: {
+                    processCharacter(output, ((Token.Character) token));
+                }
+                break;
+            }
+
+            // do not forget to reset processed token (even if it's not processed)
+            token.reset();
+        }
+    }
+
+    @Override
+    public void flushInlineTags(int documentLength, @NonNull FlushAction<Inline> action) {
+        if (!inlineTags.isEmpty()) {
+            for (InlineImpl inline : inlineTags) {
+                inline.closeAt(documentLength);
+            }
+            // pass a snapshot: a live view would be emptied by clear() below if the action keeps it
+            action.apply(Collections.unmodifiableList(new ArrayList<Inline>(inlineTags)));
+            inlineTags.clear();
+        }
+    }
+
+    @Override
+    public void flushBlockTags(int documentLength, @NonNull FlushAction<Block> action) {
+        // walk up from the current block to the node that has no parent (the internal root)
+        BlockImpl node = currentBlock;
+        while (node.parent != null) {
+            node = node.parent;
+        }
+
+        // closeAt recurses into every child that is not yet closed
+        node.closeAt(documentLength);
+
+        final List<Block> topLevel = node.children();
+        if (!topLevel.isEmpty()) {
+            action.apply(topLevel);
+        }
+        currentBlock = BlockImpl.root();
+    }
+
+    @Override
+    public void reset() {
+        inlineTags.clear();
+        currentBlock = BlockImpl.root();
+    }
+
+
+    protected <T extends Appendable & CharSequence> void processInlineTagStart(
+            @NonNull T output,
+            @NonNull Token.StartTag startTag) {
+
+        final String name = startTag.normalName;
+
+        final InlineImpl inline = new InlineImpl(name, output.length());
+
+        if (isVoidTag(name)
+                || startTag.selfClosing) {
+
+            // check if we have content to append as we must close this tag here
+            processVoidTag(output, startTag);
+
+            inline.end = output.length();
+        }
+
+        // actually only check if there is content for void/self-closing tags
+        // if none -> ignore it
+        if (inline.start != inline.end) {
+            inlineTags.add(inline);
+        }
+    }
+
+    protected <T extends Appendable & CharSequence> void processInlineTagEnd(
+            @NonNull T output,
+            @NonNull Token.EndTag endTag) {
+
+        // try to find it, if none found -> ignore
+        final InlineImpl openInlineTag = findOpenInlineTag(endTag.normalName);
+        if (openInlineTag != null) {
+            // close open inline tag
+            openInlineTag.end = output.length();
+        }
+    }
+
+
+    protected <T extends Appendable & CharSequence> void processBlockTagStart(
+            @NonNull T output,
+            @NonNull Token.StartTag startTag) {
+        // Opens a block for startTag; an open paragraph (or sibling list item) is implicitly closed first.
+        final String name = startTag.normalName;
+
+        // block tags (all that are NOT inline) -> blocks
+        // I think there is only one strong rule -> paragraph cannot contain anything
+        // except inline tags
+        // also, closing paragraph with non-closed inlines -> doesn't close inlines
+        // they are continued for _afterwards_
+
+        if (TAG_PARAGRAPH.equals(currentBlock.name)) {
+            // it must be closed here no matter what we are as here we _assume_
+            // that it's a block tag; closeAt (not a raw `end` write) also finalizes `children`
+            append(output, "\n");
+            currentBlock.closeAt(output.length());
+            currentBlock = currentBlock.parent;
+        } else if (TAG_LIST_ITEM.equals(name)
+                && TAG_LIST_ITEM.equals(currentBlock.name)) {
+            // close previous list item if in the same parent (closeAt also finalizes `children`)
+            currentBlock.closeAt(output.length());
+            currentBlock = currentBlock.parent;
+        }
+
+        if (isBlockTag(name)) {
+            ensureNewLine(output);
+        }
+
+        final int start = output.length();
+
+        final BlockImpl block = BlockImpl.create(name, start, currentBlock);
+
+        //noinspection ConstantConditions
+        appendBlockChild(block.parent, block);
+
+        this.currentBlock = block;
+    }
+
+    protected <T extends Appendable & CharSequence> void processBlockTagEnd(
+            @NonNull T output,
+            @NonNull Token.EndTag endTag) {
+
+        final String tagName = endTag.normalName;
+        final BlockImpl open = findOpenBlockTag(tagName);
+        // an end tag without a matching open block is ignored
+        if (open == null) {
+            return;
+        }
+        // a closing paragraph appends a line break before its end index is recorded
+        if (TAG_PARAGRAPH.equals(tagName)) {
+            append(output, "\n");
+        }
+        open.closeAt(output.length());
+        this.currentBlock = open.parent;
+    }
+
+    protected <T extends Appendable & CharSequence> void processVoidTag(
+            @NonNull T output,
+            @NonNull Token.StartTag startTag) {
+
+        final String name = startTag.normalName;
+
+        if ("br".equals(name)) {
+            append(output, "\n");
+        } else if ("img".equals(name)) {
+            final String alt = startTag.attributes.getIgnoreCase("alt");
+            if (alt == null
+                    || alt.length() == 0) {
+                // no alt is provided
+                append(output, IMG_REPLACEMENT);
+            } else {
+                append(output, alt);
+            }
+        }
+
+        // other tags are ignored
+    }
+
+    protected <T extends Appendable & CharSequence> void processCharacter(
+            @NonNull T output,
+            @NonNull Token.Character character) {
+
+        // the thing here is: if it's a script tag that we are inside -> we must not treat this
+        // as the text to append... should we even care about this? how many people are
+        // going to include freaking script tags as html inline?
+        //
+        // so tags are: BUTTON, INPUT, SELECT, SCRIPT, TEXTAREA
+        //
+        // actually we must decide it here: should we append freaking characters for these _bad_
+        // tags or not, as later we won't be able to change it and/or allow modification (as
+        // all indexes will be affected with this)
+
+        // for now: ignore the inline context
+        append(output, character.getData());
+    }
+
+    protected void appendBlockChild(@NonNull BlockImpl parent, @NonNull BlockImpl child) {
+        List<BlockImpl> children = parent.children;
+        if (children == null) {
+            children = new ArrayList<>(2);
+            parent.children = children;
+        }
+        children.add(child);
+    }
+
+    @Nullable
+    protected InlineImpl findOpenInlineTag(@NonNull String name) {
+
+        InlineImpl inline;
+
+        for (int i = inlineTags.size() - 1; i > -1; i--) {
+            inline = inlineTags.get(i);
+            if (name.equals(inline.name)
+                    && inline.end < 0) {
+                return inline;
+            }
+        }
+
+        return null;
+    }
+
+    @Nullable
+    protected BlockImpl findOpenBlockTag(@NonNull String name) {
+
+        BlockImpl blockTag = currentBlock;
+
+        while (blockTag != null
+                && !name.equals(blockTag.name)) {
+            blockTag = blockTag.parent;
+        }
+
+        return blockTag;
+    }
+
+    // name here must be lower case
+    protected static boolean isInlineTag(@NonNull String name) {
+        return INLINE_TAGS.contains(name);
+    }
+
+    protected static boolean isVoidTag(@NonNull String name) {
+        return VOID_TAGS.contains(name);
+    }
+
+    protected static boolean isBlockTag(@NonNull String name) {
+        return BLOCK_TAGS.contains(name);
+    }
+
+    protected static void append(@NonNull Appendable appendable, @NonNull CharSequence text) {
+        try {
+            appendable.append(text);
+        } catch (IOException e) {
+            // _must_ not happen
+            throw new RuntimeException(e);
+        }
+    }
+
+    protected static <T extends Appendable & CharSequence> void ensureNewLine(@NonNull T output) {
+        final int length = output.length();
+        if (length > 0
+                && '\n' != output.charAt(length - 1)) {
+            append(output, "\n");
+        }
+    }
+}
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/UncheckedIOException.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/UncheckedIOException.java
new file mode 100644
index 00000000..9548bdf4
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/UncheckedIOException.java
@@ -0,0 +1,13 @@
+package ru.noties.markwon.html.jsoup;
+
+import java.io.IOException;
+
+public class UncheckedIOException extends RuntimeException {
+    public UncheckedIOException(IOException cause) {
+        super(cause);
+    }
+
+    public IOException ioException() {
+        return (IOException) getCause();
+    }
+}
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/helper/Normalizer.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/helper/Normalizer.java
new file mode 100644
index 00000000..a0df7dd4
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/helper/Normalizer.java
@@ -0,0 +1,18 @@
+package ru.noties.markwon.html.jsoup.helper;
+
+import java.util.Locale;
+
+/**
+ * Util methods for normalizing strings. Jsoup internal use only, please don't depend on this API.
+ */
+public final class Normalizer {
+
+    public static String lowerCase(final String input) {
+        return input != null ? input.toLowerCase(Locale.ENGLISH) : "";
+    }
+
+    public static String normalize(final String input) {
+        return lowerCase(input).trim();
+    }
+}
+
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/helper/Validate.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/helper/Validate.java
new file mode 100644
index 00000000..0d00249b
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/helper/Validate.java
@@ -0,0 +1,112 @@
+package ru.noties.markwon.html.jsoup.helper;
+
+/**
+ * Simple validation methods. Designed for jsoup internal use
+ */
+public final class Validate {
+
+    private Validate() {}
+
+    /**
+     * Validates that the object is not null
+     * @param obj object to test
+     */
+    public static void notNull(Object obj) {
+        if (obj == null)
+            throw new IllegalArgumentException("Object must not be null");
+    }
+
+    /**
+     * Validates that the object is not null
+     * @param obj object to test
+     * @param msg message to output if validation fails
+     */
+    public static void notNull(Object obj, String msg) {
+        if (obj == null)
+            throw new IllegalArgumentException(msg);
+    }
+
+    /**
+     * Validates that the value is true
+     * @param val object to test
+     */
+    public static void isTrue(boolean val) {
+        if (!val)
+            throw new IllegalArgumentException("Must be true");
+    }
+
+    /**
+     * Validates that the value is true
+     * @param val object to test
+     * @param msg message to output if validation fails
+     */
+    public static void isTrue(boolean val, String msg) {
+        if (!val)
+            throw new IllegalArgumentException(msg);
+    }
+
+    /**
+     * Validates that the value is false
+     * @param val object to test
+     */
+    public static void isFalse(boolean val) {
+        if (val)
+            throw new IllegalArgumentException("Must be false");
+    }
+
+    /**
+     * Validates that the value is false
+     * @param val object to test
+     * @param msg message to output if validation fails
+     */
+    public static void isFalse(boolean val, String msg) {
+        if (val)
+            throw new IllegalArgumentException(msg);
+    }
+
+    /**
+     * Validates that the array contains no null elements
+     * @param objects the array to test
+     */
+    public static void noNullElements(Object[] objects) {
+        noNullElements(objects, "Array must not contain any null objects");
+    }
+
+    /**
+     * Validates that the array contains no null elements
+     * @param objects the array to test
+     * @param msg message to output if validation fails
+     */
+    public static void noNullElements(Object[] objects, String msg) {
+        for (Object obj : objects)
+            if (obj == null)
+                throw new IllegalArgumentException(msg);
+    }
+
+    /**
+     * Validates that the string is not empty
+     * @param string the string to test
+     */
+    public static void notEmpty(String string) {
+        if (string == null || string.length() == 0)
+            throw new IllegalArgumentException("String must not be empty");
+    }
+
+    /**
+     * Validates that the string is not empty
+     * @param string the string to test
+     * @param msg message to output if validation fails
+     */
+    public static void notEmpty(String string, String msg) {
+        if (string == null || string.length() == 0)
+            throw new IllegalArgumentException(msg);
+    }
+
+    /**
+     Cause a failure.
+     @param msg message to output.
+     */
+    public static void fail(String msg) {
+        throw new IllegalArgumentException(msg);
+    }
+}
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/Attribute.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/Attribute.java
new file mode 100644
index 00000000..fea596e2
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/Attribute.java
@@ -0,0 +1,202 @@
+package ru.noties.markwon.html.jsoup.nodes;
+
+import java.util.Map;
+
+import ru.noties.markwon.html.jsoup.helper.Validate;
+
+/**
+ A single key + value attribute. (Only used for presentation.)
+ */
+public class Attribute implements Map.Entry<String, String>, Cloneable  {
+//    private static final String[] booleanAttributes = {
+//            "allowfullscreen", "async", "autofocus", "checked", "compact", "declare", "default", "defer", "disabled",
+//            "formnovalidate", "hidden", "inert", "ismap", "itemscope", "multiple", "muted", "nohref", "noresize",
+//            "noshade", "novalidate", "nowrap", "open", "readonly", "required", "reversed", "seamless", "selected",
+//            "sortable", "truespeed", "typemustmatch"
+//    };
+
+    private String key;
+    private String val;
+    Attributes parent; // used to update the holding Attributes when the key / value is changed via this interface
+
+    /**
+     * Create a new attribute from unencoded (raw) key and value, without a parent Attributes.
+     * @param key attribute key; case is preserved.
+     * @param value attribute value
+     */
+    public Attribute(String key, String value) {
+        this(key, value, null);
+    }
+
+    /**
+     * Create a new attribute from unencoded (raw) key and value.
+     * @param key attribute key; trimmed, case is preserved; must not be null, nor empty once trimmed.
+     * @param val attribute value
+     * @param parent the containing Attributes (this Attribute is not automatically added to said Attributes)
+     */
+    public Attribute(String key, String val, Attributes parent) {
+        Validate.notNull(key);
+        this.key = key.trim();
+        Validate.notEmpty(this.key); // check the trimmed key: a whitespace-only key trims down to empty
+        this.val = val;
+        this.parent = parent;
+    }
+
+    /**
+     Get the attribute key.
+     @return the attribute key, in the trimmed form stored by the constructor or {@link #setKey(String)}
+     */
+    public String getKey() {
+        return key;
+    }
+
+    /**
+     Set the attribute key; case is preserved.
+     @param key the new key; must not be null
+     */
+    public void setKey(String key) {
+        Validate.notNull(key);
+        final String trimmed = key.trim();
+        Validate.notEmpty(trimmed); // a whitespace-only key trims down to nothing
+        // keep the owning Attributes (if any) in sync with the rename
+        final int slot = parent == null ? Attributes.NotFound : parent.indexOfKey(this.key);
+        if (slot != Attributes.NotFound) {
+            parent.keys[slot] = trimmed;
+        }
+        this.key = trimmed;
+    }
+
+    /**
+     Get the attribute value.
+     @return the attribute value as stored; no null check is applied, so this may be null
+     */
+    public String getValue() {
+        return val;
+    }
+
+    /**
+     Set the attribute value, also updating the parent Attributes when this attribute is attached to one.
+     @param val the new attribute value
+     @return the previous value: read from the parent when attached, otherwise this attribute's own value
+     */
+    public String setValue(String val) {
+        final String oldVal = parent != null ? parent.get(this.key) : this.val;
+        if (parent != null) {
+            int i = parent.indexOfKey(this.key);
+            if (i != Attributes.NotFound) parent.vals[i] = val;
+        }
+        this.val = val;
+        return oldVal;
+    }
+
+//    /**
+//     Get the HTML representation of this attribute; e.g. {@code href="index.html"}.
+//     @return HTML
+//     */
+//    public String html() {
+//        StringBuilder accum = new StringBuilder();
+//
+//        try {
+//            html(accum, (new Document("")).outputSettings());
+//        } catch(IOException exception) {
+//            throw new SerializationException(exception);
+//        }
+//        return accum.toString();
+//    }
+//
+//    protected static void html(String key, String val, Appendable accum, Document.OutputSettings out) throws IOException {
+//        accum.append(key);
+//        if (!shouldCollapseAttribute(key, val, out)) {
+//            accum.append("=\"");
+//            Entities.escape(accum, Attributes.checkNotNull(val) , out, true, false, false);
+//            accum.append('"');
+//        }
+//    }
+//
+//    protected void html(Appendable accum, Document.OutputSettings out) throws IOException {
+//        html(key, val, accum, out);
+//    }
+
+//    /**
+//     Get the string representation of this attribute, implemented as {@link #html()}.
+//     @return string
+//     */
+//    @Override
+//    public String toString() {
+//        return html();
+//    }
+
+//    /**
+//     * Create a new Attribute from an unencoded key and a HTML attribute encoded value.
+//     * @param unencodedKey assumes the key is not encoded, as can be only run of simple \w chars.
+//     * @param encodedValue HTML attribute encoded value
+//     * @return attribute
+//     */
+//    public static Attribute createFromEncoded(String unencodedKey, String encodedValue) {
+//        String value = Entities.unescape(encodedValue, true);
+//        return new Attribute(unencodedKey, value, null); // parent will get set when Put
+//    }
+
+    protected boolean isDataAttribute() { // instance form: applies the static check to this attribute's key
+        return isDataAttribute(key);
+    }
+
+    protected static boolean isDataAttribute(String key) { // true for the data- prefix followed by at least one character
+        return key.length() > Attributes.dataPrefix.length() && key.startsWith(Attributes.dataPrefix);
+    }
+
+//    /**
+//     * Collapsible if it's a boolean attribute and value is empty or same as name
+//     *
+//     * @param out output settings
+//     * @return  Returns whether collapsible or not
+//     */
+//    protected final boolean shouldCollapseAttribute(Document.OutputSettings out) {
+//        return shouldCollapseAttribute(key, val, out);
+//    }
+
+//    protected static boolean shouldCollapseAttribute(final String key, final String val, final Document.OutputSettings out) {
+//        return (
+//                out.syntax() == Document.OutputSettings.Syntax.html &&
+//                        (val == null || ("".equals(val) || val.equalsIgnoreCase(key)) && Attribute.isBooleanAttribute(key)));
+//    }
+
+//    /**
+//     * @deprecated
+//     */
+//    protected boolean isBooleanAttribute() {
+//        return Arrays.binarySearch(booleanAttributes, key) >= 0 || val == null;
+//    }
+//
+//    /**
+//     * Checks if this attribute name is defined as a boolean attribute in HTML5
+//     */
+//    protected static boolean isBooleanAttribute(final String key) {
+//        return Arrays.binarySearch(booleanAttributes, key) >= 0;
+//    }
+
+    @Override
+    public boolean equals(Object o) { // parent is not part of the comparison
+        if (o == this) return true;
+        if (o == null || o.getClass() != getClass()) return false;
+        final Attribute other = (Attribute) o;
+        final boolean sameKey = key == null ? other.key == null : key.equals(other.key);
+        return sameKey && (val == null ? other.val == null : val.equals(other.val));
+    }
+
+    @Override
+    public int hashCode() { // parent does not contribute to the hash
+        final int keyHash = key == null ? 0 : key.hashCode();
+        final int valHash = val == null ? 0 : val.hashCode();
+        return 31 * keyHash + valHash;
+    }
+
+    @Override
+    public Attribute clone() {
+        try {
+            return (Attribute) super.clone(); // field-for-field copy: the clone keeps the same parent reference
+        } catch (CloneNotSupportedException e) { // not expected, since this class implements Cloneable
+            throw new RuntimeException(e);
+        }
+    }
+}
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/Attributes.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/Attributes.java
new file mode 100644
index 00000000..f00ecfe1
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/Attributes.java
@@ -0,0 +1,444 @@
+package ru.noties.markwon.html.jsoup.nodes;
+
+import java.util.AbstractMap;
+import java.util.AbstractSet;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import ru.noties.markwon.html.jsoup.helper.Validate;
+
+import static ru.noties.markwon.html.jsoup.helper.Normalizer.lowerCase;
+
+/**
+ * The attributes of an Element.
+ * <p>
+ * Attributes are treated as a map: there can be only one value associated with an attribute key/name.
+ * </p>
+ * <p>
+ * Attribute name and value comparisons are generally <b>case sensitive</b>. By default for HTML, attribute names are
+ * normalized to lower-case on parsing. That means you should use lower-case strings when referring to attributes by
+ * name.
+ * </p>
+ *
+ * @author Jonathan Hedley, jonathan@hedley.net
+ */
+public class Attributes implements Iterable<Attribute>, Cloneable {
+    protected static final String dataPrefix = "data-";
+    private static final int InitialCapacity = 4; // todo - analyze Alexa 1MM sites, determine best setting
+
+    // manages the key/val arrays
+    private static final int GrowthFactor = 2;
+    private static final String[] Empty = {};
+    static final int NotFound = -1;
+    private static final String EmptyString = "";
+
+    private int size = 0; // number of slots used (not capacity, which is keys.length)
+    String[] keys = Empty;
+    String[] vals = Empty;
+
+    // grows the key/val arrays when fewer than minNewSize slots are available
+    private void checkCapacity(int minNewSize) {
+        Validate.isTrue(minNewSize >= size);
+        final int capacity = keys.length;
+        if (minNewSize <= capacity)
+            return; // already enough room
+
+        // start at the initial capacity, afterwards scale the used size; never go below the request
+        final int grown = capacity < InitialCapacity ? InitialCapacity : size * GrowthFactor;
+        final int target = Math.max(grown, minNewSize);
+
+        keys = copyOf(keys, target);
+        vals = copyOf(vals, target);
+    }
+
+    // hand-rolled stand-in for Arrays.copyOf, for support of Android API 8.
+    private static String[] copyOf(String[] orig, int size) {
+        final String[] result = new String[size];
+        final int toCopy = orig.length < size ? orig.length : size;
+        System.arraycopy(orig, 0, result, 0, toCopy);
+        return result;
+    }
+
+    int indexOfKey(String key) {
+        Validate.notNull(key);
+        // linear scan over the used slots only; the first exact match wins
+        int i = 0;
+        while (i < size && !key.equals(keys[i]))
+            i++;
+        return i < size ? i : NotFound;
+    }
+
+    private int indexOfKeyIgnoreCase(String key) {
+        Validate.notNull(key);
+        int found = NotFound;
+        for (int i = 0; found == NotFound && i < size; i++) {
+            if (key.equalsIgnoreCase(keys[i])) found = i;
+        }
+        return found;
+    }
+
+    // boolean attributes are stored with a null value (key only); consumers get the empty string instead
+    static String checkNotNull(String val) {
+        return val != null ? val : EmptyString;
+    }
+
+    /**
+     Get an attribute value by key.
+     @param key the (case-sensitive) attribute key
+     @return the attribute value if set; or empty string if not set (or a boolean attribute).
+     @see #hasKey(String)
+     */
+    public String get(String key) {
+        final int slot = indexOfKey(key);
+        return slot != NotFound && vals[slot] != null ? vals[slot] : EmptyString;
+    }
+
+    /**
+     * Get an attribute's value, matching the key without regard to case.
+     * @param key the attribute name
+     * @return the first matching attribute value if set; or empty string if not set (or a boolean attribute).
+     */
+    public String getIgnoreCase(String key) {
+        final int slot = indexOfKeyIgnoreCase(key);
+        return slot != NotFound && vals[slot] != null ? vals[slot] : EmptyString;
+    }
+
+    // appends a new slot; no check is made for an existing entry with the same key
+    private void add(String key, String value) {
+        checkCapacity(size + 1);
+        final int slot = size++;
+        keys[slot] = key;
+        vals[slot] = value;
+    }
+
+    /**
+     * Set a new attribute, or replace an existing one by key.
+     * @param key case sensitive attribute key
+     * @param value attribute value
+     * @return these attributes, for chaining
+     */
+    public Attributes put(String key, String value) {
+        final int slot = indexOfKey(key);
+        if (slot == NotFound)
+            add(key, value); // unseen key: append
+        else
+            vals[slot] = value; // known key: overwrite in place
+        return this;
+    }
+
+    void putIgnoreCase(String key, String value) {
+        final int slot = indexOfKeyIgnoreCase(key);
+        if (slot == NotFound) {
+            add(key, value);
+            return;
+        }
+        vals[slot] = value;
+        if (!key.equals(keys[slot])) // stored key differs only by case: adopt the new spelling
+            keys[slot] = key;
+    }
+
+    /**
+     * Set a new boolean attribute, remove attribute if value is false.
+     * @param key attribute key; matched case-<b>insensitively</b> when set, case-sensitively when removed
+     * @param value {@code true} to set the attribute (stored without a value), {@code false} to remove it
+     * @return these attributes, for chaining
+     */
+    public Attributes put(String key, boolean value) {
+        if (!value)
+            remove(key);
+        else
+            putIgnoreCase(key, null);
+        return this;
+    }
+
+    /**
+     Set a new attribute, or replace an existing one by key.
+     @param attribute attribute with case sensitive key
+     @return these attributes, for chaining
+     */
+    public Attributes put(Attribute attribute) {
+        Validate.notNull(attribute);
+        put(attribute.getKey(), attribute.getValue()); // store (or overwrite) by exact key
+        attribute.parent = this; // the attribute now refers to this set as its parent
+        return this;
+    }
+
+    // drops the slot at index and closes the gap by moving the following entries down by one
+    private void remove(int index) {
+        Validate.isFalse(index >= size);
+        final int last = size - 1;
+        if (index < last) { // entries follow the removed one: move them down
+            System.arraycopy(keys, index + 1, keys, index, last - index);
+            System.arraycopy(vals, index + 1, vals, index, last - index);
+        }
+        size = last;
+        keys[last] = null; // clear the vacated slot so it no longer holds a reference
+        vals[last] = null;
+    }
+
+    /**
+     Remove an attribute by key. <b>Case sensitive.</b>
+     @param key attribute key to remove
+     */
+    public void remove(String key) {
+        final int slot = indexOfKey(key);
+        if (slot == NotFound) return; // nothing stored under this exact key
+        remove(slot);
+    }
+
+    /**
+     Remove an attribute by key. <b>Case insensitive.</b>
+     @param key attribute key to remove
+     */
+    public void removeIgnoreCase(String key) {
+        final int slot = indexOfKeyIgnoreCase(key);
+        if (slot == NotFound) return; // no key matches, even ignoring case
+        remove(slot);
+    }
+
+    /**
+     Tests if these attributes contain an attribute with this key.
+     @param key case-sensitive key to check for; must not be null
+     @return true if key exists, false otherwise
+     */
+    public boolean hasKey(String key) {
+        return indexOfKey(key) != NotFound;
+    }
+
+    /**
+     Tests if these attributes contain an attribute with this key, ignoring case.
+     @param key key to check for, compared without regard to case; must not be null
+     @return true if key exists, false otherwise
+     */
+    public boolean hasKeyIgnoreCase(String key) {
+        return indexOfKeyIgnoreCase(key) != NotFound;
+    }
+
+    /**
+     Get the number of attributes in this set.
+     @return the count of attributes currently held (not the capacity of the backing arrays)
+     */
+    public int size() {
+        return size;
+    }
+
+    /**
+     Add all the attributes from the incoming set to this set.
+     @param incoming attributes to add to these attributes.
+     */
+    public void addAll(Attributes incoming) {
+        if (incoming.size() == 0) return; // nothing to merge
+        checkCapacity(size + incoming.size); // reserve room as if no incoming key were already present
+
+        // todo - should this be case insensitive?
+        final Iterator<Attribute> source = incoming.iterator();
+        while (source.hasNext()) {
+            put(source.next());
+        }
+
+    }
+
+    /** Iterates the attributes in slot order; every next() builds a fresh Attribute bound to this set. */
+    public Iterator<Attribute> iterator() {
+        return new Iterator<Attribute>() {
+            int i = 0; // slot that the following next() call will read
+
+            @Override
+            public boolean hasNext() {
+                return i < size;
+            }
+            @Override
+            public Attribute next() {
+                if (i >= size) throw new java.util.NoSuchElementException(); // Iterator contract once exhausted
+                final Attribute attr = new Attribute(keys[i], vals[i], Attributes.this);
+                i++;
+                return attr;
+            }
+            @Override
+            public void remove() {
+                Attributes.this.remove(--i); // next() advanced, so rewind
+            }
+        };
+    }
+
+    /**
+     Get the attributes as a List, for iteration.
+     @return an unmodifiable List built from the attributes present at the time of the call.
+     */
+    public List<Attribute> asList() {
+        final List<Attribute> snapshot = new ArrayList<>(size);
+        int slot = 0;
+        while (slot < size) {
+            // every entry is a plain Attribute bound to this set, including value-less (boolean) attributes,
+            // which get no dedicated class
+            snapshot.add(new Attribute(keys[slot], vals[slot], Attributes.this));
+            slot++;
+        }
+        return Collections.unmodifiableList(snapshot);
+    }
+
+    /**
+     * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys
+     * starting with {@code data-}.
+     * @return map of custom data attributes, backed by these attributes and keyed without the {@code data-} prefix.
+     */
+    public Map<String, String> dataset() {
+        return new Dataset(this);
+    }
+
+//    /**
+//     Get the HTML representation of these attributes.
+//     @return HTML
+//     @throws SerializationException if the HTML representation of the attributes cannot be constructed.
+//     */
+//    public String html() {
+//        StringBuilder accum = new StringBuilder();
+//        try {
+//            html(accum, (new Document("")).outputSettings()); // output settings a bit funky, but this html() seldom used
+//        } catch (IOException e) { // ought never happen
+//            throw new SerializationException(e);
+//        }
+//        return accum.toString();
+//    }
+//
+//    final void html(final Appendable accum, final Document.OutputSettings out) throws IOException {
+//        final int sz = size;
+//        for (int i = 0; i < sz; i++) {
+//            // inlined from Attribute.html()
+//            final String key = keys[i];
+//            final String val = vals[i];
+//            accum.append(' ').append(key);
+//
+//            // collapse checked=null, checked="", checked=checked; write out others
+//            if (!Attribute.shouldCollapseAttribute(key, val, out)) {
+//                accum.append("=\"");
+//                Entities.escape(accum, val == null ? EmptyString : val, out, true, false, false);
+//                accum.append('"');
+//            }
+//        }
+//    }
+//
+//    @Override
+//    public String toString() {
+//        return html();
+//    }
+
+    /**
+     * Checks if these attributes are equal to another set of attributes, comparing the used slots of both in order.
+     * @param o attributes to compare with
+     * @return if both sets hold the same keys and values in the same order; spare array capacity is ignored
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        Attributes that = (Attributes) o;
+
+        if (size != that.size) return false;
+        if (!Arrays.equals(copyOf(keys, size), copyOf(that.keys, size))) return false; // trim to the used slots
+        return Arrays.equals(copyOf(vals, size), copyOf(that.vals, size));
+    }
+
+    /**
+     * Calculates the hashcode of these attributes from their count and the used key and value slots only.
+     * @return calculated hashcode; equal for any two sets that {@link #equals(Object)} considers equal
+     */
+    @Override
+    public int hashCode() {
+        int result = size;
+        result = 31 * result + Arrays.hashCode(copyOf(keys, size));
+        result = 31 * result + Arrays.hashCode(copyOf(vals, size));
+        return result;
+    }
+
+    @Override
+    public Attributes clone() { // copies the key/value arrays so that the clone does not share them with this set
+        Attributes clone;
+        try {
+            clone = (Attributes) super.clone();
+        } catch (CloneNotSupportedException e) {
+            throw new RuntimeException(e);
+        }
+        clone.size = size;
+        clone.keys = copyOf(keys, size); // assign to the clone: super.clone() only copied the array references
+        clone.vals = copyOf(vals, size);
+        return clone;
+    }
+
+    /**
+     * Internal method. Lowercases all keys.
+     */
+    public void normalize() {
+        for (int slot = 0; slot < size; ++slot)
+            keys[slot] = lowerCase(keys[slot]);
+        // values are left untouched; only the keys are rewritten
+    }
+
+    // Map view of the data-* attributes held by an Attributes instance; keys are exposed without the prefix.
+    private static class Dataset extends AbstractMap<String, String> {
+        private final Attributes attributes;
+        private Dataset(Attributes attributes) {
+            this.attributes = attributes;
+        }
+
+        @Override
+        public Set<Entry<String, String>> entrySet() {
+            return new EntrySet();
+        }
+
+        @Override
+        public String put(String key, String value) {
+            String dataKey = dataKey(key);
+            String oldValue = attributes.hasKey(dataKey) ? attributes.get(dataKey) : null;
+            attributes.put(dataKey, value);
+            return oldValue;
+        }
+
+        private class EntrySet extends AbstractSet<Map.Entry<String, String>> {
+            @Override
+            public Iterator<Map.Entry<String, String>> iterator() {
+                return new DatasetIterator();
+            }
+
+            @Override
+            public int size() {
+                int count = 0;
+                Iterator<Map.Entry<String, String>> iter = new DatasetIterator();
+                while (iter.hasNext()) // hasNext() itself advances to the next data attribute
+                    count++;
+                return count;
+            }
+        }
+
+        // NOTE: hasNext() advances the cursor, so it has to be called exactly once before each next().
+        private class DatasetIterator implements Iterator<Map.Entry<String, String>> {
+            private Iterator<Attribute> attrIter = attributes.iterator();
+            private Attribute attr;
+            public boolean hasNext() {
+                while (attrIter.hasNext()) {
+                    attr = attrIter.next();
+                    if (attr.isDataAttribute()) return true;
+                }
+                return false;
+            }
+
+            public Entry<String, String> next() {
+                return new Attribute(attr.getKey().substring(dataPrefix.length()), attr.getValue());
+            }
+
+            public void remove() {
+                attrIter.remove(); // removes by position and rewinds, so the entry that shifts down is not skipped
+            }
+        }
+    }
+
+    private static String dataKey(String key) { // maps a dataset key to the full attribute key
+        return dataPrefix + key;
+    }
+}
\ No newline at end of file
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/DocumentType.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/DocumentType.java
new file mode 100644
index 00000000..dc11e537
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/DocumentType.java
@@ -0,0 +1,104 @@
+package ru.noties.markwon.html.jsoup.nodes;
+
+/**
+ * A {@code <!DOCTYPE>} node.
+ */
+public class DocumentType /*extends LeafNode*/ {
+    // todo needs a bit of a chunky cleanup. this level of detail isn't needed
+    public static final String PUBLIC_KEY = "PUBLIC"; // one of the two values noted below for the disabled pubSysKey
+    public static final String SYSTEM_KEY = "SYSTEM"; // the other value noted below for the disabled pubSysKey
+//    private static final String NAME = "name";
+//    private static final String PUB_SYS_KEY = "pubSysKey"; // PUBLIC or SYSTEM
+//    private static final String PUBLIC_ID = "publicId";
+//    private static final String SYSTEM_ID = "systemId";
+    // todo: quirk mode from publicId and systemId
+
+//    /**
+//     * Create a new doctype element.
+//     * @param name the doctype's name
+//     * @param publicId the doctype's public ID
+//     * @param systemId the doctype's system ID
+//     */
+//    public DocumentType(String name, String publicId, String systemId) {
+//        Validate.notNull(name);
+//        Validate.notNull(publicId);
+//        Validate.notNull(systemId);
+//        attr(NAME, name);
+//        attr(PUBLIC_ID, publicId);
+//        if (has(PUBLIC_ID)) {
+//            attr(PUB_SYS_KEY, PUBLIC_KEY);
+//        }
+//        attr(SYSTEM_ID, systemId);
+//    }
+//
+//    /**
+//     * Create a new doctype element.
+//     * @param name the doctype's name
+//     * @param publicId the doctype's public ID
+//     * @param systemId the doctype's system ID
+//     * @param baseUri unused
+//     * @deprecated
+//     */
+//    public DocumentType(String name, String publicId, String systemId, String baseUri) {
+//        attr(NAME, name);
+//        attr(PUBLIC_ID, publicId);
+//        if (has(PUBLIC_ID)) {
+//            attr(PUB_SYS_KEY, PUBLIC_KEY);
+//        }
+//        attr(SYSTEM_ID, systemId);
+//    }
+//
+//    /**
+//     * Create a new doctype element.
+//     * @param name the doctype's name
+//     * @param publicId the doctype's public ID
+//     * @param systemId the doctype's system ID
+//     * @param baseUri unused
+//     * @deprecated
+//     */
+//    public DocumentType(String name, String pubSysKey, String publicId, String systemId, String baseUri) {
+//        attr(NAME, name);
+//        if (pubSysKey != null) {
+//            attr(PUB_SYS_KEY, pubSysKey);
+//        }
+//        attr(PUBLIC_ID, publicId);
+//        attr(SYSTEM_ID, systemId);
+//    }
+//    public void setPubSysKey(String value) {
+//        if (value != null)
+//            attr(PUB_SYS_KEY, value);
+//    }
+//
+//    @Override
+//    public String nodeName() {
+//        return "#doctype";
+//    }
+//
+//    @Override
+//    void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
+//        if (out.syntax() == Syntax.html && !has(PUBLIC_ID) && !has(SYSTEM_ID)) {
+//            // looks like a html5 doctype, go lowercase for aesthetics
+//            accum.append("<!doctype");
+//        } else {
+//            accum.append("<!DOCTYPE");
+//        }
+//        if (has(NAME))
+//            accum.append(" ").append(attr(NAME));
+//        if (has(PUB_SYS_KEY))
+//            accum.append(" ").append(attr(PUB_SYS_KEY));
+//        if (has(PUBLIC_ID))
+//            accum.append(" \"").append(attr(PUBLIC_ID)).append('"');
+//        if (has(SYSTEM_ID))
+//            accum.append(" \"").append(attr(SYSTEM_ID)).append('"');
+//        accum.append('>');
+//    }
+//
+//    @Override
+//    void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) {
+//    }
+//
+//    private boolean has(final String attribute) {
+//        return !StringUtil.isBlank(attr(attribute));
+//    }
+}
+
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/Entities.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/Entities.java
new file mode 100644
index 00000000..c6c8d829
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/Entities.java
@@ -0,0 +1,351 @@
+package ru.noties.markwon.html.jsoup.nodes;
+
+import java.nio.charset.CharsetEncoder;
+import java.util.Arrays;
+import java.util.HashMap;
+
+import ru.noties.markwon.html.jsoup.helper.Validate;
+import ru.noties.markwon.html.jsoup.parser.CharacterReader;
+
+import static ru.noties.markwon.html.jsoup.nodes.Entities.EscapeMode.base;
+import static ru.noties.markwon.html.jsoup.nodes.Entities.EscapeMode.extended;
+
+/**
+ * HTML entities, and escape routines. Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C
+ * HTML named character references</a>.
+ */
+public class Entities {
+    private static final int empty = -1; // returned by codepointForName when the name is not in the table
+    private static final String emptyName = ""; // returned by nameForCodepoint when the codepoint has no name
+    static final int codepointRadix = 36; // radix of the packed codepoint and index values parsed by load()
+    private static final char[] codeDelims = {',', ';'}; // ',' precedes a second codepoint; ';' ends the codepoint part
+    private static final HashMap<String, String> multipoints = new HashMap<>(); // name -> multiple character references
+//    private static final Document.OutputSettings DefaultOutput = new Document.OutputSettings();
+
+    public enum EscapeMode {
+        /**
+         * The restricted set suitable for XHTML output: only lt, gt, amp, and quot.
+         */
+        xhtml(EntitiesData.xmlPoints, 4),
+        /**
+         * The default set of entities used for HTML output.
+         */
+        base(EntitiesData.basePoints, 106),
+        /**
+         * The complete set of HTML entities.
+         */
+        extended(EntitiesData.fullPoints, 2125);
+
+        // name -> codepoint lookup table, filled by load(); kept sorted by name so it can be binary searched.
+        private String[] nameKeys;
+        private int[] codeVals; // holds one codepoint per name; the few two-codepoint references also go into multipoints.
+
+        // codepoint -> name lookup table, filled by load().
+        private int[] codeKeys; // mapping several codepoints to a single name is not supported currently
+        private String[] nameVals;
+
+        EscapeMode(String pointsData, int size) {
+            load(this, pointsData, size);
+        }
+
+        int codepointForName(final String name) {
+            final int pos = Arrays.binarySearch(nameKeys, name);
+            return pos < 0 ? empty : codeVals[pos];
+        }
+
+        String nameForCodepoint(final int codepoint) {
+            final int pos = Arrays.binarySearch(codeKeys, codepoint);
+            if (pos < 0) {
+                return emptyName;
+            }
+            // entries sharing a codepoint are ordered upper-case name first and the lower-case name is preferred, so
+            // step to the following entry when it has the same codepoint (binary search may land on either duplicate)
+            final int next = pos + 1;
+            return (next < nameVals.length && codeKeys[next] == codepoint) ? nameVals[next] : nameVals[pos];
+        }
+
+        private int size() {
+            return nameKeys.length;
+        }
+    }
+
+    private Entities() { // not instantiable: the class only exposes static members
+    }
+
+    /**
+     * Tests whether {@code name} is present in the extended (complete) entity table.
+     * @param name candidate entity name (e.g. "lt" or "amp")
+     * @return {@code true} when the name resolves to a codepoint, {@code false} otherwise
+     */
+    public static boolean isNamedEntity(final String name) {
+        final int codepoint = extended.codepointForName(name);
+        return codepoint != empty;
+    }
+
+    /**
+     * Tests whether {@code name} is present in the base entity table (the default HTML output entities).
+     * @param name candidate entity name (e.g. "lt" or "amp")
+     * @return {@code true} when the base table has a codepoint for the name, {@code false} otherwise
+     * @see #isNamedEntity(String)
+     */
+    public static boolean isBaseNamedEntity(final String name) {
+        final int codepoint = base.codepointForName(name);
+        return codepoint != empty;
+    }
+
+    /**
+     * Get the Character value of the named entity; an unknown name yields U+FFFF because the -1 sentinel is cast to char.
+     * @param name named entity (e.g. "lt" or "amp")
+     * @return the Character value of the named entity (e.g. '{@literal <}' or '{@literal &}')
+     * @deprecated does not support characters outside the BMP or multiple character names; see {@link #getByName(String)}
+     */
+    @Deprecated
+    public static Character getCharacterByName(String name) {
+        return (char) extended.codepointForName(name);
+    }
+
+    /**
+     * Resolve a named entity to the character(s) it stands for; names that map to two codepoints come from multipoints.
+     *
+     * @param name entity (e.g. "lt" or "amp")
+     * @return the string value of the character(s) represented by this entity, or "" if not defined
+     */
+    public static String getByName(String name) {
+        final String multi = multipoints.get(name);
+        if (multi != null)
+            return multi;
+        final int single = extended.codepointForName(name);
+        if (single == empty)
+            return emptyName;
+        return new String(new int[]{single}, 0, 1);
+    }
+
+    /** Stores the codepoint(s) for {@code name} in {@code codepoints} (room for 2 needed); returns how many: 2, 1, or 0 if unknown. */
+    public static int codepointsForName(final String name, final int[] codepoints) {
+        final String val = multipoints.get(name);
+        if (val != null) {
+            codepoints[0] = val.codePointAt(0);
+            codepoints[1] = val.codePointAt(Character.charCount(codepoints[0])); // skip both chars of a surrogate pair
+            return 2;
+        }
+        final int codepoint = extended.codepointForName(name);
+        if (codepoint == empty)
+            return 0;
+        codepoints[0] = codepoint;
+        return 1;
+    }
+
+//    /**
+//     * HTML escape an input string. That is, {@code <} is returned as {@code &lt;}
+//     *
+//     * @param string the un-escaped string to escape
+//     * @param out the output settings to use
+//     * @return the escaped string
+//     */
+//    public static String escape(String string, Document.OutputSettings out) {
+//        if (string == null)
+//            return "";
+//        StringBuilder accum = new StringBuilder(string.length() * 2);
+//        try {
+//            escape(accum, string, out, false, false, false);
+//        } catch (IOException e) {
+//            throw new SerializationException(e); // doesn't happen
+//        }
+//        return accum.toString();
+//    }
+
+//    /**
+//     * HTML escape an input string, using the default settings (UTF-8, base entities). That is, {@code <} is returned as
+//     * {@code &lt;}
+//     *
+//     * @param string the un-escaped string to escape
+//     * @return the escaped string
+//     */
+//    public static String escape(String string) {
+//        return escape(string, DefaultOutput);
+//    }
+//
+//    // this method is ugly, and does a lot. but other breakups cause rescanning and stringbuilder generations
+//    static void escape(Appendable accum, String string, Document.OutputSettings out,
+//                       boolean inAttribute, boolean normaliseWhite, boolean stripLeadingWhite) throws IOException {
+//
+//        boolean lastWasWhite = false;
+//        boolean reachedNonWhite = false;
+//        final EscapeMode escapeMode = out.escapeMode();
+//        final CharsetEncoder encoder = out.encoder();
+//        final CoreCharset coreCharset = out.coreCharset; // init in out.prepareEncoder()
+//        final int length = string.length();
+//
+//        int codePoint;
+//        for (int offset = 0; offset < length; offset += Character.charCount(codePoint)) {
+//            codePoint = string.codePointAt(offset);
+//
+//            if (normaliseWhite) {
+//                if (StringUtil.isWhitespace(codePoint)) {
+//                    if ((stripLeadingWhite && !reachedNonWhite) || lastWasWhite)
+//                        continue;
+//                    accum.append(' ');
+//                    lastWasWhite = true;
+//                    continue;
+//                } else {
+//                    lastWasWhite = false;
+//                    reachedNonWhite = true;
+//                }
+//            }
+//            // surrogate pairs, split implementation for efficiency on single char common case (saves creating strings, char[]):
+//            if (codePoint < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+//                final char c = (char) codePoint;
+//                // html specific and required escapes:
+//                switch (c) {
+//                    case '&':
+//                        accum.append("&amp;");
+//                        break;
+//                    case 0xA0:
+//                        if (escapeMode != EscapeMode.xhtml)
+//                            accum.append("&nbsp;");
+//                        else
+//                            accum.append("&#xa0;");
+//                        break;
+//                    case '<':
+//                        // escape when in character data or when in a xml attribue val; not needed in html attr val
+//                        if (!inAttribute || escapeMode == EscapeMode.xhtml)
+//                            accum.append("&lt;");
+//                        else
+//                            accum.append(c);
+//                        break;
+//                    case '>':
+//                        if (!inAttribute)
+//                            accum.append("&gt;");
+//                        else
+//                            accum.append(c);
+//                        break;
+//                    case '"':
+//                        if (inAttribute)
+//                            accum.append("&quot;");
+//                        else
+//                            accum.append(c);
+//                        break;
+//                    default:
+//                        if (canEncode(coreCharset, c, encoder))
+//                            accum.append(c);
+//                        else
+//                            appendEncoded(accum, escapeMode, codePoint);
+//                }
+//            } else {
+//                final String c = new String(Character.toChars(codePoint));
+//                if (encoder.canEncode(c)) // uses fallback encoder for simplicity
+//                    accum.append(c);
+//                else
+//                    appendEncoded(accum, escapeMode, codePoint);
+//            }
+//        }
+//    }
+
+//    private static void appendEncoded(Appendable accum, EscapeMode escapeMode, int codePoint) throws IOException {
+//        final String name = escapeMode.nameForCodepoint(codePoint);
+//        if (name != emptyName) // ok for identity check
+//            accum.append('&').append(name).append(';');
+//        else
+//            accum.append("&#x").append(Integer.toHexString(codePoint)).append(';');
+//    }
+
+//    /**
+//     * Un-escape an HTML escaped string. That is, {@code &lt;} is returned as {@code <}.
+//     *
+//     * @param string the HTML string to un-escape
+//     * @return the unescaped string
+//     */
+//    public static String unescape(String string) {
+//        return unescape(string, false);
+//    }
+
+//    /**
+//     * Unescape the input string.
+//     *
+//     * @param string to un-HTML-escape
+//     * @param strict if "strict" (that is, requires trailing ';' char, otherwise that's optional)
+//     * @return unescaped string
+//     */
+//    static String unescape(String string, boolean strict) {
+//        return Parser.unescapeEntities(string, strict);
+//    }
+
+    /*
+     * Provides a fast-path for Encoder.canEncode, which drastically improves performance on Android post JellyBean.
+     * After KitKat, the implementation of canEncode degrades to the point of being useless. For non ASCII or UTF,
+     * performance may be bad. We can add more encoders for common character sets that are impacted by performance
+     * issues on Android if required.
+     * NOTE(review): the only call site is inside the commented-out escape() above, so this is currently unused.
+     * Benchmarks:
+     * OLD toHtml() impl v New (fastpath) in millis
+     * Wiki: 1895, 16
+     * CNN: 6378, 55
+     * Alterslash: 3013, 28
+     * Jsoup: 167, 2
+     */
+    private static boolean canEncode(final CoreCharset charset, final char c, final CharsetEncoder fallback) {
+        // todo add more charset tests if impacted by Android's bad perf in canEncode
+        switch (charset) {
+            case ascii:
+                return c < 0x80;
+            case utf:
+                return true; // real is:!(Character.isLowSurrogate(c) || Character.isHighSurrogate(c)); - but already check above
+            default:
+                return fallback.canEncode(c);
+        }
+    }
+
+    enum CoreCharset {
+        ascii, utf, fallback;
+
+        static CoreCharset byName(final String name) {
+            if (name.equals("US-ASCII")) {
+                return ascii;
+            }
+            // the "UTF-" prefix covers UTF-8, UTF-16, et al; every other charset name maps to fallback
+            return name.startsWith("UTF-") ? utf : fallback;
+        }
+    }
+
+    private static void load(EscapeMode e, String pointsData, int size) {
+        // Fills the four lookup tables of e from the packed pointsData string, which must hold exactly size records.
+        e.nameKeys = new String[size];
+        e.codeVals = new int[size];
+        e.codeKeys = new int[size];
+        e.nameVals = new String[size];
+        int i = 0;
+        CharacterReader reader = new CharacterReader(pointsData);
+
+        while (!reader.isEmpty()) {
+            // one record: name=codepoint[,codepoint2];index& with every number written in radix 36,
+            // e.g. NotNestedLessLess=8f5,mw;1ge&
+            final String name = reader.consumeTo('=');
+            reader.advance();
+            final int cp1 = Integer.parseInt(reader.consumeToAny(codeDelims), codepointRadix);
+            final char codeDelim = reader.current();
+            reader.advance();
+            final int cp2;
+            if (codeDelim == ',') {
+                cp2 = Integer.parseInt(reader.consumeTo(';'), codepointRadix);
+                reader.advance();
+            } else {
+                cp2 = empty;
+            }
+            final String indexS = reader.consumeTo('&');
+            final int index = Integer.parseInt(indexS, codepointRadix);
+            reader.advance();
+            // name tables are filled in record order (slot i); index is the record's slot in the codepoint-keyed tables
+            e.nameKeys[i] = name;
+            e.codeVals[i] = cp1;
+            e.codeKeys[index] = cp1;
+            e.nameVals[index] = name;
+
+            if (cp2 != empty) {
+                multipoints.put(name, new String(new int[]{cp1, cp2}, 0, 2));
+            }
+            i++;
+        }
+
+        Validate.isTrue(i == size, "Unexpected count of entities loaded");
+    }
+}
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/EntitiesData.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/EntitiesData.java
new file mode 100644
index 00000000..036c712f
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/nodes/EntitiesData.java
@@ -0,0 +1,11 @@
+package ru.noties.markwon.html.jsoup.nodes;
+
+/**
+ * Holds packed data that represents Entity name=value pairs. Parsed by Entities, created by BuildEntities.
+ */
+class EntitiesData {
+    static final String xmlPoints = "amp=12;1&gt=1q;3&lt=1o;2&quot=y;0&";
+    static final String basePoints = "AElig=5i;1c&AMP=12;2&Aacute=5d;17&Acirc=5e;18&Agrave=5c;16&Aring=5h;1b&Atilde=5f;19&Auml=5g;1a&COPY=4p;h&Ccedil=5j;1d&ETH=5s;1m&Eacute=5l;1f&Ecirc=5m;1g&Egrave=5k;1e&Euml=5n;1h&GT=1q;6&Iacute=5p;1j&Icirc=5q;1k&Igrave=5o;1i&Iuml=5r;1l&LT=1o;4&Ntilde=5t;1n&Oacute=5v;1p&Ocirc=5w;1q&Ograve=5u;1o&Oslash=60;1u&Otilde=5x;1r&Ouml=5y;1s&QUOT=y;0&REG=4u;n&THORN=66;20&Uacute=62;1w&Ucirc=63;1x&Ugrave=61;1v&Uuml=64;1y&Yacute=65;1z&aacute=69;23&acirc=6a;24&acute=50;u&aelig=6e;28&agrave=68;22&amp=12;3&aring=6d;27&atilde=6b;25&auml=6c;26&brvbar=4m;e&ccedil=6f;29&cedil=54;y&cent=4i;a&copy=4p;i&curren=4k;c&deg=4w;q&divide=6v;2p&eacute=6h;2b&ecirc=6i;2c&egrave=6g;2a&eth=6o;2i&euml=6j;2d&frac12=59;13&frac14=58;12&frac34=5a;14&gt=1q;7&iacute=6l;2f&icirc=6m;2g&iexcl=4h;9&igrave=6k;2e&iquest=5b;15&iuml=6n;2h&laquo=4r;k&lt=1o;5&macr=4v;p&micro=51;v&middot=53;x&nbsp=4g;8&not=4s;l&ntilde=6p;2j&oacute=6r;2l&ocirc=6s;2m&ograve=6q;2k&ordf=4q;j&ordm=56;10&oslash=6w;2q&otilde=6t;2n&ouml=6u;2o&para=52;w&plusmn=4x;r&pound=4j;b&quot=y;1&raquo=57;11&reg=4u;o&sect=4n;f&shy=4t;m&sup1=55;z&sup2=4y;s&sup3=4z;t&szlig=67;21&thorn=72;2w&times=5z;1t&uacute=6y;2s&ucirc=6z;2t&ugrave=6x;2r&uml=4o;g&uuml=70;2u&yacute=71;2v&yen=4l;d&yuml=73;2x&";
+    static final String fullPoints = "AElig=5i;2v&AMP=12;8&Aacute=5d;2p&Abreve=76;4k&Acirc=5e;2q&Acy=sw;av&Afr=2kn8;1kh&Agrave=5c;2o&Alpha=pd;8d&Amacr=74;4i&And=8cz;1e1&Aogon=78;4m&Aopf=2koo;1ls&ApplyFunction=6e9;ew&Aring=5h;2t&Ascr=2kkc;1jc&Assign=6s4;s6&Atilde=5f;2r&Auml=5g;2s&Backslash=6qe;o1&Barv=8h3;1it&Barwed=6x2;120&Bcy=sx;aw&Because=6r9;pw&Bernoullis=6jw;gn&Beta=pe;8e&Bfr=2kn9;1ki&Bopf=2kop;1lt&Breve=k8;82&Bscr=6jw;gp&Bumpeq=6ry;ro&CHcy=tj;bi&COPY=4p;1q&Cacute=7a;4o&Cap=6vm;zz&CapitalDifferentialD=6kl;h8&Cayleys=6jx;gq&Ccaron=7g;4u&Ccedil=5j;2w&Ccirc=7c;4q&Cconint=6r4;pn&Cdot=7e;4s&Cedilla=54;2e&CenterDot=53;2b&Cfr=6jx;gr&Chi=pz;8y&CircleDot=6u1;x8&CircleMinus=6ty;x3&CirclePlus=6tx;x1&CircleTimes=6tz;x5&ClockwiseContourIntegral=6r6;pp&CloseCurlyDoubleQuote=6cd;e0&CloseCurlyQuote=6c9;dt&Colon=6rb;q1&Colone=8dw;1en&Congruent=6sh;sn&Conint=6r3;pm&ContourIntegral=6r2;pi&Copf=6iq;f7&Coproduct=6q8;nq&CounterClockwiseContourIntegral=6r7;pr&Cross=8bz;1d8&Cscr=2kke;1jd&Cup=6vn;100&CupCap=6rx;rk&DD=6kl;h9&DDotrahd=841;184&DJcy=si;ai&DScy=sl;al&DZcy=sv;au&Dagger=6ch;e7&Darr=6n5;j5&Dashv=8h0;1ir&Dcaron=7i;4w&Dcy=t0;az&Del=6pz;n9&Delta=pg;8g&Dfr=2knb;1kj&DiacriticalAcute=50;27&DiacriticalDot=k9;84&DiacriticalDoubleAcute=kd;8a&DiacriticalGrave=2o;13&DiacriticalTilde=kc;88&Diamond=6v8;za&DifferentialD=6km;ha&Dopf=2kor;1lu&Dot=4o;1n&DotDot=6ho;f5&DotEqual=6s0;rw&DoubleContourIntegral=6r3;pl&DoubleDot=4o;1m&DoubleDownArrow=6oj;m0&DoubleLeftArrow=6og;lq&DoubleLeftRightArrow=6ok;m3&DoubleLeftTee=8h0;1iq&DoubleLongLeftArrow=7w8;17g&DoubleLongLeftRightArrow=7wa;17m&DoubleLongRightArrow=7w9;17j&DoubleRightArrow=6oi;lw&DoubleRightTee=6ug;xz&DoubleUpArrow=6oh;lt&DoubleUpDownArrow=6ol;m7&DoubleVerticalBar=6qt;ov&DownArrow=6mr;i8&DownArrowBar=843;186&DownArrowUpArrow=6ph;mn&DownBreve=lt;8c&DownLeftRightVector=85s;198&DownLeftTeeVector=866;19m&DownLeftVector=6nx;ke&DownLeftVectorBar=85y;19e&DownRightTeeVector=867;19n&DownRightVector=6o1;kq&DownRightVectorBar=85z;19f&DownTee=6uc;xs&Do
wnTeeArrow=6nb;jh&Downarrow=6oj;m1&Dscr=2kkf;1je&Dstrok=7k;4y&ENG=96;6g&ETH=5s;35&Eacute=5l;2y&Ecaron=7u;56&Ecirc=5m;2z&Ecy=tp;bo&Edot=7q;52&Efr=2knc;1kk&Egrave=5k;2x&Element=6q0;na&Emacr=7m;50&EmptySmallSquare=7i3;15x&EmptyVerySmallSquare=7fv;150&Eogon=7s;54&Eopf=2kos;1lv&Epsilon=ph;8h&Equal=8dx;1eo&EqualTilde=6rm;qp&Equilibrium=6oc;li&Escr=6k0;gu&Esim=8dv;1em&Eta=pj;8j&Euml=5n;30&Exists=6pv;mz&ExponentialE=6kn;hc&Fcy=tg;bf&Ffr=2knd;1kl&FilledSmallSquare=7i4;15y&FilledVerySmallSquare=7fu;14w&Fopf=2kot;1lw&ForAll=6ps;ms&Fouriertrf=6k1;gv&Fscr=6k1;gw&GJcy=sj;aj&GT=1q;r&Gamma=pf;8f&Gammad=rg;a5&Gbreve=7y;5a&Gcedil=82;5e&Gcirc=7w;58&Gcy=sz;ay&Gdot=80;5c&Gfr=2kne;1km&Gg=6vt;10c&Gopf=2kou;1lx&GreaterEqual=6sl;sv&GreaterEqualLess=6vv;10i&GreaterFullEqual=6sn;t6&GreaterGreater=8f6;1gh&GreaterLess=6t3;ul&GreaterSlantEqual=8e6;1f5&GreaterTilde=6sz;ub&Gscr=2kki;1jf&Gt=6sr;tr&HARDcy=tm;bl&Hacek=jr;80&Hat=2m;10&Hcirc=84;5f&Hfr=6j0;fe&HilbertSpace=6iz;fa&Hopf=6j1;fg&HorizontalLine=7b4;13i&Hscr=6iz;fc&Hstrok=86;5h&HumpDownHump=6ry;rn&HumpEqual=6rz;rs&IEcy=t1;b0&IJlig=8i;5s&IOcy=sh;ah&Iacute=5p;32&Icirc=5q;33&Icy=t4;b3&Idot=8g;5p&Ifr=6j5;fq&Igrave=5o;31&Im=6j5;fr&Imacr=8a;5l&ImaginaryI=6ko;hf&Implies=6oi;ly&Int=6r0;pf&Integral=6qz;pd&Intersection=6v6;z4&InvisibleComma=6eb;f0&InvisibleTimes=6ea;ey&Iogon=8e;5n&Iopf=2kow;1ly&Iota=pl;8l&Iscr=6j4;fn&Itilde=88;5j&Iukcy=sm;am&Iuml=5r;34&Jcirc=8k;5u&Jcy=t5;b4&Jfr=2knh;1kn&Jopf=2kox;1lz&Jscr=2kkl;1jg&Jsercy=so;ao&Jukcy=sk;ak&KHcy=th;bg&KJcy=ss;as&Kappa=pm;8m&Kcedil=8m;5w&Kcy=t6;b5&Kfr=2kni;1ko&Kopf=2koy;1m0&Kscr=2kkm;1jh&LJcy=sp;ap&LT=1o;m&Lacute=8p;5z&Lambda=pn;8n&Lang=7vu;173&Laplacetrf=6j6;fs&Larr=6n2;j1&Lcaron=8t;63&Lcedil=8r;61&Lcy=t7;b6&LeftAngleBracket=7vs;16x&LeftArrow=6mo;hu&LeftArrowBar=6p0;mj&LeftArrowRightArrow=6o6;l3&LeftCeiling=6x4;121&LeftDoubleBracket=7vq;16t&LeftDownTeeVector=869;19p&LeftDownVector=6o3;kw&LeftDownVectorBar=861;19h&LeftFloor=6x6;125&LeftRightArrow=6ms;ib&LeftRightVector=85q;196&LeftTee=6ub;xq&LeftTeeArrow=6
n8;ja&LeftTeeVector=862;19i&LeftTriangle=6uq;ya&LeftTriangleBar=89b;1c0&LeftTriangleEqual=6us;yg&LeftUpDownVector=85t;199&LeftUpTeeVector=868;19o&LeftUpVector=6nz;kk&LeftUpVectorBar=860;19g&LeftVector=6nw;kb&LeftVectorBar=85u;19a&Leftarrow=6og;lr&Leftrightarrow=6ok;m4&LessEqualGreater=6vu;10e&LessFullEqual=6sm;t0&LessGreater=6t2;ui&LessLess=8f5;1gf&LessSlantEqual=8e5;1ez&LessTilde=6sy;u8&Lfr=2knj;1kp&Ll=6vs;109&Lleftarrow=6oq;me&Lmidot=8v;65&LongLeftArrow=7w5;177&LongLeftRightArrow=7w7;17d&LongRightArrow=7w6;17a&Longleftarrow=7w8;17h&Longleftrightarrow=7wa;17n&Longrightarrow=7w9;17k&Lopf=2koz;1m1&LowerLeftArrow=6mx;iq&LowerRightArrow=6mw;in&Lscr=6j6;fu&Lsh=6nk;jv&Lstrok=8x;67&Lt=6sq;tl&Map=83p;17v&Mcy=t8;b7&MediumSpace=6e7;eu&Mellintrf=6k3;gx&Mfr=2knk;1kq&MinusPlus=6qb;nv&Mopf=2kp0;1m2&Mscr=6k3;gz&Mu=po;8o&NJcy=sq;aq&Nacute=8z;69&Ncaron=93;6d&Ncedil=91;6b&Ncy=t9;b8&NegativeMediumSpace=6bv;dc&NegativeThickSpace=6bv;dd&NegativeThinSpace=6bv;de&NegativeVeryThinSpace=6bv;db&NestedGreaterGreater=6sr;tq&NestedLessLess=6sq;tk&NewLine=a;1&Nfr=2knl;1kr&NoBreak=6e8;ev&NonBreakingSpace=4g;1d&Nopf=6j9;fx&Not=8h8;1ix&NotCongruent=6si;sp&NotCupCap=6st;tv&NotDoubleVerticalBar=6qu;p0&NotElement=6q1;ne&NotEqual=6sg;sk&NotEqualTilde=6rm,mw;qn&NotExists=6pw;n1&NotGreater=6sv;tz&NotGreaterEqual=6sx;u5&NotGreaterFullEqual=6sn,mw;t3&NotGreaterGreater=6sr,mw;tn&NotGreaterLess=6t5;uq&NotGreaterSlantEqual=8e6,mw;1f2&NotGreaterTilde=6t1;ug&NotHumpDownHump=6ry,mw;rl&NotHumpEqual=6rz,mw;rq&NotLeftTriangle=6wa;113&NotLeftTriangleBar=89b,mw;1bz&NotLeftTriangleEqual=6wc;119&NotLess=6su;tw&NotLessEqual=6sw;u2&NotLessGreater=6t4;uo&NotLessLess=6sq,mw;th&NotLessSlantEqual=8e5,mw;1ew&NotLessTilde=6t0;ue&NotNestedGreaterGreater=8f6,mw;1gg&NotNestedLessLess=8f5,mw;1ge&NotPrecedes=6tc;vb&NotPrecedesEqual=8fj,mw;1gv&NotPrecedesSlantEqual=6w0;10p&NotReverseElement=6q4;nl&NotRightTriangle=6wb;116&NotRightTriangleBar=89c,mw;1c1&NotRightTriangleEqual=6wd;11c&NotSquareSubset=6tr,mw;wh&NotSquareSubsetEqual=6w2
;10t&NotSquareSuperset=6ts,mw;wl&NotSquareSupersetEqual=6w3;10v&NotSubset=6te,6he;vh&NotSubsetEqual=6tk;w0&NotSucceeds=6td;ve&NotSucceedsEqual=8fk,mw;1h1&NotSucceedsSlantEqual=6w1;10r&NotSucceedsTilde=6tb,mw;v7&NotSuperset=6tf,6he;vm&NotSupersetEqual=6tl;w3&NotTilde=6rl;ql&NotTildeEqual=6ro;qv&NotTildeFullEqual=6rr;r1&NotTildeTilde=6rt;r9&NotVerticalBar=6qs;or&Nscr=2kkp;1ji&Ntilde=5t;36&Nu=pp;8p&OElig=9e;6m&Oacute=5v;38&Ocirc=5w;39&Ocy=ta;b9&Odblac=9c;6k&Ofr=2knm;1ks&Ograve=5u;37&Omacr=98;6i&Omega=q1;90&Omicron=pr;8r&Oopf=2kp2;1m3&OpenCurlyDoubleQuote=6cc;dy&OpenCurlyQuote=6c8;dr&Or=8d0;1e2&Oscr=2kkq;1jj&Oslash=60;3d&Otilde=5x;3a&Otimes=8c7;1df&Ouml=5y;3b&OverBar=6da;em&OverBrace=732;13b&OverBracket=71w;134&OverParenthesis=730;139&PartialD=6pu;mx&Pcy=tb;ba&Pfr=2knn;1kt&Phi=py;8x&Pi=ps;8s&PlusMinus=4x;22&Poincareplane=6j0;fd&Popf=6jd;g3&Pr=8fv;1hl&Precedes=6t6;us&PrecedesEqual=8fj;1gy&PrecedesSlantEqual=6t8;uy&PrecedesTilde=6ta;v4&Prime=6cz;eg&Product=6q7;no&Proportion=6rb;q0&Proportional=6ql;oa&Pscr=2kkr;1jk&Psi=q0;8z&QUOT=y;3&Qfr=2kno;1ku&Qopf=6je;g5&Qscr=2kks;1jl&RBarr=840;183&REG=4u;1x&Racute=9g;6o&Rang=7vv;174&Rarr=6n4;j4&Rarrtl=846;187&Rcaron=9k;6s&Rcedil=9i;6q&Rcy=tc;bb&Re=6jg;gb&ReverseElement=6q3;nh&ReverseEquilibrium=6ob;le&ReverseUpEquilibrium=86n;1a4&Rfr=6jg;ga&Rho=pt;8t&RightAngleBracket=7vt;170&RightArrow=6mq;i3&RightArrowBar=6p1;ml&RightArrowLeftArrow=6o4;ky&RightCeiling=6x5;123&RightDoubleBracket=7vr;16v&RightDownTeeVector=865;19l&RightDownVector=6o2;kt&RightDownVectorBar=85x;19d&RightFloor=6x7;127&RightTee=6ua;xo&RightTeeArrow=6na;je&RightTeeVector=863;19j&RightTriangle=6ur;yd&RightTriangleBar=89c;1c2&RightTriangleEqual=6ut;yk&RightUpDownVector=85r;197&RightUpTeeVector=864;19k&RightUpVector=6ny;kh&RightUpVectorBar=85w;19c&RightVector=6o0;kn&RightVectorBar=85v;19b&Rightarrow=6oi;lx&Ropf=6jh;gd&RoundImplies=86o;1a6&Rrightarrow=6or;mg&Rscr=6jf;g7&Rsh=6nl;jx&RuleDelayed=8ac;1cb&SHCHcy=tl;bk&SHcy=tk;bj&SOFTcy=to;bn&Sacute=9m;6u&Sc=8fw;1hm&Scaron=9s;70&Sce
dil=9q;6y&Scirc=9o;6w&Scy=td;bc&Sfr=2knq;1kv&ShortDownArrow=6mr;i7&ShortLeftArrow=6mo;ht&ShortRightArrow=6mq;i2&ShortUpArrow=6mp;hy&Sigma=pv;8u&SmallCircle=6qg;o6&Sopf=2kp6;1m4&Sqrt=6qi;o9&Square=7fl;14t&SquareIntersection=6tv;ww&SquareSubset=6tr;wi&SquareSubsetEqual=6tt;wp&SquareSuperset=6ts;wm&SquareSupersetEqual=6tu;ws&SquareUnion=6tw;wz&Sscr=2kku;1jm&Star=6va;zf&Sub=6vk;zw&Subset=6vk;zv&SubsetEqual=6ti;vu&Succeeds=6t7;uv&SucceedsEqual=8fk;1h4&SucceedsSlantEqual=6t9;v1&SucceedsTilde=6tb;v8&SuchThat=6q3;ni&Sum=6q9;ns&Sup=6vl;zy&Superset=6tf;vp&SupersetEqual=6tj;vx&Supset=6vl;zx&THORN=66;3j&TRADE=6jm;gf&TSHcy=sr;ar&TScy=ti;bh&Tab=9;0&Tau=pw;8v&Tcaron=9w;74&Tcedil=9u;72&Tcy=te;bd&Tfr=2knr;1kw&Therefore=6r8;pt&Theta=pk;8k&ThickSpace=6e7,6bu;et&ThinSpace=6bt;d7&Tilde=6rg;q9&TildeEqual=6rn;qs&TildeFullEqual=6rp;qy&TildeTilde=6rs;r4&Topf=2kp7;1m5&TripleDot=6hn;f3&Tscr=2kkv;1jn&Tstrok=9y;76&Uacute=62;3f&Uarr=6n3;j2&Uarrocir=85l;193&Ubrcy=su;at&Ubreve=a4;7c&Ucirc=63;3g&Ucy=tf;be&Udblac=a8;7g&Ufr=2kns;1kx&Ugrave=61;3e&Umacr=a2;7a&UnderBar=2n;11&UnderBrace=733;13c&UnderBracket=71x;136&UnderParenthesis=731;13a&Union=6v7;z8&UnionPlus=6tq;wf&Uogon=aa;7i&Uopf=2kp8;1m6&UpArrow=6mp;hz&UpArrowBar=842;185&UpArrowDownArrow=6o5;l1&UpDownArrow=6mt;ie&UpEquilibrium=86m;1a2&UpTee=6ud;xv&UpTeeArrow=6n9;jc&Uparrow=6oh;lu&Updownarrow=6ol;m8&UpperLeftArrow=6mu;ih&UpperRightArrow=6mv;ik&Upsi=r6;9z&Upsilon=px;8w&Uring=a6;7e&Uscr=2kkw;1jo&Utilde=a0;78&Uuml=64;3h&VDash=6uj;y3&Vbar=8h7;1iw&Vcy=sy;ax&Vdash=6uh;y1&Vdashl=8h2;1is&Vee=6v5;z3&Verbar=6c6;dp&Vert=6c6;dq&VerticalBar=6qr;on&VerticalLine=3g;18&VerticalSeparator=7rs;16o&VerticalTilde=6rk;qi&VeryThinSpace=6bu;d9&Vfr=2knt;1ky&Vopf=2kp9;1m7&Vscr=2kkx;1jp&Vvdash=6ui;y2&Wcirc=ac;7k&Wedge=6v4;z0&Wfr=2knu;1kz&Wopf=2kpa;1m8&Wscr=2kky;1jq&Xfr=2knv;1l0&Xi=pq;8q&Xopf=2kpb;1m9&Xscr=2kkz;1jr&YAcy=tr;bq&YIcy=sn;an&YUcy=tq;bp&Yacute=65;3i&Ycirc=ae;7m&Ycy=tn;bm&Yfr=2knw;1l1&Yopf=2kpc;1ma&Yscr=2kl0;1js&Yuml=ag;7o&ZHcy=t2;b1&Zacute=ah;7p&Zcaron=al;7t&Zcy=t3
;b2&Zdot=aj;7r&ZeroWidthSpace=6bv;df&Zeta=pi;8i&Zfr=6js;gl&Zopf=6jo;gi&Zscr=2kl1;1jt&aacute=69;3m&abreve=77;4l&ac=6ri;qg&acE=6ri,mr;qe&acd=6rj;qh&acirc=6a;3n&acute=50;28&acy=ts;br&aelig=6e;3r&af=6e9;ex&afr=2kny;1l2&agrave=68;3l&alefsym=6k5;h3&aleph=6k5;h4&alpha=q9;92&amacr=75;4j&amalg=8cf;1dm&amp=12;9&and=6qv;p6&andand=8d1;1e3&andd=8d8;1e9&andslope=8d4;1e6&andv=8d6;1e7&ang=6qo;oj&ange=884;1b1&angle=6qo;oi&angmsd=6qp;ol&angmsdaa=888;1b5&angmsdab=889;1b6&angmsdac=88a;1b7&angmsdad=88b;1b8&angmsdae=88c;1b9&angmsdaf=88d;1ba&angmsdag=88e;1bb&angmsdah=88f;1bc&angrt=6qn;og&angrtvb=6v2;yw&angrtvbd=87x;1b0&angsph=6qq;om&angst=5h;2u&angzarr=70c;12z&aogon=79;4n&aopf=2kpe;1mb&ap=6rs;r8&apE=8ds;1ej&apacir=8dr;1eh&ape=6ru;rd&apid=6rv;rf&apos=13;a&approx=6rs;r5&approxeq=6ru;rc&aring=6d;3q&ascr=2kl2;1ju&ast=16;e&asymp=6rs;r6&asympeq=6rx;rj&atilde=6b;3o&auml=6c;3p&awconint=6r7;ps&awint=8b5;1cr&bNot=8h9;1iy&backcong=6rw;rg&backepsilon=s6;af&backprime=6d1;ei&backsim=6rh;qc&backsimeq=6vh;zp&barvee=6v1;yv&barwed=6x1;11y&barwedge=6x1;11x&bbrk=71x;137&bbrktbrk=71y;138&bcong=6rw;rh&bcy=tt;bs&bdquo=6ce;e4&becaus=6r9;py&because=6r9;px&bemptyv=88g;1bd&bepsi=s6;ag&bernou=6jw;go&beta=qa;93&beth=6k6;h5&between=6ss;tt&bfr=2knz;1l3&bigcap=6v6;z5&bigcirc=7hr;15s&bigcup=6v7;z7&bigodot=8ao;1cd&bigoplus=8ap;1cf&bigotimes=8aq;1ch&bigsqcup=8au;1cl&bigstar=7id;15z&bigtriangledown=7gd;15e&bigtriangleup=7g3;154&biguplus=8as;1cj&bigvee=6v5;z1&bigwedge=6v4;yy&bkarow=83x;17x&blacklozenge=8a3;1c9&blacksquare=7fu;14x&blacktriangle=7g4;156&blacktriangledown=7ge;15g&blacktriangleleft=7gi;15k&blacktriangleright=7g8;15a&blank=74z;13f&blk12=7f6;14r&blk14=7f5;14q&blk34=7f7;14s&block=7ew;14p&bne=1p,6hx;o&bnequiv=6sh,6hx;sm&bnot=6xc;12d&bopf=2kpf;1mc&bot=6ud;xx&bottom=6ud;xu&bowtie=6vc;zi&boxDL=7dj;141&boxDR=7dg;13y&boxDl=7di;140&boxDr=7df;13x&boxH=7dc;13u&boxHD=7dy;14g&boxHU=7e1;14j&boxHd=7dw;14e&boxHu=7dz;14h&boxUL=7dp;147&boxUR=7dm;144&boxUl=7do;146&boxUr=7dl;143&boxV=7dd;13v&boxVH=7e4;14m&boxVL=7dv;14d&boxVR=7ds;14a
&boxVh=7e3;14l&boxVl=7du;14c&boxVr=7dr;149&boxbox=895;1bw&boxdL=7dh;13z&boxdR=7de;13w&boxdl=7bk;13m&boxdr=7bg;13l&boxh=7b4;13j&boxhD=7dx;14f&boxhU=7e0;14i&boxhd=7cc;13r&boxhu=7ck;13s&boxminus=6u7;xi&boxplus=6u6;xg&boxtimes=6u8;xk&boxuL=7dn;145&boxuR=7dk;142&boxul=7bs;13o&boxur=7bo;13n&boxv=7b6;13k&boxvH=7e2;14k&boxvL=7dt;14b&boxvR=7dq;148&boxvh=7cs;13t&boxvl=7c4;13q&boxvr=7bw;13p&bprime=6d1;ej&breve=k8;83&brvbar=4m;1k&bscr=2kl3;1jv&bsemi=6dr;er&bsim=6rh;qd&bsime=6vh;zq&bsol=2k;x&bsolb=891;1bv&bsolhsub=7uw;16r&bull=6ci;e9&bullet=6ci;e8&bump=6ry;rp&bumpE=8fi;1gu&bumpe=6rz;ru&bumpeq=6rz;rt&cacute=7b;4p&cap=6qx;pa&capand=8ck;1dq&capbrcup=8cp;1dv&capcap=8cr;1dx&capcup=8cn;1dt&capdot=8cg;1dn&caps=6qx,1e68;p9&caret=6dd;eo&caron=jr;81&ccaps=8ct;1dz&ccaron=7h;4v&ccedil=6f;3s&ccirc=7d;4r&ccups=8cs;1dy&ccupssm=8cw;1e0&cdot=7f;4t&cedil=54;2f&cemptyv=88i;1bf&cent=4i;1g&centerdot=53;2c&cfr=2ko0;1l4&chcy=uf;ce&check=7pv;16j&checkmark=7pv;16i&chi=qv;9s&cir=7gr;15q&cirE=88z;1bt&circ=jq;7z&circeq=6s7;sc&circlearrowleft=6nu;k6&circlearrowright=6nv;k8&circledR=4u;1w&circledS=79k;13g&circledast=6u3;xc&circledcirc=6u2;xa&circleddash=6u5;xe&cire=6s7;sd&cirfnint=8b4;1cq&cirmid=8hb;1j0&cirscir=88y;1bs&clubs=7kz;168&clubsuit=7kz;167&colon=1m;j&colone=6s4;s7&coloneq=6s4;s5&comma=18;g&commat=1s;u&comp=6pt;mv&compfn=6qg;o7&complement=6pt;mu&complexes=6iq;f6&cong=6rp;qz&congdot=8dp;1ef&conint=6r2;pj&copf=2kpg;1md&coprod=6q8;nr&copy=4p;1r&copysr=6jb;fz&crarr=6np;k1&cross=7pz;16k&cscr=2kl4;1jw&csub=8gf;1id&csube=8gh;1if&csup=8gg;1ie&csupe=8gi;1ig&ctdot=6wf;11g&cudarrl=854;18x&cudarrr=851;18u&cuepr=6vy;10m&cuesc=6vz;10o&cularr=6nq;k3&cularrp=859;190&cup=6qy;pc&cupbrcap=8co;1du&cupcap=8cm;1ds&cupcup=8cq;1dw&cupdot=6tp;we&cupor=8cl;1dr&cups=6qy,1e68;pb&curarr=6nr;k5&curarrm=858;18z&curlyeqprec=6vy;10l&curlyeqsucc=6vz;10n&curlyvee=6vi;zr&curlywedge=6vj;zt&curren=4k;1i&curvearrowleft=6nq;k2&curvearrowright=6nr;k4&cuvee=6vi;zs&cuwed=6vj;zu&cwconint=6r6;pq&cwint=6r5;po&cylcty=6y5;12u&dArr=6oj;m2&dHar=86d
;19t&dagger=6cg;e5&daleth=6k8;h7&darr=6mr;ia&dash=6c0;dl&dashv=6ub;xr&dbkarow=83z;180&dblac=kd;8b&dcaron=7j;4x&dcy=tw;bv&dd=6km;hb&ddagger=6ch;e6&ddarr=6oa;ld&ddotseq=8dz;1ep&deg=4w;21&delta=qc;95&demptyv=88h;1be&dfisht=873;1aj&dfr=2ko1;1l5&dharl=6o3;kx&dharr=6o2;ku&diam=6v8;zc&diamond=6v8;zb&diamondsuit=7l2;16b&diams=7l2;16c&die=4o;1o&digamma=rh;a6&disin=6wi;11j&div=6v;49&divide=6v;48&divideontimes=6vb;zg&divonx=6vb;zh&djcy=uq;co&dlcorn=6xq;12n&dlcrop=6x9;12a&dollar=10;6&dopf=2kph;1me&dot=k9;85&doteq=6s0;rx&doteqdot=6s1;rz&dotminus=6rc;q2&dotplus=6qc;ny&dotsquare=6u9;xm&doublebarwedge=6x2;11z&downarrow=6mr;i9&downdownarrows=6oa;lc&downharpoonleft=6o3;kv&downharpoonright=6o2;ks&drbkarow=840;182&drcorn=6xr;12p&drcrop=6x8;129&dscr=2kl5;1jx&dscy=ut;cr&dsol=8ae;1cc&dstrok=7l;4z&dtdot=6wh;11i&dtri=7gf;15j&dtrif=7ge;15h&duarr=6ph;mo&duhar=86n;1a5&dwangle=886;1b3&dzcy=v3;d0&dzigrarr=7wf;17r&eDDot=8dz;1eq&eDot=6s1;s0&eacute=6h;3u&easter=8dq;1eg&ecaron=7v;57&ecir=6s6;sb&ecirc=6i;3v&ecolon=6s5;s9&ecy=ul;ck&edot=7r;53&ee=6kn;he&efDot=6s2;s2&efr=2ko2;1l6&eg=8ey;1g9&egrave=6g;3t&egs=8eu;1g5&egsdot=8ew;1g7&el=8ex;1g8&elinters=73b;13e&ell=6j7;fv&els=8et;1g3&elsdot=8ev;1g6&emacr=7n;51&empty=6px;n7&emptyset=6px;n5&emptyv=6px;n6&emsp=6bn;d2&emsp13=6bo;d3&emsp14=6bp;d4&eng=97;6h&ensp=6bm;d1&eogon=7t;55&eopf=2kpi;1mf&epar=6vp;103&eparsl=89v;1c6&eplus=8dt;1ek&epsi=qd;97&epsilon=qd;96&epsiv=s5;ae&eqcirc=6s6;sa&eqcolon=6s5;s8&eqsim=6rm;qq&eqslantgtr=8eu;1g4&eqslantless=8et;1g2&equals=1p;p&equest=6sf;sj&equiv=6sh;so&equivDD=8e0;1er&eqvparsl=89x;1c8&erDot=6s3;s4&erarr=86p;1a7&escr=6jz;gs&esdot=6s0;ry&esim=6rm;qr&eta=qf;99&eth=6o;41&euml=6j;3w&euro=6gc;f2&excl=x;2&exist=6pv;n0&expectation=6k0;gt&exponentiale=6kn;hd&fallingdotseq=6s2;s1&fcy=uc;cb&female=7k0;163&ffilig=1dkz;1ja&fflig=1dkw;1j7&ffllig=1dl0;1jb&ffr=2ko3;1l7&filig=1dkx;1j8&fjlig=2u,2y;15&flat=7l9;16e&fllig=1dky;1j9&fltns=7g1;153&fnof=b6;7v&fopf=2kpj;1mg&forall=6ps;mt&fork=6vo;102&forkv=8gp;1in&fpartint=8b1;1cp&frac12=59;2k&frac13=
6kz;hh&frac14=58;2j&frac15=6l1;hj&frac16=6l5;hn&frac18=6l7;hp&frac23=6l0;hi&frac25=6l2;hk&frac34=5a;2m&frac35=6l3;hl&frac38=6l8;hq&frac45=6l4;hm&frac56=6l6;ho&frac58=6l9;hr&frac78=6la;hs&frasl=6dg;eq&frown=6xu;12r&fscr=2kl7;1jy&gE=6sn;t8&gEl=8ek;1ft&gacute=dx;7x&gamma=qb;94&gammad=rh;a7&gap=8ee;1fh&gbreve=7z;5b&gcirc=7x;59&gcy=tv;bu&gdot=81;5d&ge=6sl;sx&gel=6vv;10k&geq=6sl;sw&geqq=6sn;t7&geqslant=8e6;1f6&ges=8e6;1f7&gescc=8fd;1gn&gesdot=8e8;1f9&gesdoto=8ea;1fb&gesdotol=8ec;1fd&gesl=6vv,1e68;10h&gesles=8es;1g1&gfr=2ko4;1l8&gg=6sr;ts&ggg=6vt;10b&gimel=6k7;h6&gjcy=ur;cp&gl=6t3;un&glE=8eq;1fz&gla=8f9;1gj&glj=8f8;1gi&gnE=6sp;tg&gnap=8ei;1fp&gnapprox=8ei;1fo&gne=8eg;1fl&gneq=8eg;1fk&gneqq=6sp;tf&gnsim=6w7;10y&gopf=2kpk;1mh&grave=2o;14&gscr=6iy;f9&gsim=6sz;ud&gsime=8em;1fv&gsiml=8eo;1fx&gt=1q;s&gtcc=8fb;1gl&gtcir=8e2;1et&gtdot=6vr;107&gtlPar=87p;1aw&gtquest=8e4;1ev&gtrapprox=8ee;1fg&gtrarr=86w;1ad&gtrdot=6vr;106&gtreqless=6vv;10j&gtreqqless=8ek;1fs&gtrless=6t3;um&gtrsim=6sz;uc&gvertneqq=6sp,1e68;td&gvnE=6sp,1e68;te&hArr=6ok;m5&hairsp=6bu;da&half=59;2l&hamilt=6iz;fb&hardcy=ui;ch&harr=6ms;id&harrcir=85k;192&harrw=6nh;js&hbar=6j3;fl&hcirc=85;5g&hearts=7l1;16a&heartsuit=7l1;169&hellip=6cm;eb&hercon=6ux;yr&hfr=2ko5;1l9&hksearow=84l;18i&hkswarow=84m;18k&hoarr=6pr;mr&homtht=6rf;q5&hookleftarrow=6nd;jj&hookrightarrow=6ne;jl&hopf=2kpl;1mi&horbar=6c5;do&hscr=2kl9;1jz&hslash=6j3;fi&hstrok=87;5i&hybull=6df;ep&hyphen=6c0;dk&iacute=6l;3y&ic=6eb;f1&icirc=6m;3z&icy=u0;bz&iecy=tx;bw&iexcl=4h;1f&iff=6ok;m6&ifr=2ko6;1la&igrave=6k;3x&ii=6ko;hg&iiiint=8b0;1cn&iiint=6r1;pg&iinfin=89o;1c3&iiota=6jt;gm&ijlig=8j;5t&imacr=8b;5m&image=6j5;fp&imagline=6j4;fm&imagpart=6j5;fo&imath=8h;5r&imof=6uv;yo&imped=c5;7w&in=6q0;nd&incare=6it;f8&infin=6qm;of&infintie=89p;1c4&inodot=8h;5q&int=6qz;pe&intcal=6uy;yt&integers=6jo;gh&intercal=6uy;ys&intlarhk=8bb;1cx&intprod=8cc;1dk&iocy=up;cn&iogon=8f;5o&iopf=2kpm;1mj&iota=qh;9b&iprod=8cc;1dl&iquest=5b;2n&iscr=2kla;1k0&isin=6q0;nc&isinE=6wp;11r&isindot=6wl;11n&isins=6w
k;11l&isinsv=6wj;11k&isinv=6q0;nb&it=6ea;ez&itilde=89;5k&iukcy=uu;cs&iuml=6n;40&jcirc=8l;5v&jcy=u1;c0&jfr=2ko7;1lb&jmath=fr;7y&jopf=2kpn;1mk&jscr=2klb;1k1&jsercy=uw;cu&jukcy=us;cq&kappa=qi;9c&kappav=s0;a9&kcedil=8n;5x&kcy=u2;c1&kfr=2ko8;1lc&kgreen=8o;5y&khcy=ud;cc&kjcy=v0;cy&kopf=2kpo;1ml&kscr=2klc;1k2&lAarr=6oq;mf&lArr=6og;ls&lAtail=84b;18a&lBarr=83y;17z&lE=6sm;t2&lEg=8ej;1fr&lHar=86a;19q&lacute=8q;60&laemptyv=88k;1bh&lagran=6j6;ft&lambda=qj;9d&lang=7vs;16z&langd=87l;1as&langle=7vs;16y&lap=8ed;1ff&laquo=4r;1t&larr=6mo;hx&larrb=6p0;mk&larrbfs=84f;18e&larrfs=84d;18c&larrhk=6nd;jk&larrlp=6nf;jo&larrpl=855;18y&larrsim=86r;1a9&larrtl=6n6;j7&lat=8ff;1gp&latail=849;188&late=8fh;1gt&lates=8fh,1e68;1gs&lbarr=83w;17w&lbbrk=7si;16p&lbrace=3f;16&lbrack=2j;v&lbrke=87f;1am&lbrksld=87j;1aq&lbrkslu=87h;1ao&lcaron=8u;64&lcedil=8s;62&lceil=6x4;122&lcub=3f;17&lcy=u3;c2&ldca=852;18v&ldquo=6cc;dz&ldquor=6ce;e3&ldrdhar=86f;19v&ldrushar=85n;195&ldsh=6nm;jz&le=6sk;st&leftarrow=6mo;hv&leftarrowtail=6n6;j6&leftharpoondown=6nx;kd&leftharpoonup=6nw;ka&leftleftarrows=6o7;l6&leftrightarrow=6ms;ic&leftrightarrows=6o6;l4&leftrightharpoons=6ob;lf&leftrightsquigarrow=6nh;jr&leftthreetimes=6vf;zl&leg=6vu;10g&leq=6sk;ss&leqq=6sm;t1&leqslant=8e5;1f0&les=8e5;1f1&lescc=8fc;1gm&lesdot=8e7;1f8&lesdoto=8e9;1fa&lesdotor=8eb;1fc&lesg=6vu,1e68;10d&lesges=8er;1g0&lessapprox=8ed;1fe&lessdot=6vq;104&lesseqgtr=6vu;10f&lesseqqgtr=8ej;1fq&lessgtr=6t2;uj&lesssim=6sy;u9&lfisht=870;1ag&lfloor=6x6;126&lfr=2ko9;1ld&lg=6t2;uk&lgE=8ep;1fy&lhard=6nx;kf&lharu=6nw;kc&lharul=86i;19y&lhblk=7es;14o&ljcy=ux;cv&ll=6sq;tm&llarr=6o7;l7&llcorner=6xq;12m&llhard=86j;19z&lltri=7i2;15w&lmidot=8w;66&lmoust=71s;131&lmoustache=71s;130&lnE=6so;tc&lnap=8eh;1fn&lnapprox=8eh;1fm&lne=8ef;1fj&lneq=8ef;1fi&lneqq=6so;tb&lnsim=6w6;10x&loang=7vw;175&loarr=6pp;mp&lobrk=7vq;16u&longleftarrow=7w5;178&longleftrightarrow=7w7;17e&longmapsto=7wc;17p&longrightarrow=7w6;17b&looparrowleft=6nf;jn&looparrowright=6ng;jp&lopar=879;1ak&lopf=2kpp;1mm&loplus=8bx;1d6
&lotimes=8c4;1dc&lowast=6qf;o5&lowbar=2n;12&loz=7gq;15p&lozenge=7gq;15o&lozf=8a3;1ca&lpar=14;b&lparlt=87n;1au&lrarr=6o6;l5&lrcorner=6xr;12o&lrhar=6ob;lg&lrhard=86l;1a1&lrm=6by;di&lrtri=6v3;yx&lsaquo=6d5;ek&lscr=2kld;1k3&lsh=6nk;jw&lsim=6sy;ua&lsime=8el;1fu&lsimg=8en;1fw&lsqb=2j;w&lsquo=6c8;ds&lsquor=6ca;dw&lstrok=8y;68&lt=1o;n&ltcc=8fa;1gk&ltcir=8e1;1es&ltdot=6vq;105&lthree=6vf;zm&ltimes=6vd;zj&ltlarr=86u;1ac&ltquest=8e3;1eu&ltrPar=87q;1ax&ltri=7gj;15n&ltrie=6us;yi&ltrif=7gi;15l&lurdshar=85m;194&luruhar=86e;19u&lvertneqq=6so,1e68;t9&lvnE=6so,1e68;ta&mDDot=6re;q4&macr=4v;20&male=7k2;164&malt=7q8;16m&maltese=7q8;16l&map=6na;jg&mapsto=6na;jf&mapstodown=6nb;ji&mapstoleft=6n8;jb&mapstoup=6n9;jd&marker=7fy;152&mcomma=8bt;1d4&mcy=u4;c3&mdash=6c4;dn&measuredangle=6qp;ok&mfr=2koa;1le&mho=6jr;gj&micro=51;29&mid=6qr;oq&midast=16;d&midcir=8hc;1j1&middot=53;2d&minus=6qa;nu&minusb=6u7;xj&minusd=6rc;q3&minusdu=8bu;1d5&mlcp=8gr;1ip&mldr=6cm;ec&mnplus=6qb;nw&models=6uf;xy&mopf=2kpq;1mn&mp=6qb;nx&mscr=2kle;1k4&mstpos=6ri;qf&mu=qk;9e&multimap=6uw;yp&mumap=6uw;yq&nGg=6vt,mw;10a&nGt=6sr,6he;tp&nGtv=6sr,mw;to&nLeftarrow=6od;lk&nLeftrightarrow=6oe;lm&nLl=6vs,mw;108&nLt=6sq,6he;tj&nLtv=6sq,mw;ti&nRightarrow=6of;lo&nVDash=6un;y7&nVdash=6um;y6&nabla=6pz;n8&nacute=90;6a&nang=6qo,6he;oh&nap=6rt;rb&napE=8ds,mw;1ei&napid=6rv,mw;re&napos=95;6f&napprox=6rt;ra&natur=7la;16g&natural=7la;16f&naturals=6j9;fw&nbsp=4g;1e&nbump=6ry,mw;rm&nbumpe=6rz,mw;rr&ncap=8cj;1dp&ncaron=94;6e&ncedil=92;6c&ncong=6rr;r2&ncongdot=8dp,mw;1ee&ncup=8ci;1do&ncy=u5;c4&ndash=6c3;dm&ne=6sg;sl&neArr=6on;mb&nearhk=84k;18h&nearr=6mv;im&nearrow=6mv;il&nedot=6s0,mw;rv&nequiv=6si;sq&nesear=84o;18n&nesim=6rm,mw;qo&nexist=6pw;n3&nexists=6pw;n2&nfr=2kob;1lf&ngE=6sn,mw;t4&nge=6sx;u7&ngeq=6sx;u6&ngeqq=6sn,mw;t5&ngeqslant=8e6,mw;1f3&nges=8e6,mw;1f4&ngsim=6t1;uh&ngt=6sv;u1&ngtr=6sv;u0&nhArr=6oe;ln&nharr=6ni;ju&nhpar=8he;1j3&ni=6q3;nk&nis=6ws;11u&nisd=6wq;11s&niv=6q3;nj&njcy=uy;cw&nlArr=6od;ll&nlE=6sm,mw;sy&nlarr=6my;iu&nldr=6cl;ea&nle=6sw;
u4&nleftarrow=6my;it&nleftrightarrow=6ni;jt&nleq=6sw;u3&nleqq=6sm,mw;sz&nleqslant=8e5,mw;1ex&nles=8e5,mw;1ey&nless=6su;tx&nlsim=6t0;uf&nlt=6su;ty&nltri=6wa;115&nltrie=6wc;11b&nmid=6qs;ou&nopf=2kpr;1mo&not=4s;1u&notin=6q1;ng&notinE=6wp,mw;11q&notindot=6wl,mw;11m&notinva=6q1;nf&notinvb=6wn;11p&notinvc=6wm;11o&notni=6q4;nn&notniva=6q4;nm&notnivb=6wu;11w&notnivc=6wt;11v&npar=6qu;p4&nparallel=6qu;p2&nparsl=8hp,6hx;1j5&npart=6pu,mw;mw&npolint=8b8;1cu&npr=6tc;vd&nprcue=6w0;10q&npre=8fj,mw;1gw&nprec=6tc;vc&npreceq=8fj,mw;1gx&nrArr=6of;lp&nrarr=6mz;iw&nrarrc=84z,mw;18s&nrarrw=6n1,mw;ix&nrightarrow=6mz;iv&nrtri=6wb;118&nrtrie=6wd;11e&nsc=6td;vg&nsccue=6w1;10s&nsce=8fk,mw;1h2&nscr=2klf;1k5&nshortmid=6qs;os&nshortparallel=6qu;p1&nsim=6rl;qm&nsime=6ro;qx&nsimeq=6ro;qw&nsmid=6qs;ot&nspar=6qu;p3&nsqsube=6w2;10u&nsqsupe=6w3;10w&nsub=6tg;vs&nsubE=8g5,mw;1hv&nsube=6tk;w2&nsubset=6te,6he;vi&nsubseteq=6tk;w1&nsubseteqq=8g5,mw;1hw&nsucc=6td;vf&nsucceq=8fk,mw;1h3&nsup=6th;vt&nsupE=8g6,mw;1hz&nsupe=6tl;w5&nsupset=6tf,6he;vn&nsupseteq=6tl;w4&nsupseteqq=8g6,mw;1i0&ntgl=6t5;ur&ntilde=6p;42&ntlg=6t4;up&ntriangleleft=6wa;114&ntrianglelefteq=6wc;11a&ntriangleright=6wb;117&ntrianglerighteq=6wd;11d&nu=ql;9f&num=z;5&numero=6ja;fy&numsp=6br;d5&nvDash=6ul;y5&nvHarr=83o;17u&nvap=6rx,6he;ri&nvdash=6uk;y4&nvge=6sl,6he;su&nvgt=1q,6he;q&nvinfin=89q;1c5&nvlArr=83m;17s&nvle=6sk,6he;sr&nvlt=1o,6he;l&nvltrie=6us,6he;yf&nvrArr=83n;17t&nvrtrie=6ut,6he;yj&nvsim=6rg,6he;q6&nwArr=6om;ma&nwarhk=84j;18g&nwarr=6mu;ij&nwarrow=6mu;ii&nwnear=84n;18m&oS=79k;13h&oacute=6r;44&oast=6u3;xd&ocir=6u2;xb&ocirc=6s;45&ocy=u6;c5&odash=6u5;xf&odblac=9d;6l&odiv=8c8;1dg&odot=6u1;x9&odsold=88s;1bn&oelig=9f;6n&ofcir=88v;1bp&ofr=2koc;1lg&ogon=kb;87&ograve=6q;43&ogt=88x;1br&ohbar=88l;1bi&ohm=q1;91&oint=6r2;pk&olarr=6nu;k7&olcir=88u;1bo&olcross=88r;1bm&oline=6da;en&olt=88w;1bq&omacr=99;6j&omega=qx;9u&omicron=qn;9h&omid=88m;1bj&ominus=6ty;x4&oopf=2kps;1mp&opar=88n;1bk&operp=88p;1bl&oplus=6tx;x2&or=6qw;p8&orarr=6nv;k9&ord=8d9;1ea&order=6k4
;h1&orderof=6k4;h0&ordf=4q;1s&ordm=56;2h&origof=6uu;yn&oror=8d2;1e4&orslope=8d3;1e5&orv=8d7;1e8&oscr=6k4;h2&oslash=6w;4a&osol=6u0;x7&otilde=6t;46&otimes=6tz;x6&otimesas=8c6;1de&ouml=6u;47&ovbar=6yl;12x&par=6qt;oz&para=52;2a&parallel=6qt;ox&parsim=8hf;1j4&parsl=8hp;1j6&part=6pu;my&pcy=u7;c6&percnt=11;7&period=1a;h&permil=6cw;ed&perp=6ud;xw&pertenk=6cx;ee&pfr=2kod;1lh&phi=qu;9r&phiv=r9;a2&phmmat=6k3;gy&phone=7im;162&pi=qo;9i&pitchfork=6vo;101&piv=ra;a4&planck=6j3;fj&planckh=6j2;fh&plankv=6j3;fk&plus=17;f&plusacir=8bn;1cz&plusb=6u6;xh&pluscir=8bm;1cy&plusdo=6qc;nz&plusdu=8bp;1d1&pluse=8du;1el&plusmn=4x;23&plussim=8bq;1d2&plustwo=8br;1d3&pm=4x;24&pointint=8b9;1cv&popf=2kpt;1mq&pound=4j;1h&pr=6t6;uu&prE=8fn;1h7&prap=8fr;1he&prcue=6t8;v0&pre=8fj;1h0&prec=6t6;ut&precapprox=8fr;1hd&preccurlyeq=6t8;uz&preceq=8fj;1gz&precnapprox=8ft;1hh&precneqq=8fp;1h9&precnsim=6w8;10z&precsim=6ta;v5&prime=6cy;ef&primes=6jd;g2&prnE=8fp;1ha&prnap=8ft;1hi&prnsim=6w8;110&prod=6q7;np&profalar=6y6;12v&profline=6xe;12e&profsurf=6xf;12f&prop=6ql;oe&propto=6ql;oc&prsim=6ta;v6&prurel=6uo;y8&pscr=2klh;1k6&psi=qw;9t&puncsp=6bs;d6&qfr=2koe;1li&qint=8b0;1co&qopf=2kpu;1mr&qprime=6dz;es&qscr=2kli;1k7&quaternions=6j1;ff&quatint=8ba;1cw&quest=1r;t&questeq=6sf;si&quot=y;4&rAarr=6or;mh&rArr=6oi;lz&rAtail=84c;18b&rBarr=83z;181&rHar=86c;19s&race=6rh,mp;qb&racute=9h;6p&radic=6qi;o8&raemptyv=88j;1bg&rang=7vt;172&rangd=87m;1at&range=885;1b2&rangle=7vt;171&raquo=57;2i&rarr=6mq;i6&rarrap=86t;1ab&rarrb=6p1;mm&rarrbfs=84g;18f&rarrc=84z;18t&rarrfs=84e;18d&rarrhk=6ne;jm&rarrlp=6ng;jq&rarrpl=85h;191&rarrsim=86s;1aa&rarrtl=6n7;j9&rarrw=6n1;iz&ratail=84a;189&ratio=6ra;pz&rationals=6je;g4&rbarr=83x;17y&rbbrk=7sj;16q&rbrace=3h;1b&rbrack=2l;y&rbrke=87g;1an&rbrksld=87i;1ap&rbrkslu=87k;1ar&rcaron=9l;6t&rcedil=9j;6r&rceil=6x5;124&rcub=3h;1c&rcy=u8;c7&rdca=853;18w&rdldhar=86h;19x&rdquo=6cd;e2&rdquor=6cd;e1&rdsh=6nn;k0&real=6jg;g9&realine=6jf;g6&realpart=6jg;g8&reals=6jh;gc&rect=7fx;151&reg=4u;1y&rfisht=871;1ah&rfloor=6x7;128&rfr=2
kof;1lj&rhard=6o1;kr&rharu=6o0;ko&rharul=86k;1a0&rho=qp;9j&rhov=s1;ab&rightarrow=6mq;i4&rightarrowtail=6n7;j8&rightharpoondown=6o1;kp&rightharpoonup=6o0;km&rightleftarrows=6o4;kz&rightleftharpoons=6oc;lh&rightrightarrows=6o9;la&rightsquigarrow=6n1;iy&rightthreetimes=6vg;zn&ring=ka;86&risingdotseq=6s3;s3&rlarr=6o4;l0&rlhar=6oc;lj&rlm=6bz;dj&rmoust=71t;133&rmoustache=71t;132&rnmid=8ha;1iz&roang=7vx;176&roarr=6pq;mq&robrk=7vr;16w&ropar=87a;1al&ropf=2kpv;1ms&roplus=8by;1d7&rotimes=8c5;1dd&rpar=15;c&rpargt=87o;1av&rppolint=8b6;1cs&rrarr=6o9;lb&rsaquo=6d6;el&rscr=2klj;1k8&rsh=6nl;jy&rsqb=2l;z&rsquo=6c9;dv&rsquor=6c9;du&rthree=6vg;zo&rtimes=6ve;zk&rtri=7g9;15d&rtrie=6ut;ym&rtrif=7g8;15b&rtriltri=89a;1by&ruluhar=86g;19w&rx=6ji;ge&sacute=9n;6v&sbquo=6ca;dx&sc=6t7;ux&scE=8fo;1h8&scap=8fs;1hg&scaron=9t;71&sccue=6t9;v3&sce=8fk;1h6&scedil=9r;6z&scirc=9p;6x&scnE=8fq;1hc&scnap=8fu;1hk&scnsim=6w9;112&scpolint=8b7;1ct&scsim=6tb;va&scy=u9;c8&sdot=6v9;zd&sdotb=6u9;xn&sdote=8di;1ec&seArr=6oo;mc&searhk=84l;18j&searr=6mw;ip&searrow=6mw;io&sect=4n;1l&semi=1n;k&seswar=84p;18p&setminus=6qe;o2&setmn=6qe;o4&sext=7qu;16n&sfr=2kog;1lk&sfrown=6xu;12q&sharp=7lb;16h&shchcy=uh;cg&shcy=ug;cf&shortmid=6qr;oo&shortparallel=6qt;ow&shy=4t;1v&sigma=qr;9n&sigmaf=qq;9l&sigmav=qq;9m&sim=6rg;qa&simdot=8dm;1ed&sime=6rn;qu&simeq=6rn;qt&simg=8f2;1gb&simgE=8f4;1gd&siml=8f1;1ga&simlE=8f3;1gc&simne=6rq;r0&simplus=8bo;1d0&simrarr=86q;1a8&slarr=6mo;hw&smallsetminus=6qe;o0&smashp=8c3;1db&smeparsl=89w;1c7&smid=6qr;op&smile=6xv;12t&smt=8fe;1go&smte=8fg;1gr&smtes=8fg,1e68;1gq&softcy=uk;cj&sol=1b;i&solb=890;1bu&solbar=6yn;12y&sopf=2kpw;1mt&spades=7kw;166&spadesuit=7kw;165&spar=6qt;oy&sqcap=6tv;wx&sqcaps=6tv,1e68;wv&sqcup=6tw;x0&sqcups=6tw,1e68;wy&sqsub=6tr;wk&sqsube=6tt;wr&sqsubset=6tr;wj&sqsubseteq=6tt;wq&sqsup=6ts;wo&sqsupe=6tu;wu&sqsupset=6ts;wn&sqsupseteq=6tu;wt&squ=7fl;14v&square=7fl;14u&squarf=7fu;14y&squf=7fu;14z&srarr=6mq;i5&sscr=2klk;1k9&ssetmn=6qe;o3&ssmile=6xv;12s&sstarf=6va;ze&star=7ie;161&starf=7id;160&strai
ghtepsilon=s5;ac&straightphi=r9;a0&strns=4v;1z&sub=6te;vl&subE=8g5;1hy&subdot=8fx;1hn&sube=6ti;vw&subedot=8g3;1ht&submult=8g1;1hr&subnE=8gb;1i8&subne=6tm;w9&subplus=8fz;1hp&subrarr=86x;1ae&subset=6te;vk&subseteq=6ti;vv&subseteqq=8g5;1hx&subsetneq=6tm;w8&subsetneqq=8gb;1i7&subsim=8g7;1i3&subsub=8gl;1ij&subsup=8gj;1ih&succ=6t7;uw&succapprox=8fs;1hf&succcurlyeq=6t9;v2&succeq=8fk;1h5&succnapprox=8fu;1hj&succneqq=8fq;1hb&succnsim=6w9;111&succsim=6tb;v9&sum=6q9;nt&sung=7l6;16d&sup=6tf;vr&sup1=55;2g&sup2=4y;25&sup3=4z;26&supE=8g6;1i2&supdot=8fy;1ho&supdsub=8go;1im&supe=6tj;vz&supedot=8g4;1hu&suphsol=7ux;16s&suphsub=8gn;1il&suplarr=86z;1af&supmult=8g2;1hs&supnE=8gc;1ic&supne=6tn;wd&supplus=8g0;1hq&supset=6tf;vq&supseteq=6tj;vy&supseteqq=8g6;1i1&supsetneq=6tn;wc&supsetneqq=8gc;1ib&supsim=8g8;1i4&supsub=8gk;1ii&supsup=8gm;1ik&swArr=6op;md&swarhk=84m;18l&swarr=6mx;is&swarrow=6mx;ir&swnwar=84q;18r&szlig=67;3k&target=6xi;12h&tau=qs;9o&tbrk=71w;135&tcaron=9x;75&tcedil=9v;73&tcy=ua;c9&tdot=6hn;f4&telrec=6xh;12g&tfr=2koh;1ll&there4=6r8;pv&therefore=6r8;pu&theta=qg;9a&thetasym=r5;9v&thetav=r5;9x&thickapprox=6rs;r3&thicksim=6rg;q7&thinsp=6bt;d8&thkap=6rs;r7&thksim=6rg;q8&thorn=72;4g&tilde=kc;89&times=5z;3c&timesb=6u8;xl&timesbar=8c1;1da&timesd=8c0;1d9&tint=6r1;ph&toea=84o;18o&top=6uc;xt&topbot=6ye;12w&topcir=8hd;1j2&topf=2kpx;1mu&topfork=8gq;1io&tosa=84p;18q&tprime=6d0;eh&trade=6jm;gg&triangle=7g5;158&triangledown=7gf;15i&triangleleft=7gj;15m&trianglelefteq=6us;yh&triangleq=6sc;sg&triangleright=7g9;15c&trianglerighteq=6ut;yl&tridot=7ho;15r&trie=6sc;sh&triminus=8ca;1di&triplus=8c9;1dh&trisb=899;1bx&tritime=8cb;1dj&trpezium=736;13d&tscr=2kll;1ka&tscy=ue;cd&tshcy=uz;cx&tstrok=9z;77&twixt=6ss;tu&twoheadleftarrow=6n2;j0&twoheadrightarrow=6n4;j3&uArr=6oh;lv&uHar=86b;19r&uacute=6y;4c&uarr=6mp;i1&ubrcy=v2;cz&ubreve=a5;7d&ucirc=6z;4d&ucy=ub;ca&udarr=6o5;l2&udblac=a9;7h&udhar=86m;1a3&ufisht=872;1ai&ufr=2koi;1lm&ugrave=6x;4b&uharl=6nz;kl&uharr=6ny;ki&uhblk=7eo;14n&ulcorn=6xo;12j&ulcorner=6xo;12
i&ulcrop=6xb;12c&ultri=7i0;15u&umacr=a3;7b&uml=4o;1p&uogon=ab;7j&uopf=2kpy;1mv&uparrow=6mp;i0&updownarrow=6mt;if&upharpoonleft=6nz;kj&upharpoonright=6ny;kg&uplus=6tq;wg&upsi=qt;9q&upsih=r6;9y&upsilon=qt;9p&upuparrows=6o8;l8&urcorn=6xp;12l&urcorner=6xp;12k&urcrop=6xa;12b&uring=a7;7f&urtri=7i1;15v&uscr=2klm;1kb&utdot=6wg;11h&utilde=a1;79&utri=7g5;159&utrif=7g4;157&uuarr=6o8;l9&uuml=70;4e&uwangle=887;1b4&vArr=6ol;m9&vBar=8h4;1iu&vBarv=8h5;1iv&vDash=6ug;y0&vangrt=87w;1az&varepsilon=s5;ad&varkappa=s0;a8&varnothing=6px;n4&varphi=r9;a1&varpi=ra;a3&varpropto=6ql;ob&varr=6mt;ig&varrho=s1;aa&varsigma=qq;9k&varsubsetneq=6tm,1e68;w6&varsubsetneqq=8gb,1e68;1i5&varsupsetneq=6tn,1e68;wa&varsupsetneqq=8gc,1e68;1i9&vartheta=r5;9w&vartriangleleft=6uq;y9&vartriangleright=6ur;yc&vcy=tu;bt&vdash=6ua;xp&vee=6qw;p7&veebar=6uz;yu&veeeq=6sa;sf&vellip=6we;11f&verbar=3g;19&vert=3g;1a&vfr=2koj;1ln&vltri=6uq;yb&vnsub=6te,6he;vj&vnsup=6tf,6he;vo&vopf=2kpz;1mw&vprop=6ql;od&vrtri=6ur;ye&vscr=2kln;1kc&vsubnE=8gb,1e68;1i6&vsubne=6tm,1e68;w7&vsupnE=8gc,1e68;1ia&vsupne=6tn,1e68;wb&vzigzag=87u;1ay&wcirc=ad;7l&wedbar=8db;1eb&wedge=6qv;p5&wedgeq=6s9;se&weierp=6jc;g0&wfr=2kok;1lo&wopf=2kq0;1mx&wp=6jc;g1&wr=6rk;qk&wreath=6rk;qj&wscr=2klo;1kd&xcap=6v6;z6&xcirc=7hr;15t&xcup=6v7;z9&xdtri=7gd;15f&xfr=2kol;1lp&xhArr=7wa;17o&xharr=7w7;17f&xi=qm;9g&xlArr=7w8;17i&xlarr=7w5;179&xmap=7wc;17q&xnis=6wr;11t&xodot=8ao;1ce&xopf=2kq1;1my&xoplus=8ap;1cg&xotime=8aq;1ci&xrArr=7w9;17l&xrarr=7w6;17c&xscr=2klp;1ke&xsqcup=8au;1cm&xuplus=8as;1ck&xutri=7g3;155&xvee=6v5;z2&xwedge=6v4;yz&yacute=71;4f&yacy=un;cm&ycirc=af;7n&ycy=uj;ci&yen=4l;1j&yfr=2kom;1lq&yicy=uv;ct&yopf=2kq2;1mz&yscr=2klq;1kf&yucy=um;cl&yuml=73;4h&zacute=ai;7q&zcaron=am;7u&zcy=tz;by&zdot=ak;7s&zeetrf=6js;gk&zeta=qe;98&zfr=2kon;1lr&zhcy=ty;bx&zigrarr=6ot;mi&zopf=2kq3;1n0&zscr=2klr;1kg&zwj=6bx;dh&zwnj=6bw;dg&";
+}
+
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/CharacterReader.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/CharacterReader.java
new file mode 100644
index 00000000..c29b4454
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/CharacterReader.java
@@ -0,0 +1,483 @@
+package ru.noties.markwon.html.jsoup.parser;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.Locale;
+
+import ru.noties.markwon.html.jsoup.UncheckedIOException;
+import ru.noties.markwon.html.jsoup.helper.Validate;
+
+/**
+ CharacterReader consumes tokens off a string. Used internally by jsoup. API subject to changes.
+ */
+public final class CharacterReader {
+    static final char EOF = (char) -1;
+    private static final int maxStringCacheLen = 12;
+    static final int maxBufferLen = 1024 * 32; // visible for testing
+    private static final int readAheadLimit = (int) (maxBufferLen * 0.75);
+
+    private final char[] charBuf;
+    private final Reader reader;
+    private int bufLength;
+    private int bufSplitPoint;
+    private int bufPos;
+    private int readerPos;
+    private int bufMark;
+    private final String[] stringCache = new String[512]; // holds reused strings in this doc, to lessen garbage
+
+    public CharacterReader(Reader input, int sz) {
+        Validate.notNull(input);
+        Validate.isTrue(input.markSupported()); // bufferUp() depends on mark()/reset()
+        this.reader = input;
+        this.charBuf = new char[Math.min(sz, maxBufferLen)]; // requested size, capped at maxBufferLen
+        bufferUp(); // load the first window of content
+    }
+
+    public CharacterReader(Reader input) { // convenience overload: uses the maximum buffer size
+        this(input, maxBufferLen);
+    }
+
+    public CharacterReader(String input) { // buffer is sized to the string; the delegate caps it at maxBufferLen
+        this(new StringReader(input), input.length());
+    }
+
+    private void bufferUp() { // reloads charBuf from the reader once the cursor reaches the split point
+        if (bufPos < bufSplitPoint)
+            return; // cursor is still before the split point: keep the current window
+
+        try {
+            reader.skip(bufPos); // advance the reader past what was consumed from this window (return value is not checked)
+            reader.mark(maxBufferLen); // remember this spot so the read below can be undone
+            final int read = reader.read(charBuf);
+            reader.reset(); // rewind to the mark, i.e. the start of the window just read
+            if (read != -1) { // -1 signals end of input: the existing buffer state is left as is
+                bufLength = read;
+                readerPos += bufPos; // readerPos tracks the absolute offset of charBuf[0]
+                bufPos = 0;
+                bufMark = 0; // any earlier mark() is dropped when the window is replaced
+                bufSplitPoint = bufLength > readAheadLimit ? readAheadLimit : bufLength;
+            }
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    /**
+     * Reports the absolute cursor offset: the offset of the buffer start plus the index within the buffer.
+     * @return current position
+     */
+    public int pos() {
+        return bufPos + readerPos;
+    }
+
+    /**
+     * Tests if all the content has been read; the buffer is refilled before checking.
+     * @return true if nothing left to read.
+     */
+    public boolean isEmpty() {
+        bufferUp();
+        return isEmptyNoBufferUp();
+    }
+
+    private boolean isEmptyNoBufferUp() { // same test as isEmpty(), minus the refill
+        return !(bufPos < bufLength);
+    }
+
+    /**
+     * Peeks at the char under the cursor without moving the cursor.
+     * @return the current char, or EOF when the buffer is exhausted
+     */
+    public char current() {
+        bufferUp();
+        return bufPos < bufLength ? charBuf[bufPos] : EOF;
+    }
+
+    char consume() {
+        bufferUp();
+        final char next = bufPos < bufLength ? charBuf[bufPos] : EOF;
+        bufPos += 1; // the cursor moves even when EOF was returned
+        return next;
+    }
+
+    void unconsume() { // steps the cursor back one char; no lower-bound check is performed
+        bufPos -= 1;
+    }
+
+    /**
+     * Steps the cursor forward by one char; the buffer is neither refilled nor bounds-checked here.
+     */
+    public void advance() {
+        bufPos += 1;
+    }
+
+    void mark() { // remembers the current in-buffer cursor for rewindToMark()
+        this.bufMark = this.bufPos;
+    }
+
+    void rewindToMark() { // jumps back to the index saved by mark(); a buffer refill resets that index to 0
+        this.bufPos = this.bufMark;
+    }
+
+    /**
+     * Finds the distance from the cursor to the next occurrence of the given char within the current buffer.
+     * @param c scan target
+     * @return offset between current position and next instance of target. -1 if not found.
+     */
+    int nextIndexOf(char c) {
+        // doesn't handle scanning for surrogates
+        bufferUp();
+        int scan = bufPos;
+        while (scan < bufLength && charBuf[scan] != c)
+            scan++;
+        final boolean found = scan < bufLength;
+        return found ? scan - bufPos : -1;
+    }
+
+    /**
+     * Returns the number of characters between the current position and the next instance of the input sequence.
+     * Only the currently buffered content is scanned. The sequence must be non-empty: its first char is read unconditionally.
+     * @param seq scan target
+     * @return offset between current position and next instance of target. -1 if not found.
+     */
+    int nextIndexOf(CharSequence seq) {
+        bufferUp();
+        // doesn't handle scanning for surrogates
+        char startChar = seq.charAt(0);
+        for (int offset = bufPos; offset < bufLength; offset++) {
+            // fast-forward offset to the next occurrence of the first char (or to bufLength if there is none):
+            if (startChar != charBuf[offset])
+                while(++offset < bufLength && startChar != charBuf[offset]) { /* empty */ }
+            int i = offset + 1; // index of the buffer char to compare against seq[1]
+            int last = i + seq.length()-1; // exclusive end of the candidate match in the buffer
+            if (offset < bufLength && last <= bufLength) { // skip candidates that would run past the buffered content
+                for (int j = 1; i < last && seq.charAt(j) == charBuf[i]; i++, j++) { /* empty */ }
+                if (i == last) // found full sequence
+                    return offset - bufPos;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * Reads characters up to (but not including) the given char; reads to the end of the buffer if it is absent.
+     * @param c the delimiter
+     * @return the chars read
+     */
+    public String consumeTo(char c) {
+        final int distance = nextIndexOf(c);
+        if (distance == -1) {
+            // delimiter absent from the buffer: take everything that is left
+            return consumeToEnd();
+        }
+        final String head = cacheString(charBuf, stringCache, bufPos, distance);
+        bufPos = bufPos + distance;
+        return head;
+    }
+
+    String consumeTo(String seq) {
+        final int distance = nextIndexOf(seq);
+        if (distance == -1) {
+            // sequence absent from the buffer: take everything that is left
+            return consumeToEnd();
+        }
+        final String head = cacheString(charBuf, stringCache, bufPos, distance);
+        bufPos = bufPos + distance;
+        return head;
+    }
+
+    /**
+     * Read characters until the first of any delimiters is found; the delimiter itself is left unconsumed.
+     * @param chars delimiters to scan for
+     * @return characters read up to the matched delimiter.
+     */
+    public String consumeToAny(final char... chars) {
+        bufferUp();
+        final int begin = bufPos;
+        boolean delimiterHit = false;
+        while (!delimiterHit && bufPos < bufLength) {
+            final char candidate = charBuf[bufPos];
+            for (int k = 0; k < chars.length && !delimiterHit; k++)
+                delimiterHit = candidate == chars[k];
+            if (!delimiterHit)
+                bufPos++;
+        }
+
+        if (bufPos <= begin)
+            return "";
+        return cacheString(charBuf, stringCache, begin, bufPos - begin);
+    }
+
+    String consumeToAnySorted(final char... chars) {
+        // chars must be sorted ascending: membership is tested with a binary search
+        bufferUp();
+        final int begin = bufPos;
+
+        // advance until a delimiter is under the cursor or the buffer runs out
+        while (bufPos < bufLength && Arrays.binarySearch(chars, charBuf[bufPos]) < 0)
+            bufPos++;
+
+        final int length = bufPos - begin;
+        if (length <= 0)
+            return "";
+        return cacheString(charBuf, stringCache, begin, length);
+    }
+
+    String consumeData() {
+        // stops at (without consuming) the first of: &, <, null
+        bufferUp();
+        final int begin = bufPos;
+
+        for (; bufPos < bufLength; bufPos++) {
+            final char ch = charBuf[bufPos];
+            final boolean terminator = ch == '&' || ch == '<' || ch == TokeniserState.nullChar;
+            if (terminator)
+                break;
+        }
+
+        if (bufPos > begin)
+            return cacheString(charBuf, stringCache, begin, bufPos - begin);
+        return "";
+    }
+
+    String consumeTagName() {
+        // stops at (without consuming) the first of: '\t', '\n', '\r', '\f', ' ', '/', '>', nullChar
+        bufferUp();
+        final int begin = bufPos;
+
+        for (; bufPos < bufLength; bufPos++) {
+            final char ch = charBuf[bufPos];
+            final boolean whitespace = ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f' || ch == ' ';
+            if (whitespace || ch == '/' || ch == '>' || ch == TokeniserState.nullChar)
+                break;
+        }
+
+        if (bufPos > begin)
+            return cacheString(charBuf, stringCache, begin, bufPos - begin);
+        return "";
+    }
+
+    String consumeToEnd() {
+        bufferUp();
+        final String rest = cacheString(charBuf, stringCache, bufPos, bufLength - bufPos);
+        this.bufPos = this.bufLength; // cursor now sits at the end of the buffered content
+        return rest;
+    }
+
+    String consumeLetterSequence() {
+        bufferUp();
+        final int begin = bufPos;
+        for (; bufPos < bufLength; bufPos++) {
+            final char ch = charBuf[bufPos];
+            // the ASCII ranges are checked before falling back to Character.isLetter
+            final boolean asciiLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+            if (!asciiLetter && !Character.isLetter(ch))
+                break;
+        }
+
+        return cacheString(charBuf, stringCache, begin, bufPos - begin);
+    }
+
+    String consumeLetterThenDigitSequence() {
+        bufferUp();
+        final int begin = bufPos;
+
+        // phase one: leading run of letters
+        for (; bufPos < bufLength; bufPos++) {
+            final char ch = charBuf[bufPos];
+            final boolean asciiLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+            if (!asciiLetter && !Character.isLetter(ch))
+                break;
+        }
+
+        // phase two: trailing run of ASCII digits
+        for (; bufPos < bufLength; bufPos++) {
+            final char ch = charBuf[bufPos];
+            if (ch < '0' || ch > '9')
+                break;
+        }
+        return cacheString(charBuf, stringCache, begin, bufPos - begin);
+    }
+
+    String consumeHexSequence() {
+        bufferUp();
+        final int begin = bufPos;
+        for (; bufPos < bufLength; bufPos++) {
+            final char ch = charBuf[bufPos];
+            final boolean decimal = ch >= '0' && ch <= '9';
+            final boolean hexLetter = (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
+            if (!decimal && !hexLetter)
+                break;
+        }
+        return cacheString(charBuf, stringCache, begin, bufPos - begin);
+    }
+
+    String consumeDigitSequence() {
+        bufferUp();
+        final int begin = bufPos;
+        // consume while the cursor is on an ASCII digit
+        for (; bufPos < bufLength; bufPos++) {
+            final char ch = charBuf[bufPos];
+            final boolean digit = ch >= '0' && ch <= '9';
+            if (!digit)
+                break;
+        }
+        return cacheString(charBuf, stringCache, begin, bufPos - begin);
+    }
+
+    boolean matches(char c) {
+        // isEmpty() refills the buffer before the bounds check
+        return !isEmpty() && c == charBuf[bufPos];
+    }
+
+    boolean matches(String seq) {
+        bufferUp();
+        final int needed = seq.length();
+        final int available = bufLength - bufPos;
+        if (needed > available)
+            return false;
+        int k = 0;
+        while (k < needed && charBuf[bufPos + k] == seq.charAt(k))
+            k++;
+        return k == needed;
+    }
+
+    boolean matchesIgnoreCase(String seq) {
+        bufferUp();
+        final int needed = seq.length();
+        if (bufLength - bufPos < needed)
+            return false;
+
+        // compare char by char after upper-casing both sides
+        int k = 0;
+        while (k < needed
+                && Character.toUpperCase(seq.charAt(k)) == Character.toUpperCase(charBuf[bufPos + k]))
+            k++;
+
+        return k == needed;
+    }
+
+    boolean matchesAny(char... seq) {
+        if (isEmpty())
+            return false;
+
+        bufferUp();
+        final char head = charBuf[bufPos];
+        // linear scan; stops at the first candidate equal to the char under the cursor
+        boolean found = false;
+        for (int k = 0; k < seq.length && !found; k++)
+            found = seq[k] == head;
+        return found;
+    }
+
+    boolean matchesAnySorted(char[] seq) {
+        bufferUp();
+        return !(isEmpty() || Arrays.binarySearch(seq, charBuf[bufPos]) < 0); // seq must be sorted for the binary search
+    }
+
+    boolean matchesLetter() {
+        if (isEmpty())
+            return false;
+        final char head = charBuf[bufPos];
+        return ('A' <= head && head <= 'Z') || ('a' <= head && head <= 'z') || Character.isLetter(head);
+    }
+
+    boolean matchesDigit() {
+        if (isEmpty())
+            return false;
+        final char head = charBuf[bufPos];
+        return '0' <= head && head <= '9';
+    }
+
+    boolean matchConsume(String seq) {
+        bufferUp();
+        final boolean matched = matches(seq);
+        if (matched) {
+            // step over the matched text; on a miss the cursor is left untouched
+            bufPos += seq.length();
+        }
+        return matched;
+    }
+
+    boolean matchConsumeIgnoreCase(String seq) {
+        final boolean matched = matchesIgnoreCase(seq);
+        if (matched) {
+            // step over the matched text; on a miss the cursor is left untouched
+            bufPos += seq.length();
+        }
+        return matched;
+    }
+
+    boolean containsIgnoreCase(String seq) {
+        // used to check presence of </title>, </style>. only the all-lowercase and all-uppercase forms are found.
+        final String lower = seq.toLowerCase(Locale.ENGLISH);
+        final String upper = seq.toUpperCase(Locale.ENGLISH);
+        return nextIndexOf(lower) != -1 || nextIndexOf(upper) != -1;
+    }
+
+    @Override
+    public String toString() {
+        return bufPos < bufLength ? new String(charBuf, bufPos, bufLength - bufPos) : ""; // bufPos can overshoot bufLength after consume()/advance() at EOF; a negative count would throw
+    }
+
+    /**
+     * Caches short strings, as a flyweight pattern, to reduce GC load. Just for this doc, to prevent leaks.
+     * <p>
+     * Simplistic, and on hash collisions just falls back to creating a new string, vs a full HashMap with Entry list.
+     * That saves both having to create objects as hash keys, and running through the entry list, at the expense of
+     * some more duplicates.
+     */
+    private static String cacheString(final char[] charBuf, final String[] stringCache, final int start, final int count) {
+        // limit (no cache): strings longer than maxStringCacheLen are always freshly allocated
+        if (count > maxStringCacheLen)
+            return new String(charBuf, start, count);
+        if (count < 1)
+            return ""; // zero or negative length yields the empty string
+
+        // calculate hash (31-multiplier polynomial over the chars in the range):
+        int hash = 0;
+        int offset = start;
+        for (int i = 0; i < count; i++) {
+            hash = 31 * hash + charBuf[offset++];
+        }
+
+        // get from cache; '-' binds tighter than '&', so this is hash & (length - 1), which is never negative
+        final int index = hash & stringCache.length - 1;
+        String cached = stringCache[index];
+
+        if (cached == null) { // miss, add
+            cached = new String(charBuf, start, count);
+            stringCache[index] = cached;
+        } else { // hashcode hit, check equality
+            if (rangeEquals(charBuf, start, count, cached)) { // hit
+                return cached;
+            } else { // hashcode conflict
+                cached = new String(charBuf, start, count);
+                stringCache[index] = cached; // update the cache, as recently used strings are more likely to show up again
+            }
+        }
+        return cached;
+    }
+
+    /**
+     * Check if the value of the provided range equals the string.
+     * The lengths must agree before any characters are compared.
+     */
+    static boolean rangeEquals(final char[] charBuf, final int start, int count, final String cached) {
+        if (count != cached.length())
+            return false;
+
+        // lengths agree: walk both in lockstep and bail out on the first difference
+        for (int k = 0; k < count; k++) {
+            if (cached.charAt(k) != charBuf[start + k])
+                return false;
+        }
+        return true;
+    }
+
+    // just used for testing: compares a slice of this reader's own buffer against the given string
+    boolean rangeEquals(final int start, final int count, final String cached) {
+        return rangeEquals(charBuf, start, count, cached);
+    }
+}
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/ParseError.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/ParseError.java
new file mode 100644
index 00000000..533f9aee
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/ParseError.java
@@ -0,0 +1,41 @@
+package ru.noties.markwon.html.jsoup.parser;
+
+/**
+ * A Parse Error records an error in the input HTML that occurs in either the tokenisation or the tree building phase.
+ */
+public class ParseError {
+    private final int pos; // offset of the error within the input; fixed at construction
+    private final String errorMsg; // human-readable description; fixed at construction
+
+    ParseError(int pos, String errorMsg) {
+        this.pos = pos;
+        this.errorMsg = errorMsg;
+    }
+
+    ParseError(int pos, String errorFormat, Object... args) {
+        // errorFormat is a String.format pattern; the message is rendered once, here
+        this(pos, String.format(errorFormat, args));
+    }
+
+    /**
+     * Retrieve the error message.
+     * @return the error message.
+     */
+    public String getErrorMessage() {
+        return errorMsg;
+    }
+
+    /**
+     * Retrieves the offset of the error.
+     * @return error offset within input
+     */
+    public int getPosition() {
+        return pos;
+    }
+
+    @Override
+    public String toString() {
+        return pos + ": " + errorMsg;
+    }
+}
+
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/ParseErrorList.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/ParseErrorList.java
new file mode 100644
index 00000000..a3e42a08
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/ParseErrorList.java
@@ -0,0 +1,34 @@
+package ru.noties.markwon.html.jsoup.parser;
+
+import java.util.ArrayList;
+
+/**
+ * A container for ParseErrors.
+ *
+ * @author Jonathan Hedley
+ */
+public class ParseErrorList extends ArrayList<ParseError> {
+    private static final int INITIAL_CAPACITY = 16;
+    private final int maxSize;
+
+    public static ParseErrorList tracking(int maxSize) { // list whose canAddError() allows up to maxSize entries
+        return new ParseErrorList(INITIAL_CAPACITY, maxSize);
+    }
+
+    public static ParseErrorList noTracking() { // zero capacity, zero limit: canAddError() is always false
+        return new ParseErrorList(0, 0);
+    }
+
+    ParseErrorList(int initialCapacity, int maxSize) {
+        super(initialCapacity);
+        this.maxSize = maxSize;
+    }
+
+    int getMaxSize() {
+        return this.maxSize;
+    }
+
+    boolean canAddError() { // add() is not overridden here, so the limit is only reported, not enforced
+        return maxSize > size();
+    }
+}
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/Token.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/Token.java
new file mode 100644
index 00000000..0b157d07
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/Token.java
@@ -0,0 +1,398 @@
+package ru.noties.markwon.html.jsoup.parser;
+
+import android.support.annotation.NonNull;
+
+import ru.noties.markwon.html.jsoup.helper.Validate;
+import ru.noties.markwon.html.jsoup.nodes.Attributes;
+
+import static ru.noties.markwon.html.jsoup.helper.Normalizer.lowerCase;
+
+/**
+ * Parse tokens for the Tokeniser.
+ */
+public abstract class Token {
+
+    public final TokenType type;
+
+    protected Token(@NonNull TokenType tokenType) {
+        this.type = tokenType;
+    }
+
+//    String tokenType() {
+//        return this.getClass().getSimpleName();
+//    }
+
+    /**
+     * Reset the data represented by this token, for reuse. Prevents the need to create transfer objects for every
+     * piece of data, which immediately get GCed.
+     */
+    public abstract Token reset();
+
+    static void reset(StringBuilder sb) {
+        if (sb == null) return; // nothing to clear
+        // empty the builder in place so the same instance can be reused
+        sb.setLength(0);
+    }
+
+    public static final class Doctype extends Token {
+        final StringBuilder name = new StringBuilder();
+        String pubSysKey = null;
+        final StringBuilder publicIdentifier = new StringBuilder();
+        final StringBuilder systemIdentifier = new StringBuilder();
+        boolean forceQuirks = false;
+
+        Doctype() {
+            super(TokenType.Doctype);
+        }
+
+        @Override
+        public Token reset() {
+            reset(name);
+            pubSysKey = null;
+            reset(publicIdentifier);
+            reset(systemIdentifier);
+            forceQuirks = false;
+            return this;
+        }
+
+        String getName() {
+            return name.toString();
+        }
+
+        String getPubSysKey() {
+            return pubSysKey;
+        }
+
+        String getPublicIdentifier() {
+            return publicIdentifier.toString();
+        }
+
+        public String getSystemIdentifier() {
+            return systemIdentifier.toString();
+        }
+
+        public boolean isForceQuirks() {
+            return forceQuirks;
+        }
+    }
+
+    public static abstract class Tag extends Token {
+
+        public String tagName;
+        public String normalName; // lc version of tag name, for case insensitive tree build
+        private String pendingAttributeName; // attribute names are generally caught in one hop, not accumulated
+        private StringBuilder pendingAttributeValue = new StringBuilder(); // but values are accumulated, from e.g. & in hrefs
+        private String pendingAttributeValueS; // try to get attr vals in one shot, vs Builder
+        private boolean hasEmptyAttributeValue = false; // distinguish boolean attribute from empty string value
+        private boolean hasPendingAttributeValue = false;
+        public boolean selfClosing = false;
+        public Attributes attributes; // start tags get attributes on construction. End tags get attributes on first new attribute (but only for parser convenience, not used).
+
+        protected Tag(@NonNull TokenType tokenType) {
+            super(tokenType);
+        }
+
+        @Override
+        public Tag reset() {
+            tagName = null;
+            normalName = null;
+            pendingAttributeName = null;
+            reset(pendingAttributeValue);
+            pendingAttributeValueS = null;
+            hasEmptyAttributeValue = false;
+            hasPendingAttributeValue = false;
+            selfClosing = false;
+            attributes = null;
+            return this;
+        }
+
+        final void newAttribute() {
+            // commits the attribute gathered so far (if any), then clears the pending state for the next one
+            if (attributes == null) {
+                attributes = new Attributes();
+            }
+            // the tokeniser has skipped whitespace control chars, but trim() may still collapse other control codes to empty
+            final String attrName = pendingAttributeName == null ? "" : pendingAttributeName.trim();
+            if (attrName.length() > 0) {
+                final String attrValue;
+                if (!hasPendingAttributeValue) {
+                    attrValue = hasEmptyAttributeValue ? "" : null; // null: boolean attribute, no value given
+                } else if (pendingAttributeValue.length() > 0) {
+                    attrValue = pendingAttributeValue.toString();
+                } else {
+                    attrValue = pendingAttributeValueS;
+                }
+                attributes.put(attrName, attrValue);
+            }
+            pendingAttributeValueS = null;
+            reset(pendingAttributeValue);
+            hasPendingAttributeValue = false;
+            hasEmptyAttributeValue = false;
+            pendingAttributeName = null;
+        }
+
+        final void finaliseTag() {
+            // finalises for emit
+            if (pendingAttributeName != null) {
+                // todo: check if attribute name exists; if so, drop and error
+                newAttribute();
+            }
+        }
+
+        final String name() { // preserves case, for input into Tag.valueOf (which may drop case)
+            Validate.isFalse(tagName == null || tagName.length() == 0);
+            return tagName;
+        }
+
+        final String normalName() { // loses case, used in tree building for working out where in tree it should go
+            return normalName;
+        }
+
+        final Tag name(String name) {
+            tagName = name;
+            normalName = lowerCase(name);
+            return this;
+        }
+
+        final boolean isSelfClosing() {
+            return selfClosing;
+        }
+
+        @SuppressWarnings({"TypeMayBeWeakened"})
+        final Attributes getAttributes() {
+            return attributes;
+        }
+
+        // these appenders are rarely hit in not null state-- caused by null chars.
+        final void appendTagName(String append) {
+            tagName = tagName == null ? append : tagName.concat(append);
+            normalName = lowerCase(tagName);
+        }
+
+        final void appendTagName(char append) {
+            appendTagName(String.valueOf(append));
+        }
+
+        final void appendAttributeName(String append) {
+            pendingAttributeName = pendingAttributeName == null ? append : pendingAttributeName.concat(append);
+        }
+
+        final void appendAttributeName(char append) {
+            appendAttributeName(String.valueOf(append));
+        }
+
+        final void appendAttributeValue(String append) {
+            ensureAttributeValue();
+            if (pendingAttributeValue.length() > 0) {
+                pendingAttributeValue.append(append);
+                return;
+            }
+            pendingAttributeValueS = append; // builder still empty: hold the string itself rather than copying it
+        }
+
+        final void appendAttributeValue(char append) {
+            ensureAttributeValue();
+            pendingAttributeValue.append(append);
+        }
+
+        final void appendAttributeValue(char[] append) {
+            ensureAttributeValue();
+            pendingAttributeValue.append(append);
+        }
+
+        final void appendAttributeValue(int[] appendCodepoints) {
+            ensureAttributeValue();
+            for (int codepoint : appendCodepoints) {
+                pendingAttributeValue.appendCodePoint(codepoint);
+            }
+        }
+
+        final void setEmptyAttributeValue() {
+            hasEmptyAttributeValue = true;
+        }
+
+        private void ensureAttributeValue() {
+            final String oneShot = pendingAttributeValueS;
+            hasPendingAttributeValue = true;
+            if (oneShot == null) return;
+            // a value was already captured in one shot: move it into the builder before anything else is appended
+            pendingAttributeValueS = null;
+            pendingAttributeValue.append(oneShot);
+        }
+    }
+
+    public final static class StartTag extends Tag {
+        StartTag() {
+            super(TokenType.StartTag);
+            this.attributes = new Attributes(); // start tags hold an Attributes instance from construction
+        }
+
+        @Override
+        public Tag reset() {
+            super.reset();
+            // todo - would prefer these to be null, but need to check Element assertions
+            this.attributes = new Attributes();
+            return this;
+        }
+
+        StartTag nameAttr(String name, Attributes attributes) {
+            this.attributes = attributes;
+            this.tagName = name;
+            this.normalName = lowerCase(name);
+            return this;
+        }
+
+        @Override
+        public String toString() {
+            final StringBuilder markup = new StringBuilder("<").append(name());
+            if (attributes != null && attributes.size() > 0)
+                markup.append(" ").append(attributes.toString());
+            return markup.append(">").toString();
+        }
+    }
+
+    public final static class EndTag extends Tag{
+        EndTag() {
+            super(TokenType.EndTag);
+        }
+
+        @Override
+        public String toString() {
+            return "</" + name() + ">";
+        }
+    }
+
+    public final static class Comment extends Token {
+        final StringBuilder data = new StringBuilder();
+        boolean bogus = false;
+
+        @Override
+        public Token reset() {
+            reset(data);
+            bogus = false;
+            return this;
+        }
+
+        Comment() {
+            super(TokenType.Comment);
+        }
+
+        String getData() {
+            return data.toString();
+        }
+
+        @Override
+        public String toString() {
+            return "<!--" + getData() + "-->";
+        }
+    }
+
+    public static class Character extends Token {
+        private String data;
+
+        Character() {
+            super(TokenType.Character);
+        }
+
+        @Override
+        public Token reset() {
+            data = null;
+            return this;
+        }
+
+        Character data(String data) {
+            this.data = data;
+            return this;
+        }
+
+        public String getData() {
+            return data;
+        }
+
+        @Override
+        public String toString() {
+            return getData();
+        }
+    }
+
+    public final static class CData extends Character {
+        CData(String data) {
+            super();
+            this.data(data);
+        }
+
+        @Override
+        public String toString() {
+            return "<![CDATA[" + getData() + "]]>";
+        }
+
+    }
+
+    public final static class EOF extends Token {
+        EOF() {
+            super(Token.TokenType.EOF);
+        }
+
+        @Override
+        public Token reset() {
+            return this;
+        }
+    }
+
+//    final boolean isDoctype() {
+//        return type == TokenType.Doctype;
+//    }
+//
+//    final Doctype asDoctype() {
+//        return (Doctype) this;
+//    }
+//
+//    final boolean isStartTag() {
+//        return type == TokenType.StartTag;
+//    }
+//
+//    final StartTag asStartTag() {
+//        return (StartTag) this;
+//    }
+//
+//    final boolean isEndTag() {
+//        return type == TokenType.EndTag;
+//    }
+//
+//    final EndTag asEndTag() {
+//        return (EndTag) this;
+//    }
+//
+//    final boolean isComment() {
+//        return type == TokenType.Comment;
+//    }
+//
+//    final Comment asComment() {
+//        return (Comment) this;
+//    }
+//
+//    final boolean isCharacter() {
+//        return type == TokenType.Character;
+//    }
+//
+//    final boolean isCData() {
+//        return this instanceof CData;
+//    }
+//
+//    final Character asCharacter() {
+//        return (Character) this;
+//    }
+//
+//    final boolean isEOF() {
+//        return type == TokenType.EOF;
+//    }
+
+    public enum TokenType {
+        Doctype,
+        StartTag,
+        EndTag,
+        Comment,
+        Character, // note no CData - treated in builder as an extension of Character
+        EOF
+    }
+}
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/Tokeniser.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/Tokeniser.java
new file mode 100644
index 00000000..3d5284bd
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/Tokeniser.java
@@ -0,0 +1,295 @@
+package ru.noties.markwon.html.jsoup.parser;
+
+import java.util.Arrays;
+
+import ru.noties.markwon.html.jsoup.helper.Validate;
+import ru.noties.markwon.html.jsoup.nodes.Entities;
+
+/**
+ * Reads the input stream into tokens.
+ */
+public final class Tokeniser {
+    static final char replacementChar = '\uFFFD'; // replaces null character
+    private static final char[] notCharRefCharsSorted = new char[]{'\t', '\n', '\r', '\f', ' ', '<', '&'};
+
+    // Some illegal character escapes are parsed by browsers as windows-1252 instead. See issue #1034
+    // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
+    static final int win1252ExtensionsStart = 0x80;
+    static final int[] win1252Extensions = new int[] {
+            // we could build this manually, but Windows-1252 is not a standard java charset so that could break on
+            // some platforms - this table is verified with a test
+            0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
+            0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F,
+            0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
+            0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178,
+    };
+
+    static {
+        Arrays.sort(notCharRefCharsSorted);
+    }
+
+    private final CharacterReader reader; // html input
+    private final ParseErrorList errors; // errors found while tokenising
+
+    private TokeniserState state = TokeniserState.Data; // current tokenisation state
+    private Token emitPending; // the token we are about to emit on next read
+    private boolean isEmitPending = false;
+    private String charsString = null; // characters pending an emit. Will fall to charsBuilder if more than one
+    private StringBuilder charsBuilder = new StringBuilder(1024); // buffers characters to output as one token, if more than one emit per read
+    StringBuilder dataBuffer = new StringBuilder(1024); // buffers data looking for </script>
+
+    Token.Tag tagPending; // tag we are building up
+    Token.StartTag startPending = new Token.StartTag();
+    Token.EndTag endPending = new Token.EndTag();
+    Token.Character charPending = new Token.Character();
+    Token.Doctype doctypePending = new Token.Doctype(); // doctype building up
+    Token.Comment commentPending = new Token.Comment(); // comment building up
+    private String lastStartTag; // the last start tag emitted, to test appropriate end tag
+
+    public Tokeniser(CharacterReader reader, ParseErrorList errors) {
+        this.reader = reader;
+        this.errors = errors;
+    }
+
+    public Token read() {
+        // drive the state machine until a state hands a token to emit(Token)
+        while (!isEmitPending) {
+            state.read(this, reader);
+        }
+        // buffered characters are returned first; the pending token stays queued for the next call
+        final String single = charsString;
+        charsString = null;
+        if (charsBuilder.length() > 0) {
+            final String buffered = charsBuilder.toString();
+            charsBuilder.setLength(0);
+            return charPending.data(buffered);
+        }
+        if (single != null) {
+            return charPending.data(single);
+        }
+        isEmitPending = false;
+        return emitPending;
+    }
+
+    void emit(Token token) {
+        Validate.isFalse(isEmitPending, "There is an unread token pending!");
+
+        isEmitPending = true;
+        emitPending = token;
+
+        final Token.TokenType kind = token.type;
+        if (kind == Token.TokenType.StartTag) {
+            // remembered so a later end tag can be tested against it
+            lastStartTag = ((Token.StartTag) token).tagName;
+        } else if (kind == Token.TokenType.EndTag && ((Token.EndTag) token).attributes != null) {
+            // Tag.reset() nulls attributes, so a non-null value means an attribute was read on this end tag
+            error("Attributes incorrectly present on end tag");
+        }
+    }
+
+    void emit(final String str) {
+        // Character data is coalesced so that a run of text and character refs is returned as one token.
+        // isEmitPending is deliberately left untouched here; read() inspects the buffers itself.
+        if (charsString == null) {
+            charsString = str;
+            return;
+        }
+        if (charsBuilder.length() == 0) {
+            // more than one emit before a read: seed the builder with the first chunk
+            charsBuilder.append(charsString);
+        }
+        charsBuilder.append(str);
+    }
+
+    void emit(char[] chars) {
+        emit(String.valueOf(chars));
+    }
+
+    void emit(int[] codepoints) {
+        emit(new String(codepoints, 0, codepoints.length));
+    }
+
+    void emit(char c) {
+        emit(String.valueOf(c));
+    }
+
+    TokeniserState getState() {
+        return state;
+    }
+
+    void transition(TokeniserState state) {
+        this.state = state;
+    }
+
+    void advanceTransition(TokeniserState state) {
+        reader.advance();
+        this.state = state;
+    }
+
+    final private int[] codepointHolder = new int[1]; // holder to not have to keep creating arrays
+    final private int[] multipointHolder = new int[2];
+    /**
+     * Attempts to consume the body of a character reference (the part after '&') at the current reader position.
+     * @param additionalAllowedCharacter when non-null and equal to the current char, nothing is consumed
+     * @param inAttribute true when the reference sits inside an attribute value; tightens named-reference matching
+     * @return decoded code point(s) in an array that is reused across calls, or null when no reference was consumed
+     */
+    int[] consumeCharacterReference(Character additionalAllowedCharacter, boolean inAttribute) {
+        if (reader.isEmpty())
+            return null;
+        if (additionalAllowedCharacter != null && additionalAllowedCharacter == reader.current())
+            return null;
+        if (reader.matchesAnySorted(notCharRefCharsSorted))
+            return null;
+        final int[] codeRef = codepointHolder;
+        reader.mark();
+        if (reader.matchConsume("#")) { // numbered
+            boolean isHexMode = reader.matchConsumeIgnoreCase("X");
+            String numRef = isHexMode ? reader.consumeHexSequence() : reader.consumeDigitSequence();
+            if (numRef.length() == 0) { // didn't match anything
+                characterReferenceError("numeric reference with no numerals");
+                reader.rewindToMark();
+                return null;
+            }
+            if (!reader.matchConsume(";"))
+                characterReferenceError("missing semicolon"); // missing semi
+            int charval = -1;
+            try {
+                charval = Integer.parseInt(numRef, isHexMode ? 16 : 10); // parseInt avoids a boxed Integer
+            } catch (NumberFormatException ignored) {
+                // value does not fit in an int: keep -1 so the range check below yields the replacement char
+            }
+            if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF) {
+                characterReferenceError("character outside of valid range");
+                codeRef[0] = replacementChar;
+                return codeRef;
+            }
+            // fix illegal unicode characters to match browser behavior
+            if (charval >= win1252ExtensionsStart && charval < win1252ExtensionsStart + win1252Extensions.length) {
+                characterReferenceError("character is not a valid unicode code point");
+                charval = win1252Extensions[charval - win1252ExtensionsStart];
+            }
+            // todo: implement number replacement table
+            // todo: check for extra illegal unicode points as parse errors
+            codeRef[0] = charval;
+            return codeRef;
+        } else { // named
+            // get as many letters as possible, and look for matching entities.
+            String nameRef = reader.consumeLetterThenDigitSequence();
+            boolean looksLegit = reader.matches(';');
+            // found if a base named entity without a ;, or an extended entity with the ;.
+            boolean found = (Entities.isBaseNamedEntity(nameRef) || (Entities.isNamedEntity(nameRef) && looksLegit));
+            if (!found) {
+                reader.rewindToMark();
+                if (looksLegit) // named with semicolon
+                    characterReferenceError(String.format("invalid named reference '%s'", nameRef));
+                return null;
+            }
+            if (inAttribute && (reader.matchesLetter() || reader.matchesDigit() || reader.matchesAny('=', '-', '_'))) {
+                // don't want that to match
+                reader.rewindToMark();
+                return null;
+            }
+            if (!reader.matchConsume(";"))
+                characterReferenceError("missing semicolon"); // missing semi
+            int numChars = Entities.codepointsForName(nameRef, multipointHolder);
+            if (numChars == 1) {
+                codeRef[0] = multipointHolder[0];
+                return codeRef;
+            }
+            if (numChars != 2)
+                Validate.fail("Unexpected characters returned for " + nameRef);
+            return multipointHolder;
+        }
+    }
+
+    Token.Tag createTagPending(boolean start) {
+        final Token.Tag recycled = start ? startPending : endPending; // both tag tokens are reused via reset()
+        return tagPending = recycled.reset();
+    }
+
+    void emitTagPending() {
+        tagPending.finaliseTag();
+        emit(tagPending);
+    }
+
+    void createCommentPending() {
+        commentPending.reset();
+    }
+
+    void emitCommentPending() {
+        emit(commentPending);
+    }
+
+    void createDoctypePending() {
+        doctypePending.reset();
+    }
+
+    void emitDoctypePending() {
+        emit(doctypePending);
+    }
+
+    void createTempBuffer() {
+        Token.reset(dataBuffer);
+    }
+
+    boolean isAppropriateEndTagToken() {
+        return lastStartTag != null && tagPending.name().equalsIgnoreCase(lastStartTag);
+    }
+
+    String appropriateEndTagName() {
+        return lastStartTag; // could be null
+    }
+
+    void error(TokeniserState state) {
+        if (errors.canAddError())
+            errors.add(new ParseError(reader.pos(), "Unexpected character '%s' in input state [%s]", reader.current(), state));
+    }
+
+    void eofError(TokeniserState state) {
+        if (errors.canAddError())
+            errors.add(new ParseError(reader.pos(), "Unexpectedly reached end of file (EOF) in input state [%s]", state));
+    }
+
+    private void characterReferenceError(String message) {
+        if (errors.canAddError())
+            errors.add(new ParseError(reader.pos(), "Invalid character reference: %s", message));
+    }
+
+    void error(String errorMsg) {
+        if (errors.canAddError())
+            errors.add(new ParseError(reader.pos(), errorMsg));
+    }
+
+    boolean currentNodeInHtmlNS() {
+        // todo: implement namespaces correctly
+        return true;
+        // Element currentNode = currentNode();
+        // return currentNode != null && currentNode.namespace().equals("HTML");
+    }
+
+//    /**
+//     * Utility method to consume reader and unescape entities found within.
+//     * @param inAttribute if the text to be unescaped is in an attribute
+//     * @return unescaped string from reader
+//     */
+//    String unescapeEntities(boolean inAttribute) {
+//        StringBuilder builder = StringUtil.stringBuilder();
+//        while (!reader.isEmpty()) {
+//            builder.append(reader.consumeTo('&'));
+//            if (reader.matches('&')) {
+//                reader.consume();
+//                int[] c = consumeCharacterReference(null, inAttribute);
+//                if (c == null || c.length==0)
+//                    builder.append('&');
+//                else {
+//                    builder.appendCodePoint(c[0]);
+//                    if (c.length == 2)
+//                        builder.appendCodePoint(c[1]);
+//                }
+//
+//            }
+//        }
+//        return builder.toString();
+//    }
+}
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/TokeniserState.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/TokeniserState.java
new file mode 100644
index 00000000..01a98958
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/jsoup/parser/TokeniserState.java
@@ -0,0 +1,1737 @@
+package ru.noties.markwon.html.jsoup.parser;
+
+import ru.noties.markwon.html.jsoup.nodes.DocumentType;
+
+/**
+ * States and transition activations for the Tokeniser.
+ */
+enum TokeniserState {
+    Data {
+        // in data state, gather characters until a character reference or tag is found
+        void read(Tokeniser t, CharacterReader r) {
+            switch (r.current()) {
+                case '&':
+                    t.advanceTransition(CharacterReferenceInData);
+                    break;
+                case '<':
+                    t.advanceTransition(TagOpen);
+                    break;
+                case nullChar:
+                    t.error(this); // NOT replacement character (oddly?)
+                    t.emit(r.consume());
+                    break;
+                case eof:
+                    t.emit(new Token.EOF());
+                    break;
+                default:
+                    String data = r.consumeData();
+                    t.emit(data);
+                    break;
+            }
+        }
+    },
+    CharacterReferenceInData {
+        // from & in data
+        void read(Tokeniser t, CharacterReader r) {
+            readCharRef(t, Data);
+        }
+    },
+    Rcdata {
+        /// handles data in title, textarea etc
+        void read(Tokeniser t, CharacterReader r) {
+            switch (r.current()) {
+                case '&':
+                    t.advanceTransition(CharacterReferenceInRcdata);
+                    break;
+                case '<':
+                    t.advanceTransition(RcdataLessthanSign);
+                    break;
+                case nullChar:
+                    t.error(this);
+                    r.advance();
+                    t.emit(replacementChar);
+                    break;
+                case eof:
+                    t.emit(new Token.EOF());
+                    break;
+                default:
+                    String data = r.consumeToAny('&', '<', nullChar);
+                    t.emit(data);
+                    break;
+            }
+        }
+    },
+    CharacterReferenceInRcdata {
+        void read(Tokeniser t, CharacterReader r) {
+            readCharRef(t, Rcdata);
+        }
+    },
+    Rawtext {
+        void read(Tokeniser t, CharacterReader r) {
+            readData(t, r, this, RawtextLessthanSign);
+        }
+    },
+    ScriptData {
+        void read(Tokeniser t, CharacterReader r) {
+            readData(t, r, this, ScriptDataLessthanSign);
+        }
+    },
+    PLAINTEXT {
+        void read(Tokeniser t, CharacterReader r) {
+            switch (r.current()) {
+                case nullChar:
+                    t.error(this);
+                    r.advance();
+                    t.emit(replacementChar);
+                    break;
+                case eof:
+                    t.emit(new Token.EOF());
+                    break;
+                default:
+                    String data = r.consumeTo(nullChar);
+                    t.emit(data);
+                    break;
+            }
+        }
+    },
+    TagOpen {
+        // from < in data
+        void read(Tokeniser t, CharacterReader r) {
+            switch (r.current()) {
+                case '!':
+                    t.advanceTransition(MarkupDeclarationOpen);
+                    break;
+                case '/':
+                    t.advanceTransition(EndTagOpen);
+                    break;
+                case '?':
+                    t.advanceTransition(BogusComment);
+                    break;
+                default:
+                    if (r.matchesLetter()) {
+                        t.createTagPending(true);
+                        t.transition(TagName);
+                    } else {
+                        t.error(this);
+                        t.emit('<'); // char that got us here
+                        t.transition(Data);
+                    }
+                    break;
+            }
+        }
+    },
+    EndTagOpen {
+        void read(Tokeniser t, CharacterReader r) {
+            if (r.isEmpty()) {
+                t.eofError(this);
+                t.emit("</");
+                t.transition(Data);
+            } else if (r.matchesLetter()) {
+                t.createTagPending(false);
+                t.transition(TagName);
+            } else if (r.matches('>')) {
+                t.error(this);
+                t.advanceTransition(Data);
+            } else {
+                t.error(this);
+                t.advanceTransition(BogusComment);
+            }
+        }
+    },
+    TagName {
+        // from < or </ in data, will have start or end tag pending
+        void read(Tokeniser t, CharacterReader r) {
+            // previous TagOpen state did NOT consume, will have a letter char in current
+            //String tagName = r.consumeToAnySorted(tagCharsSorted).toLowerCase();
+            String tagName = r.consumeTagName();
+            t.tagPending.appendTagName(tagName);
+
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    t.transition(BeforeAttributeName);
+                    break;
+                case '/':
+                    t.transition(SelfClosingStartTag);
+                    break;
+                case '>':
+                    t.emitTagPending();
+                    t.transition(Data);
+                    break;
+                case nullChar: // replacement
+                    t.tagPending.appendTagName(replacementStr);
+                    break;
+                case eof: // should emit pending tag?
+                    t.eofError(this);
+                    t.transition(Data);
+                    break;
+                default: // buffer underrun
+                    t.tagPending.appendTagName(c);
+            }
+        }
+    },
+    RcdataLessthanSign {
+        // from < in rcdata
+        void read(Tokeniser t, CharacterReader r) {
+            if (r.matches('/')) {
+                t.createTempBuffer();
+                t.advanceTransition(RCDATAEndTagOpen);
+            } else if (r.matchesLetter() && t.appropriateEndTagName() != null && !r.containsIgnoreCase("</" + t.appropriateEndTagName())) {
+                // diverge from spec: got a start tag, but there's no appropriate end tag (</title>), so rather than
+                // consuming to EOF; break out here
+                t.tagPending = t.createTagPending(false).name(t.appropriateEndTagName());
+                t.emitTagPending();
+                r.unconsume(); // undo "<"
+                t.transition(Data);
+            } else {
+                t.emit("<");
+                t.transition(Rcdata);
+            }
+        }
+    },
+    RCDATAEndTagOpen {
+        void read(Tokeniser t, CharacterReader r) {
+            if (r.matchesLetter()) {
+                t.createTagPending(false);
+                t.tagPending.appendTagName(r.current());
+                t.dataBuffer.append(r.current());
+                t.advanceTransition(RCDATAEndTagName);
+            } else {
+                t.emit("</");
+                t.transition(Rcdata);
+            }
+        }
+    },
+    RCDATAEndTagName {
+        void read(Tokeniser t, CharacterReader r) {
+            if (r.matchesLetter()) {
+                String name = r.consumeLetterSequence();
+                t.tagPending.appendTagName(name);
+                t.dataBuffer.append(name);
+                return;
+            }
+
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    if (t.isAppropriateEndTagToken())
+                        t.transition(BeforeAttributeName);
+                    else
+                        anythingElse(t, r);
+                    break;
+                case '/':
+                    if (t.isAppropriateEndTagToken())
+                        t.transition(SelfClosingStartTag);
+                    else
+                        anythingElse(t, r);
+                    break;
+                case '>':
+                    if (t.isAppropriateEndTagToken()) {
+                        t.emitTagPending();
+                        t.transition(Data);
+                    }
+                    else
+                        anythingElse(t, r);
+                    break;
+                default:
+                    anythingElse(t, r);
+            }
+        }
+
+        private void anythingElse(Tokeniser t, CharacterReader r) {
+            t.emit("</" + t.dataBuffer.toString());
+            r.unconsume();
+            t.transition(Rcdata);
+        }
+    },
+    RawtextLessthanSign {
+        // Decides whether a '<' seen in raw text opens an end tag ("</") or is just text.
+        void read(Tokeniser t, CharacterReader r) {
+            if (!r.matches('/')) {
+                // plain '<': emit it and continue with raw text
+                t.emit('<');
+                t.transition(Rawtext);
+                return;
+            }
+
+            // "</" found: prepare the temp buffer and move on to the end-tag-open state
+            t.createTempBuffer();
+            t.advanceTransition(RawtextEndTagOpen);
+        }
+    },
+    RawtextEndTagOpen {
+        // Delegates to the shared readEndTag helper with RawtextEndTagName and Rawtext as its two target states.
+        // NOTE(review): presumably the first is entered when a tag name starts and the second is the
+        // fallback - confirm against readEndTag.
+        void read(Tokeniser t, CharacterReader r) {
+            readEndTag(t, r, RawtextEndTagName, Rawtext);
+        }
+    },
+    RawtextEndTagName {
+        // Delegates to the shared handleDataEndTag helper, passing Rawtext as the state argument.
+        // NOTE(review): presumably Rawtext is the state to return to when the end tag does not match - confirm.
+        void read(Tokeniser t, CharacterReader r) {
+            handleDataEndTag(t, r, Rawtext);
+        }
+    },
+    ScriptDataLessthanSign {
+        void read(Tokeniser t, CharacterReader r) {
+            switch (r.consume()) {
+                case '/':
+                    t.createTempBuffer();
+                    t.transition(ScriptDataEndTagOpen);
+                    break;
+                case '!':
+                    t.emit("<!");
+                    t.transition(ScriptDataEscapeStart);
+                    break;
+                default:
+                    t.emit("<");
+                    r.unconsume();
+                    t.transition(ScriptData);
+            }
+        }
+    },
+    ScriptDataEndTagOpen {
+        // Delegates to the shared readEndTag helper with ScriptDataEndTagName and ScriptData as its two target states.
+        // NOTE(review): presumably the first is entered when a tag name starts and the second is the
+        // fallback - confirm against readEndTag.
+        void read(Tokeniser t, CharacterReader r) {
+            readEndTag(t, r, ScriptDataEndTagName, ScriptData);
+        }
+    },
+    ScriptDataEndTagName {
+        // Delegates to the shared handleDataEndTag helper, passing ScriptData as the state argument.
+        // NOTE(review): presumably ScriptData is the state to return to when the end tag does not match - confirm.
+        void read(Tokeniser t, CharacterReader r) {
+            handleDataEndTag(t, r, ScriptData);
+        }
+    },
+    ScriptDataEscapeStart {
+        // After "<!" in script data: a '-' continues towards an escaped section, anything else returns to ScriptData.
+        void read(Tokeniser t, CharacterReader r) {
+            if (!r.matches('-')) {
+                t.transition(ScriptData);
+                return;
+            }
+
+            t.emit('-');
+            t.advanceTransition(ScriptDataEscapeStartDash);
+        }
+    },
+    ScriptDataEscapeStartDash {
+        // After "<!-" in script data: a second '-' enters the escaped dash-dash state, otherwise back to ScriptData.
+        void read(Tokeniser t, CharacterReader r) {
+            if (!r.matches('-')) {
+                t.transition(ScriptData);
+                return;
+            }
+
+            t.emit('-');
+            t.advanceTransition(ScriptDataEscapedDashDash);
+        }
+    },
+    ScriptDataEscaped {
+        // Reads escaped script data: runs of text are emitted unchanged, while '-' and '<' hand off to
+        // dedicated states. The current char is only peeked, so each branch advances the reader itself.
+        void read(Tokeniser t, CharacterReader r) {
+            if (r.isEmpty()) {
+                // reader reports empty: flag an EOF error and fall back to Data
+                t.eofError(this);
+                t.transition(Data);
+                return;
+            }
+
+            switch (r.current()) {
+                case '-':
+                    t.emit('-');
+                    t.advanceTransition(ScriptDataEscapedDash);
+                    break;
+                case '<':
+                    // '<' is not emitted here; the less-than state decides what to output
+                    t.advanceTransition(ScriptDataEscapedLessthanSign);
+                    break;
+                case nullChar:
+                    // null char: report, skip it and emit the replacement char in its place
+                    t.error(this);
+                    r.advance();
+                    t.emit(replacementChar);
+                    break;
+                default:
+                    // bulk-read up to the next char that needs special handling
+                    String data = r.consumeToAny('-', '<', nullChar);
+                    t.emit(data);
+            }
+        }
+    },
+    ScriptDataEscapedDash {
+        void read(Tokeniser t, CharacterReader r) {
+            if (r.isEmpty()) {
+                t.eofError(this);
+                t.transition(Data);
+                return;
+            }
+
+            char c = r.consume();
+            switch (c) {
+                case '-':
+                    t.emit(c);
+                    t.transition(ScriptDataEscapedDashDash);
+                    break;
+                case '<':
+                    t.transition(ScriptDataEscapedLessthanSign);
+                    break;
+                case nullChar:
+                    t.error(this);
+                    t.emit(replacementChar);
+                    t.transition(ScriptDataEscaped);
+                    break;
+                default:
+                    t.emit(c);
+                    t.transition(ScriptDataEscaped);
+            }
+        }
+    },
+    ScriptDataEscapedDashDash {
+        // Handles the character after "--" inside escaped script data; a '>' here leaves the escaped section.
+        void read(Tokeniser t, CharacterReader r) {
+            if (r.isEmpty()) {
+                // reader reports empty: flag an EOF error and fall back to Data
+                t.eofError(this);
+                t.transition(Data);
+                return;
+            }
+
+            char c = r.consume();
+            switch (c) {
+                case '-':
+                    // further dashes are emitted and keep us in this state
+                    t.emit(c);
+                    break;
+                case '<':
+                    t.transition(ScriptDataEscapedLessthanSign);
+                    break;
+                case '>':
+                    // "-->" seen: emit the '>' and return to plain script data
+                    t.emit(c);
+                    t.transition(ScriptData);
+                    break;
+                case nullChar:
+                    // null char: report and emit the replacement char instead
+                    t.error(this);
+                    t.emit(replacementChar);
+                    t.transition(ScriptDataEscaped);
+                    break;
+                default:
+                    t.emit(c);
+                    t.transition(ScriptDataEscaped);
+            }
+        }
+    },
+    ScriptDataEscapedLessthanSign {
+        // After a '<' in escaped script data: a letter may start a double escape, '/' may start an end tag.
+        void read(Tokeniser t, CharacterReader r) {
+            if (r.matchesLetter()) {
+                // buffer the letter for the double-escape check and emit "<" plus that letter as text
+                t.createTempBuffer();
+                t.dataBuffer.append(r.current());
+                t.emit("<" + r.current());
+                t.advanceTransition(ScriptDataDoubleEscapeStart);
+                return;
+            }
+
+            if (r.matches('/')) {
+                t.createTempBuffer();
+                t.advanceTransition(ScriptDataEscapedEndTagOpen);
+                return;
+            }
+
+            // neither: the '<' is plain text
+            t.emit('<');
+            t.transition(ScriptDataEscaped);
+        }
+    },
+    ScriptDataEscapedEndTagOpen {
+        // After "</" in escaped script data: a letter begins an end tag name, anything else is literal text.
+        void read(Tokeniser t, CharacterReader r) {
+            if (!r.matchesLetter()) {
+                // not a tag: give the "</" back as text
+                t.emit("</");
+                t.transition(ScriptDataEscaped);
+                return;
+            }
+
+            // record the first letter in both the pending end tag and the raw buffer
+            t.createTagPending(false);
+            t.tagPending.appendTagName(r.current());
+            t.dataBuffer.append(r.current());
+            t.advanceTransition(ScriptDataEscapedEndTagName);
+        }
+    },
+    ScriptDataEscapedEndTagName {
+        // Delegates to the shared handleDataEndTag helper, passing ScriptDataEscaped as the state argument.
+        // NOTE(review): presumably ScriptDataEscaped is the state to return to on a non-matching end tag - confirm.
+        void read(Tokeniser t, CharacterReader r) {
+            handleDataEndTag(t, r, ScriptDataEscaped);
+        }
+    },
+    ScriptDataDoubleEscapeStart {
+        // Delegates to the shared handleDataDoubleEscapeTag helper with ScriptDataDoubleEscaped and
+        // ScriptDataEscaped as its two target states (same helper as ScriptDataDoubleEscapeEnd, states swapped).
+        // NOTE(review): which argument is the "matched" target is defined by the helper - confirm there.
+        void read(Tokeniser t, CharacterReader r) {
+            handleDataDoubleEscapeTag(t, r, ScriptDataDoubleEscaped, ScriptDataEscaped);
+        }
+    },
+    ScriptDataDoubleEscaped {
+        // Reads double-escaped script data. Unlike ScriptDataEscaped, a '<' is emitted right here before the
+        // transition. The current char is only peeked, so each branch advances the reader itself.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.current();
+            switch (c) {
+                case '-':
+                    t.emit(c);
+                    t.advanceTransition(ScriptDataDoubleEscapedDash);
+                    break;
+                case '<':
+                    t.emit(c);
+                    t.advanceTransition(ScriptDataDoubleEscapedLessthanSign);
+                    break;
+                case nullChar:
+                    // null char: report, skip it and emit the replacement char in its place
+                    t.error(this);
+                    r.advance();
+                    t.emit(replacementChar);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.transition(Data);
+                    break;
+                default:
+                    // bulk-read up to the next char that needs special handling
+                    String data = r.consumeToAny('-', '<', nullChar);
+                    t.emit(data);
+            }
+        }
+    },
+    ScriptDataDoubleEscapedDash {
+        // Handles the character following a single '-' inside double-escaped script data.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '-':
+                    t.emit(c);
+                    t.transition(ScriptDataDoubleEscapedDashDash);
+                    break;
+                case '<':
+                    t.emit(c);
+                    t.transition(ScriptDataDoubleEscapedLessthanSign);
+                    break;
+                case nullChar:
+                    // null char: report and emit the replacement char instead
+                    t.error(this);
+                    t.emit(replacementChar);
+                    t.transition(ScriptDataDoubleEscaped);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.transition(Data);
+                    break;
+                default:
+                    t.emit(c);
+                    t.transition(ScriptDataDoubleEscaped);
+            }
+        }
+    },
+    ScriptDataDoubleEscapedDashDash {
+        // Handles the character after "--" inside double-escaped script data; a '>' returns to plain ScriptData.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '-':
+                    // further dashes are emitted and keep us in this state
+                    t.emit(c);
+                    break;
+                case '<':
+                    t.emit(c);
+                    t.transition(ScriptDataDoubleEscapedLessthanSign);
+                    break;
+                case '>':
+                    t.emit(c);
+                    t.transition(ScriptData);
+                    break;
+                case nullChar:
+                    // null char: report and emit the replacement char instead
+                    t.error(this);
+                    t.emit(replacementChar);
+                    t.transition(ScriptDataDoubleEscaped);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.transition(Data);
+                    break;
+                default:
+                    t.emit(c);
+                    t.transition(ScriptDataDoubleEscaped);
+            }
+        }
+    },
+    ScriptDataDoubleEscapedLessthanSign {
+        // After a '<' in double-escaped script data: only "</" can begin the end of the double escape.
+        void read(Tokeniser t, CharacterReader r) {
+            if (!r.matches('/')) {
+                t.transition(ScriptDataDoubleEscaped);
+                return;
+            }
+
+            // emit the '/' as text and start buffering the name that follows
+            t.emit('/');
+            t.createTempBuffer();
+            t.advanceTransition(ScriptDataDoubleEscapeEnd);
+        }
+    },
+    ScriptDataDoubleEscapeEnd {
+        // Delegates to the shared handleDataDoubleEscapeTag helper with ScriptDataEscaped and
+        // ScriptDataDoubleEscaped as its two target states (same helper as ScriptDataDoubleEscapeStart, states swapped).
+        // NOTE(review): which argument is the "matched" target is defined by the helper - confirm there.
+        void read(Tokeniser t, CharacterReader r) {
+            handleDataDoubleEscapeTag(t,r, ScriptDataEscaped, ScriptDataDoubleEscaped);
+        }
+    },
+    BeforeAttributeName {
+        // from tagname <xxx
+        // Skips whitespace after the tag name; any character that can begin an attribute name starts a
+        // new attribute on the pending tag and moves to AttributeName.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    break; // ignore whitespace
+                case '/':
+                    t.transition(SelfClosingStartTag);
+                    break;
+                case '>':
+                    // tag is complete
+                    t.emitTagPending();
+                    t.transition(Data);
+                    break;
+                case nullChar:
+                    // report the error here, then push the null char back for AttributeName to process
+                    t.error(this);
+                    t.tagPending.newAttribute();
+                    r.unconsume();
+                    t.transition(AttributeName);
+                    break;
+                case eof:
+                    // the pending tag is not emitted on EOF
+                    t.eofError(this);
+                    t.transition(Data);
+                    break;
+                case '"':
+                case '\'':
+                case '<':
+                case '=':
+                    // flagged as an error, but the char still becomes the first char of the new attribute name
+                    t.error(this);
+                    t.tagPending.newAttribute();
+                    t.tagPending.appendAttributeName(c);
+                    t.transition(AttributeName);
+                    break;
+                default: // A-Z, anything else
+                    // push the char back so AttributeName can bulk-read the whole name
+                    t.tagPending.newAttribute();
+                    r.unconsume();
+                    t.transition(AttributeName);
+            }
+        }
+    },
+    AttributeName {
+        // from before attribute name
+        // Bulk-reads name characters up to one of attributeNameCharsSorted, then dispatches on the next char.
+        void read(Tokeniser t, CharacterReader r) {
+            String name = r.consumeToAnySorted(attributeNameCharsSorted);
+            t.tagPending.appendAttributeName(name);
+
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    t.transition(AfterAttributeName);
+                    break;
+                case '/':
+                    t.transition(SelfClosingStartTag);
+                    break;
+                case '=':
+                    t.transition(BeforeAttributeValue);
+                    break;
+                case '>':
+                    // tag is complete
+                    t.emitTagPending();
+                    t.transition(Data);
+                    break;
+                case nullChar:
+                    // null char: report and substitute the replacement char; stay in this state
+                    t.error(this);
+                    t.tagPending.appendAttributeName(replacementChar);
+                    break;
+                case eof:
+                    // the pending tag is not emitted on EOF
+                    t.eofError(this);
+                    t.transition(Data);
+                    break;
+                case '"':
+                case '\'':
+                case '<':
+                    // flagged as an error, but the char is still kept as part of the name
+                    t.error(this);
+                    t.tagPending.appendAttributeName(c);
+                    break;
+                default: // buffer underrun
+                    t.tagPending.appendAttributeName(c);
+            }
+        }
+    },
+    AfterAttributeName {
+        // Entered after whitespace that follows an attribute name: skips further whitespace, then either
+        // continues to the value ('='), finishes the tag ('/' or '>'), or starts the next attribute.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    // ignore
+                    break;
+                case '/':
+                    t.transition(SelfClosingStartTag);
+                    break;
+                case '=':
+                    t.transition(BeforeAttributeValue);
+                    break;
+                case '>':
+                    // tag is complete
+                    t.emitTagPending();
+                    t.transition(Data);
+                    break;
+                case nullChar:
+                    t.error(this);
+                    // the previous name already ended at the whitespace, so the replacement char must begin a
+                    // NEW attribute (as every other name-starting branch below does) instead of being glued
+                    // onto the previous attribute's name
+                    t.tagPending.newAttribute();
+                    t.tagPending.appendAttributeName(replacementChar);
+                    t.transition(AttributeName);
+                    break;
+                case eof:
+                    // the pending tag is not emitted on EOF
+                    t.eofError(this);
+                    t.transition(Data);
+                    break;
+                case '"':
+                case '\'':
+                case '<':
+                    // flagged as an error, but the char still becomes the first char of a new attribute name
+                    t.error(this);
+                    t.tagPending.newAttribute();
+                    t.tagPending.appendAttributeName(c);
+                    t.transition(AttributeName);
+                    break;
+                default: // A-Z, anything else
+                    // push the char back so AttributeName can bulk-read the whole name
+                    t.tagPending.newAttribute();
+                    r.unconsume();
+                    t.transition(AttributeName);
+            }
+        }
+    },
+    BeforeAttributeValue {
+        // Entered after '=': skips whitespace and picks the double-quoted, single-quoted or unquoted value state.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    // ignore
+                    break;
+                case '"':
+                    t.transition(AttributeValue_doubleQuoted);
+                    break;
+                case '&':
+                    // push '&' back so the unquoted state can treat it as a character reference
+                    r.unconsume();
+                    t.transition(AttributeValue_unquoted);
+                    break;
+                case '\'':
+                    t.transition(AttributeValue_singleQuoted);
+                    break;
+                case nullChar:
+                    // null char: report and substitute the replacement char as the first value char
+                    t.error(this);
+                    t.tagPending.appendAttributeValue(replacementChar);
+                    t.transition(AttributeValue_unquoted);
+                    break;
+                case eof:
+                    // here the pending tag IS emitted on EOF
+                    t.eofError(this);
+                    t.emitTagPending();
+                    t.transition(Data);
+                    break;
+                case '>':
+                    // "name=>" - missing value: report, but still emit the tag
+                    t.error(this);
+                    t.emitTagPending();
+                    t.transition(Data);
+                    break;
+                case '<':
+                case '=':
+                case '`':
+                    // flagged as an error, but the char is still kept as the first value char
+                    t.error(this);
+                    t.tagPending.appendAttributeValue(c);
+                    t.transition(AttributeValue_unquoted);
+                    break;
+                default:
+                    // push the char back so the unquoted state can bulk-read the value
+                    r.unconsume();
+                    t.transition(AttributeValue_unquoted);
+            }
+        }
+    },
+    AttributeValue_doubleQuoted {
+        // Reads a double-quoted attribute value: bulk-reads up to one of attributeDoubleValueCharsSorted,
+        // then handles the closing quote, character references, null chars and EOF.
+        void read(Tokeniser t, CharacterReader r) {
+            String value = r.consumeToAny(attributeDoubleValueCharsSorted);
+            if (value.length() > 0)
+                t.tagPending.appendAttributeValue(value);
+            else
+                // nothing was read: mark the value as explicitly empty
+                t.tagPending.setEmptyAttributeValue();
+
+            char c = r.consume();
+            switch (c) {
+                case '"':
+                    t.transition(AfterAttributeValue_quoted);
+                    break;
+                case '&':
+                    // try to decode a character reference; a null result means none was recognised,
+                    // so the '&' is kept literally
+                    int[] ref = t.consumeCharacterReference('"', true);
+                    if (ref != null)
+                        t.tagPending.appendAttributeValue(ref);
+                    else
+                        t.tagPending.appendAttributeValue('&');
+                    break;
+                case nullChar:
+                    // null char: report and substitute the replacement char
+                    t.error(this);
+                    t.tagPending.appendAttributeValue(replacementChar);
+                    break;
+                case eof:
+                    // the pending tag is not emitted on EOF
+                    t.eofError(this);
+                    t.transition(Data);
+                    break;
+                default: // hit end of buffer in first read, still in attribute
+                    t.tagPending.appendAttributeValue(c);
+            }
+        }
+    },
+    AttributeValue_singleQuoted {
+        // Reads a single-quoted attribute value: bulk-reads up to one of attributeSingleValueCharsSorted,
+        // then handles the closing quote, character references, null chars and EOF.
+        void read(Tokeniser t, CharacterReader r) {
+            String value = r.consumeToAny(attributeSingleValueCharsSorted);
+            if (value.length() > 0)
+                t.tagPending.appendAttributeValue(value);
+            else
+                // nothing was read: mark the value as explicitly empty
+                t.tagPending.setEmptyAttributeValue();
+
+            char c = r.consume();
+            switch (c) {
+                case '\'':
+                    t.transition(AfterAttributeValue_quoted);
+                    break;
+                case '&':
+                    // try to decode a character reference; a null result means none was recognised,
+                    // so the '&' is kept literally
+                    int[] ref = t.consumeCharacterReference('\'', true);
+                    if (ref != null)
+                        t.tagPending.appendAttributeValue(ref);
+                    else
+                        t.tagPending.appendAttributeValue('&');
+                    break;
+                case nullChar:
+                    // null char: report and substitute the replacement char
+                    t.error(this);
+                    t.tagPending.appendAttributeValue(replacementChar);
+                    break;
+                case eof:
+                    // the pending tag is not emitted on EOF
+                    t.eofError(this);
+                    t.transition(Data);
+                    break;
+                default: // hit end of buffer in first read, still in attribute
+                    t.tagPending.appendAttributeValue(c);
+            }
+        }
+    },
+    AttributeValue_unquoted {
+        // Reads an unquoted attribute value: bulk-reads up to one of attributeValueUnquoted, then
+        // dispatches on the next char. Whitespace or '>' ends the value.
+        void read(Tokeniser t, CharacterReader r) {
+            String value = r.consumeToAnySorted(attributeValueUnquoted);
+            if (value.length() > 0)
+                t.tagPending.appendAttributeValue(value);
+
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    t.transition(BeforeAttributeName);
+                    break;
+                case '&':
+                    // try to decode a character reference; a null result means none was recognised,
+                    // so the '&' is kept literally
+                    int[] ref = t.consumeCharacterReference('>', true);
+                    if (ref != null)
+                        t.tagPending.appendAttributeValue(ref);
+                    else
+                        t.tagPending.appendAttributeValue('&');
+                    break;
+                case '>':
+                    // tag is complete
+                    t.emitTagPending();
+                    t.transition(Data);
+                    break;
+                case nullChar:
+                    // null char: report and substitute the replacement char
+                    t.error(this);
+                    t.tagPending.appendAttributeValue(replacementChar);
+                    break;
+                case eof:
+                    // the pending tag is not emitted on EOF
+                    t.eofError(this);
+                    t.transition(Data);
+                    break;
+                case '"':
+                case '\'':
+                case '<':
+                case '=':
+                case '`':
+                    // flagged as an error, but the char is still kept as part of the value
+                    t.error(this);
+                    t.tagPending.appendAttributeValue(c);
+                    break;
+                default: // hit end of buffer in first read, still in attribute
+                    t.tagPending.appendAttributeValue(c);
+            }
+
+        }
+    },
+    // CharacterReferenceInAttributeValue state handled inline
+    AfterAttributeValue_quoted {
+        // Entered right after the closing quote of an attribute value; expects whitespace, '/' or '>'.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    t.transition(BeforeAttributeName);
+                    break;
+                case '/':
+                    t.transition(SelfClosingStartTag);
+                    break;
+                case '>':
+                    // tag is complete
+                    t.emitTagPending();
+                    t.transition(Data);
+                    break;
+                case eof:
+                    // the pending tag is not emitted on EOF
+                    t.eofError(this);
+                    t.transition(Data);
+                    break;
+                default:
+                    // no separator after the quote (e.g. a="b"c): report, then reprocess the char as if
+                    // whitespace had been present
+                    t.error(this);
+                    r.unconsume();
+                    t.transition(BeforeAttributeName);
+            }
+
+        }
+    },
+    SelfClosingStartTag {
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '>':
+                    t.tagPending.selfClosing = true;
+                    t.emitTagPending();
+                    t.transition(Data);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.transition(Data);
+                    break;
+                default:
+                    t.error(this);
+                    r.unconsume();
+                    t.transition(BeforeAttributeName);
+            }
+        }
+    },
+    BogusComment {
+        void read(Tokeniser t, CharacterReader r) {
+            // todo: handle bogus comment starting from eof. when does that trigger?
+            // rewind to capture character that lead us here
+            r.unconsume();
+            Token.Comment comment = new Token.Comment();
+            comment.bogus = true;
+            comment.data.append(r.consumeTo('>'));
+            // todo: replace nullChar with replaceChar
+            t.emit(comment);
+            t.advanceTransition(Data);
+        }
+    },
+    MarkupDeclarationOpen {
+        // Entered after "<!": recognises a comment ("--"), a DOCTYPE or a CDATA section; anything else is a bogus comment.
+        void read(Tokeniser t, CharacterReader r) {
+            if (r.matchConsume("--")) {
+                t.createCommentPending();
+                t.transition(CommentStart);
+                return;
+            }
+
+            if (r.matchConsumeIgnoreCase("DOCTYPE")) {
+                t.transition(Doctype);
+                return;
+            }
+
+            if (r.matchConsume("[CDATA[")) {
+                // todo: should actually check the current namespace, since only non-html allows cdata
+                // (i.e. !t.currentNodeInHtmlNS() && r.matchConsume("[CDATA[")). until namespace is
+                // implemented properly, keep handling as cdata
+                t.createTempBuffer();
+                t.transition(CdataSection);
+                return;
+            }
+
+            t.error(this);
+            // advance so this character gets in bogus comment data's rewind
+            t.advanceTransition(BogusComment);
+        }
+    },
+    CommentStart {
+        // Handles the first character of a comment's content.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '-':
+                    t.transition(CommentStartDash);
+                    break;
+                case nullChar:
+                    // null char: report and substitute the replacement char
+                    t.error(this);
+                    t.commentPending.data.append(replacementChar);
+                    t.transition(Comment);
+                    break;
+                case '>':
+                    // '>' as first content char: report, and emit the pending comment as it stands
+                    t.error(this);
+                    t.emitCommentPending();
+                    t.transition(Data);
+                    break;
+                case eof:
+                    // EOF still emits whatever comment was collected
+                    t.eofError(this);
+                    t.emitCommentPending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.commentPending.data.append(c);
+                    t.transition(Comment);
+            }
+        }
+    },
+    CommentStartDash {
+        // Entered when the comment content starts with a single '-', which has been consumed but not yet
+        // added to the comment data. Either that dash is the first half of a closing "--" or it is
+        // ordinary comment text.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '-':
+                    // second dash: we may be looking at the closing "--" (e.g. the empty comment <!---->),
+                    // so let CommentEnd decide; looping back into this state treated <!----> as an error
+                    // and lost the "--" of inputs such as <!----x-->
+                    t.transition(CommentEnd);
+                    break;
+                case nullChar:
+                    t.error(this);
+                    // the pending dash was plain content: keep it, then substitute the replacement char
+                    t.commentPending.data.append('-').append(replacementChar);
+                    t.transition(Comment);
+                    break;
+                case '>':
+                    // "<!--->": abruptly closed comment; report and emit the pending comment as it stands
+                    t.error(this);
+                    t.emitCommentPending();
+                    t.transition(Data);
+                    break;
+                case eof:
+                    // EOF still emits whatever comment was collected
+                    t.eofError(this);
+                    t.emitCommentPending();
+                    t.transition(Data);
+                    break;
+                default:
+                    // the pending dash was plain content: keep it in front of the current char
+                    t.commentPending.data.append('-').append(c);
+                    t.transition(Comment);
+            }
+        }
+    },
+    Comment {
+        // Accumulates comment text. The current char is only peeked, so each branch advances the reader itself.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.current();
+            switch (c) {
+                case '-':
+                    // possible start of the closing "--"; the dash is not appended yet
+                    t.advanceTransition(CommentEndDash);
+                    break;
+                case nullChar:
+                    // null char: report, skip it and substitute the replacement char
+                    t.error(this);
+                    r.advance();
+                    t.commentPending.data.append(replacementChar);
+                    break;
+                case eof:
+                    // EOF still emits whatever comment was collected
+                    t.eofError(this);
+                    t.emitCommentPending();
+                    t.transition(Data);
+                    break;
+                default:
+                    // bulk-read up to the next char that needs special handling
+                    t.commentPending.data.append(r.consumeToAny('-', nullChar));
+            }
+        }
+    },
+    CommentEndDash {
+        // One '-' has been consumed inside a comment but not yet appended: a second '-' moves to CommentEnd,
+        // anything else puts the held-back dash into the comment data.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '-':
+                    t.transition(CommentEnd);
+                    break;
+                case nullChar:
+                    // restore the held-back dash, then substitute the replacement char for the null
+                    t.error(this);
+                    t.commentPending.data.append('-').append(replacementChar);
+                    t.transition(Comment);
+                    break;
+                case eof:
+                    // EOF still emits whatever comment was collected
+                    t.eofError(this);
+                    t.emitCommentPending();
+                    t.transition(Data);
+                    break;
+                default:
+                    // the dash was ordinary content: restore it in front of the current char
+                    t.commentPending.data.append('-').append(c);
+                    t.transition(Comment);
+            }
+        }
+    },
+    CommentEnd {
+        // "--" has been consumed inside a comment but not yet appended: '>' closes the comment, anything
+        // else puts the held-back dashes into the comment data.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '>':
+                    t.emitCommentPending();
+                    t.transition(Data);
+                    break;
+                case nullChar:
+                    // restore the held-back "--", then substitute the replacement char for the null
+                    t.error(this);
+                    t.commentPending.data.append("--").append(replacementChar);
+                    t.transition(Comment);
+                    break;
+                case '!':
+                    // "--!": reported as an error; CommentEndBang decides whether it closes the comment
+                    t.error(this);
+                    t.transition(CommentEndBang);
+                    break;
+                case '-':
+                    // a third (or later) dash: one dash becomes content, two are still held back
+                    t.error(this);
+                    t.commentPending.data.append('-');
+                    break;
+                case eof:
+                    // EOF still emits whatever comment was collected
+                    t.eofError(this);
+                    t.emitCommentPending();
+                    t.transition(Data);
+                    break;
+                default:
+                    // "--" inside comment text: report, and restore it in front of the current char
+                    t.error(this);
+                    t.commentPending.data.append("--").append(c);
+                    t.transition(Comment);
+            }
+        }
+    },
+    CommentEndBang {
+        // "--!" has been consumed inside a comment but not yet appended: '>' closes the comment, anything
+        // else puts the held-back "--!" into the comment data.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '-':
+                    // restore "--!" as content; the new dash is held back by CommentEndDash
+                    t.commentPending.data.append("--!");
+                    t.transition(CommentEndDash);
+                    break;
+                case '>':
+                    t.emitCommentPending();
+                    t.transition(Data);
+                    break;
+                case nullChar:
+                    // restore "--!", then substitute the replacement char for the null
+                    t.error(this);
+                    t.commentPending.data.append("--!").append(replacementChar);
+                    t.transition(Comment);
+                    break;
+                case eof:
+                    // EOF still emits whatever comment was collected
+                    t.eofError(this);
+                    t.emitCommentPending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.commentPending.data.append("--!").append(c);
+                    t.transition(Comment);
+            }
+        }
+    },
+    Doctype {
+        // Entered after "DOCTYPE" was matched: expects whitespace before the doctype name.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    t.transition(BeforeDoctypeName);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    // note: fall through to > case
+                case '>': // catch invalid <!DOCTYPE>
+                    // no name at all: emit an empty doctype with forceQuirks set
+                    t.error(this);
+                    t.createDoctypePending();
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    // missing whitespace: report and move on; the consumed char is not pushed back
+                    t.error(this);
+                    t.transition(BeforeDoctypeName);
+            }
+        }
+    },
+    BeforeDoctypeName {
+        // Skips whitespace before the doctype name and creates the pending doctype once the name begins.
+        // NOTE(review): '>' has no case of its own here, so it lands in the default branch and becomes the
+        // first char of the name (e.g. for "<!DOCTYPE >") - confirm whether that is intended.
+        void read(Tokeniser t, CharacterReader r) {
+            if (r.matchesLetter()) {
+                // the letter is left unconsumed so DoctypeName can bulk-read the whole name
+                t.createDoctypePending();
+                t.transition(DoctypeName);
+                return;
+            }
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    break; // ignore whitespace
+                case nullChar:
+                    // null char: report and start the name with the replacement char
+                    t.error(this);
+                    t.createDoctypePending();
+                    t.doctypePending.name.append(replacementChar);
+                    t.transition(DoctypeName);
+                    break;
+                case eof:
+                    // EOF before any name: emit an empty doctype with forceQuirks set
+                    t.eofError(this);
+                    t.createDoctypePending();
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.createDoctypePending();
+                    t.doctypePending.name.append(c);
+                    t.transition(DoctypeName);
+            }
+        }
+    },
+    DoctypeName { // Accumulates the doctype name until whitespace, '>' or eof is read.
+        void read(Tokeniser t, CharacterReader r) {
+            if (r.matchesLetter()) { // letters are appended as a whole sequence in one call
+                String name = r.consumeLetterSequence();
+                t.doctypePending.name.append(name);
+                return;
+            }
+            char c = r.consume();
+            switch (c) {
+                case '>':
+                    t.emitDoctypePending(); // doctype ends right after the name
+                    t.transition(Data);
+                    break;
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    t.transition(AfterDoctypeName); // whitespace ends the name
+                    break;
+                case nullChar:
+                    t.error(this);
+                    t.doctypePending.name.append(replacementChar); // NUL is stored as the replacement character
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.doctypePending.name.append(c); // non-letter name character: append and stay in this state
+            }
+        }
+    },
+    AfterDoctypeName { // After the name: accepts '>', or the PUBLIC_KEY / SYSTEM_KEY keywords matched ignoring case.
+        void read(Tokeniser t, CharacterReader r) {
+            if (r.isEmpty()) { // nothing left to read: emit the pending doctype with force-quirks set
+                t.eofError(this);
+                t.doctypePending.forceQuirks = true;
+                t.emitDoctypePending();
+                t.transition(Data);
+                return;
+            }
+            if (r.matchesAny('\t', '\n', '\r', '\f', ' '))
+                r.advance(); // ignore whitespace
+            else if (r.matches('>')) {
+                t.emitDoctypePending();
+                t.advanceTransition(Data); // NOTE(review): presumably also steps past the matched '>' - confirm in Tokeniser
+            } else if (r.matchConsumeIgnoreCase(DocumentType.PUBLIC_KEY)) {
+                t.doctypePending.pubSysKey = DocumentType.PUBLIC_KEY; // remember which keyword was used
+                t.transition(AfterDoctypePublicKeyword);
+            } else if (r.matchConsumeIgnoreCase(DocumentType.SYSTEM_KEY)) {
+                t.doctypePending.pubSysKey = DocumentType.SYSTEM_KEY; // remember which keyword was used
+                t.transition(AfterDoctypeSystemKeyword);
+            } else {
+                t.error(this); // anything else is an error; the remainder is handled by BogusDoctype
+                t.doctypePending.forceQuirks = true;
+                t.advanceTransition(BogusDoctype);
+            }
+
+        }
+    },
+    AfterDoctypePublicKeyword { // Reads the character that follows the PUBLIC keyword of a doctype.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    t.transition(BeforeDoctypePublicIdentifier);
+                    break;
+                case '"':
+                    t.error(this); // a quote directly after the keyword is reported, then the identifier is read anyway
+                    // set public id to empty string
+                    t.transition(DoctypePublicIdentifier_doubleQuoted);
+                    break;
+                case '\'':
+                    t.error(this); // same handling as the double-quote case
+                    // set public id to empty string
+                    t.transition(DoctypePublicIdentifier_singleQuoted);
+                    break;
+                case '>':
+                    t.error(this); // doctype closed before any public identifier
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.error(this); // unexpected character: mark force-quirks and let BogusDoctype consume the rest
+                    t.doctypePending.forceQuirks = true;
+                    t.transition(BogusDoctype);
+            }
+        }
+    },
+    BeforeDoctypePublicIdentifier { // Skips whitespace while waiting for the opening quote of the public identifier.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    break; // whitespace is skipped; stay in this state
+                case '"':
+                    // set public id to empty string
+                    t.transition(DoctypePublicIdentifier_doubleQuoted);
+                    break;
+                case '\'':
+                    // set public id to empty string
+                    t.transition(DoctypePublicIdentifier_singleQuoted);
+                    break;
+                case '>':
+                    t.error(this); // doctype closed before any public identifier
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.error(this); // unexpected character: mark force-quirks and let BogusDoctype consume the rest
+                    t.doctypePending.forceQuirks = true;
+                    t.transition(BogusDoctype);
+            }
+        }
+    },
+    DoctypePublicIdentifier_doubleQuoted { // Collects the public identifier, one character per call, up to the closing '"'.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '"':
+                    t.transition(AfterDoctypePublicIdentifier); // closing quote ends the identifier
+                    break;
+                case nullChar:
+                    t.error(this);
+                    t.doctypePending.publicIdentifier.append(replacementChar); // NUL is stored as the replacement character
+                    break;
+                case '>':
+                    t.error(this); // '>' inside the quotes ends the doctype early, flagged force-quirks
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.doctypePending.publicIdentifier.append(c); // ordinary identifier character
+            }
+        }
+    },
+    DoctypePublicIdentifier_singleQuoted { // Collects the public identifier, one character per call, up to the closing '\''.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\'':
+                    t.transition(AfterDoctypePublicIdentifier); // closing quote ends the identifier
+                    break;
+                case nullChar:
+                    t.error(this);
+                    t.doctypePending.publicIdentifier.append(replacementChar); // NUL is stored as the replacement character
+                    break;
+                case '>':
+                    t.error(this); // '>' inside the quotes ends the doctype early, flagged force-quirks
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.doctypePending.publicIdentifier.append(c); // ordinary identifier character
+            }
+        }
+    },
+    AfterDoctypePublicIdentifier { // Reads the character that follows the closing quote of the public identifier.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    t.transition(BetweenDoctypePublicAndSystemIdentifiers);
+                    break;
+                case '>':
+                    t.emitDoctypePending(); // doctype with a public identifier only; no error, no force-quirks
+                    t.transition(Data);
+                    break;
+                case '"':
+                    t.error(this); // a system identifier quote with no separating whitespace is reported, then read anyway
+                    // system id empty
+                    t.transition(DoctypeSystemIdentifier_doubleQuoted);
+                    break;
+                case '\'':
+                    t.error(this); // same handling as the double-quote case
+                    // system id empty
+                    t.transition(DoctypeSystemIdentifier_singleQuoted);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.error(this); // unexpected character: mark force-quirks and let BogusDoctype consume the rest
+                    t.doctypePending.forceQuirks = true;
+                    t.transition(BogusDoctype);
+            }
+        }
+    },
+    BetweenDoctypePublicAndSystemIdentifiers { // Skips whitespace between the public identifier and an optional system identifier.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    break; // whitespace is skipped; stay in this state
+                case '>':
+                    t.emitDoctypePending(); // no system identifier follows; emitted without force-quirks
+                    t.transition(Data);
+                    break;
+                case '"':
+                    t.error(this); // NOTE(review): an error is reported here even though whitespace preceded the quote - confirm intended
+                    // system id empty
+                    t.transition(DoctypeSystemIdentifier_doubleQuoted);
+                    break;
+                case '\'':
+                    t.error(this); // same handling as the double-quote case
+                    // system id empty
+                    t.transition(DoctypeSystemIdentifier_singleQuoted);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.error(this); // unexpected character: mark force-quirks and let BogusDoctype consume the rest
+                    t.doctypePending.forceQuirks = true;
+                    t.transition(BogusDoctype);
+            }
+        }
+    },
+    AfterDoctypeSystemKeyword { // Reads the character that follows the SYSTEM keyword of a doctype.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    t.transition(BeforeDoctypeSystemIdentifier);
+                    break;
+                case '>':
+                    t.error(this); // doctype closed before any system identifier
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                case '"':
+                    t.error(this); // a quote directly after the keyword is reported, then the identifier is read anyway
+                    // system id empty
+                    t.transition(DoctypeSystemIdentifier_doubleQuoted);
+                    break;
+                case '\'':
+                    t.error(this); // same handling as the double-quote case
+                    // system id empty
+                    t.transition(DoctypeSystemIdentifier_singleQuoted);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.error(this);
+                    t.doctypePending.forceQuirks = true;
+                    t.transition(BogusDoctype); // skip the malformed rest; BogusDoctype emits the doctype once, at '>' or eof
+            }
+        }
+    },
+    BeforeDoctypeSystemIdentifier { // Skips whitespace while waiting for the opening quote of the system identifier.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    break; // whitespace is skipped; stay in this state
+                case '"':
+                    // set system id to empty string
+                    t.transition(DoctypeSystemIdentifier_doubleQuoted);
+                    break;
+                case '\'':
+                    // set system id to empty string
+                    t.transition(DoctypeSystemIdentifier_singleQuoted);
+                    break;
+                case '>':
+                    t.error(this); // doctype closed before any system identifier
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.error(this); // unexpected character: mark force-quirks and let BogusDoctype consume the rest
+                    t.doctypePending.forceQuirks = true;
+                    t.transition(BogusDoctype);
+            }
+        }
+    },
+    DoctypeSystemIdentifier_doubleQuoted { // Collects the system identifier, one character per call, up to the closing '"'.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '"':
+                    t.transition(AfterDoctypeSystemIdentifier); // closing quote ends the identifier
+                    break;
+                case nullChar:
+                    t.error(this);
+                    t.doctypePending.systemIdentifier.append(replacementChar); // NUL is stored as the replacement character
+                    break;
+                case '>':
+                    t.error(this); // '>' inside the quotes ends the doctype early, flagged force-quirks
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.doctypePending.systemIdentifier.append(c); // ordinary identifier character
+            }
+        }
+    },
+    DoctypeSystemIdentifier_singleQuoted { // Collects the system identifier, one character per call, up to the closing '\''.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\'':
+                    t.transition(AfterDoctypeSystemIdentifier); // closing quote ends the identifier
+                    break;
+                case nullChar:
+                    t.error(this);
+                    t.doctypePending.systemIdentifier.append(replacementChar); // NUL is stored as the replacement character
+                    break;
+                case '>':
+                    t.error(this); // '>' inside the quotes ends the doctype early, flagged force-quirks
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.doctypePending.systemIdentifier.append(c); // ordinary identifier character
+            }
+        }
+    },
+    AfterDoctypeSystemIdentifier { // After the system identifier's closing quote: only whitespace or '>' is expected.
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    break; // whitespace is skipped; stay in this state
+                case '>':
+                    t.emitDoctypePending(); // complete doctype; emitted without force-quirks
+                    t.transition(Data);
+                    break;
+                case eof:
+                    t.eofError(this);
+                    t.doctypePending.forceQuirks = true;
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.error(this); // trailing junk is reported and skipped by BogusDoctype
+                    t.transition(BogusDoctype);
+                    // NOT force quirks
+            }
+        }
+    },
+    BogusDoctype {
+        void read(Tokeniser t, CharacterReader r) {
+            char c = r.consume();
+            switch (c) {
+                case '>':
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                case eof:
+                    t.emitDoctypePending();
+                    t.transition(Data);
+                    break;
+                default:
+                    // ignore char
+                    break;
+            }
+        }
+    },
+    CdataSection {
+        void read(Tokeniser t, CharacterReader r) {
+            String data = r.consumeTo("]]>");
+            t.dataBuffer.append(data);
+            if (r.matchConsume("]]>") || r.isEmpty()) {
+                t.emit(new Token.CData(t.dataBuffer.toString()));
+                t.transition(Data);
+            }// otherwise, buffer underrun, stay in data section
+        }
+    };
+
+
+    abstract void read(Tokeniser t, CharacterReader r); // implemented by every state above: performs one tokenising step
+
+    static final char nullChar = '\u0000';
+    // char searches. must be sorted, used in inSorted. MUST update TokeniserStateTest if more arrays are added.
+    static final char[] attributeSingleValueCharsSorted = new char[]{nullChar, '&', '\''};
+    static final char[] attributeDoubleValueCharsSorted = new char[]{nullChar, '"', '&'};
+    static final char[] attributeNameCharsSorted = new char[]{nullChar, '\t', '\n', '\f', '\r', ' ', '"', '\'', '/', '<', '=', '>'};
+    static final char[] attributeValueUnquoted = new char[]{nullChar, '\t', '\n', '\f', '\r', ' ', '"', '&', '\'', '<', '=', '>', '`'};
+
+    private static final char replacementChar = Tokeniser.replacementChar; // appended in place of a NUL character by the states above
+    private static final String replacementStr = String.valueOf(Tokeniser.replacementChar); // String form of the same character
+    private static final char eof = CharacterReader.EOF; // local alias so it can be used as a switch case label
+
+    /**
+     * Handles RawtextEndTagName, ScriptDataEndTagName, and ScriptDataEscapedEndTagName. Same body impl, just
+     * different else exit transitions.
+     */
+    private static void handleDataEndTag(Tokeniser t, CharacterReader r, TokeniserState elseTransition) {
+        if (r.matchesLetter()) {
+            String name = r.consumeLetterSequence();
+            t.tagPending.appendTagName(name);
+            t.dataBuffer.append(name);
+            return;
+        }
+
+        boolean needsExitTransition = false;
+        if (t.isAppropriateEndTagToken() && !r.isEmpty()) {
+            char c = r.consume();
+            switch (c) {
+                case '\t':
+                case '\n':
+                case '\r':
+                case '\f':
+                case ' ':
+                    t.transition(BeforeAttributeName);
+                    break;
+                case '/':
+                    t.transition(SelfClosingStartTag);
+                    break;
+                case '>':
+                    t.emitTagPending();
+                    t.transition(Data);
+                    break;
+                default:
+                    t.dataBuffer.append(c);
+                    needsExitTransition = true;
+            }
+        } else {
+            needsExitTransition = true;
+        }
+
+        if (needsExitTransition) {
+            t.emit("</" + t.dataBuffer.toString());
+            t.transition(elseTransition);
+        }
+    }
+
+    private static void readData(Tokeniser t, CharacterReader r, TokeniserState current, TokeniserState advance) {
+        switch (r.current()) {
+            case '<':
+                t.advanceTransition(advance);
+                break;
+            case nullChar:
+                t.error(current);
+                r.advance();
+                t.emit(replacementChar);
+                break;
+            case eof:
+                t.emit(new Token.EOF());
+                break;
+            default:
+                String data = r.consumeToAny('<', nullChar); // todo - why hunt for null here? Just consumeTo'<'?
+                t.emit(data);
+                break;
+        }
+    }
+
+    private static void readCharRef(Tokeniser t, TokeniserState advance) {
+        int[] c = t.consumeCharacterReference(null, false);
+        if (c == null)
+            t.emit('&');
+        else
+            t.emit(c);
+        t.transition(advance);
+    }
+
+    private static void readEndTag(Tokeniser t, CharacterReader r, TokeniserState a, TokeniserState b) {
+        if (!r.matchesLetter()) { // no letter next: emit "</" as text and continue in state b
+            t.emit("</");
+            t.transition(b);
+            return;
+        }
+        t.createTagPending(false); // letter next: create the pending tag and continue in state a
+        t.transition(a);
+    }
+
+    private static void handleDataDoubleEscapeTag(Tokeniser t, CharacterReader r, TokeniserState primary, TokeniserState fallback) {
+        if (r.matchesLetter()) {
+            String name = r.consumeLetterSequence();
+            t.dataBuffer.append(name);
+            t.emit(name);
+            return;
+        }
+
+        char c = r.consume();
+        switch (c) {
+            case '\t':
+            case '\n':
+            case '\r':
+            case '\f':
+            case ' ':
+            case '/':
+            case '>':
+                if (t.dataBuffer.toString().equals("script"))
+                    t.transition(primary);
+                else
+                    t.transition(fallback);
+                t.emit(c);
+                break;
+            default:
+                r.unconsume();
+                t.transition(fallback);
+        }
+    }
+}
diff --git a/settings.gradle b/settings.gradle
index 29dc38f9..11a2f10c 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -1 +1,2 @@
-include ':app', ':library', ':library-image-loader', ':library-view', ':sample-custom-extension', ':library-syntax'
+include ':app', ':library', ':library-image-loader', ':library-view', ':sample-custom-extension',
+        ':library-syntax', ':html-parser-api', ':html-parser-impl'