diff --git a/build.gradle b/build.gradle
index c0b701ed..31ab153d 100644
--- a/build.gradle
+++ b/build.gradle
@@ -52,4 +52,7 @@ ext {
PRISM_4J = 'ru.noties:prism4j:1.1.0'
PRISM_4J_BUNDLER = 'ru.noties:prism4j-bundler:1.1.0'
+
+ JUNIT = 'junit:junit:4.12'
+ ROBOLECTRIC = 'org.robolectric:robolectric:3.8'
}
diff --git a/html-parser-api/src/main/java/ru/noties/markwon/html/HtmlTag.java b/html-parser-api/src/main/java/ru/noties/markwon/html/HtmlTag.java
index 896f0d40..153b4a77 100644
--- a/html-parser-api/src/main/java/ru/noties/markwon/html/HtmlTag.java
+++ b/html-parser-api/src/main/java/ru/noties/markwon/html/HtmlTag.java
@@ -27,6 +27,11 @@ public interface HtmlTag {
*/
int end();
+ /**
+ * @return {@code true} if this tag has no content (when {@code start() == end()})
+ */
+ boolean isEmpty();
+
/**
* Represents really inline HTML tags (unline commonmark definitions)
*/
diff --git a/html-parser-impl/build.gradle b/html-parser-impl/build.gradle
index f1c8860f..f8a446d5 100644
--- a/html-parser-impl/build.gradle
+++ b/html-parser-impl/build.gradle
@@ -14,8 +14,12 @@ android {
}
dependencies {
+
api SUPPORT_ANNOTATIONS
api project(':html-parser-api')
+
+ testImplementation JUNIT
+ testImplementation ROBOLECTRIC
}
afterEvaluate {
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/HtmlEmptyTagReplacement.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/HtmlEmptyTagReplacement.java
new file mode 100644
index 00000000..75f1184b
--- /dev/null
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/HtmlEmptyTagReplacement.java
@@ -0,0 +1,52 @@
+package ru.noties.markwon.html;
+
+import android.support.annotation.NonNull;
+import android.support.annotation.Nullable;
+
+import ru.noties.markwon.html.jsoup.parser.Token;
+
+/**
+ * Provides the text that is appended to the output in place of a tag without content,
+ * so that a Span can be applied for this tag. Please note that this class is used for
+ * _void_ tags and for tags that are self-closed (even if HTML spec doesn't specify
+ * a tag as self-closed). This is due to the fact that underlying parser does not
+ * validate context and does not check if a tag is correctly used.
+ */
+public class HtmlEmptyTagReplacement {
+
+    // Unicode OBJECT REPLACEMENT CHARACTER: stands in for an image that has no alt text
+    private static final String IMG_REPLACEMENT = "\uFFFC";
+
+    /**
+     * @return a new instance of this class
+     */
+    @NonNull
+    public static HtmlEmptyTagReplacement create() {
+        return new HtmlEmptyTagReplacement();
+    }
+
+    /**
+     * @return replacement for supplied startTag or null if no replacement should occur (which will
+     * lead to the tag having the same start & end value, thus not applicable for applying a Span)
+     */
+    @Nullable
+    public String replace(@NonNull Token.StartTag startTag) {
+        final String tagName = startTag.normalName;
+
+        if ("br".equals(tagName)) {
+            // line break is replaced with a new line
+            return "\n";
+        }
+
+        if ("img".equals(tagName)) {
+            // use alt text when it is present and not empty, otherwise the placeholder
+            final String altText = startTag.attributes.getIgnoreCase("alt");
+            final boolean hasAlt = altText != null && altText.length() > 0;
+            return hasAlt ? altText : IMG_REPLACEMENT;
+        }
+
+        // any other tag has no replacement
+        return null;
+    }
+
+}
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/HtmlTagImpl.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/HtmlTagImpl.java
index 3f8083dd..4f6ca844 100644
--- a/html-parser-impl/src/main/java/ru/noties/markwon/html/HtmlTagImpl.java
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/HtmlTagImpl.java
@@ -8,7 +8,7 @@ import java.util.List;
abstract class HtmlTagImpl implements HtmlTag {
- static final int NO_VALUE = -1;
+ private static final int NO_VALUE = -1;
final String name;
final int start;
@@ -35,13 +35,17 @@ abstract class HtmlTagImpl implements HtmlTag {
return end;
}
+ @Override
+ public boolean isEmpty() {
+ return start == end; // same start and end position -> tag has no content
+ }
+
boolean isClosed() {
return end > NO_VALUE;
}
abstract void closeAt(int end);
-
static class InlineImpl extends HtmlTagImpl implements Inline {
InlineImpl(@NonNull String name, int start) {
@@ -54,6 +58,15 @@ abstract class HtmlTagImpl implements HtmlTag {
super.end = end;
}
}
+
+        @Override
+        public String toString() {
+            // text form of this inline tag: its name and start/end positions
+            final StringBuilder builder = new StringBuilder("InlineImpl{");
+            builder.append("name='").append(name).append('\'');
+            builder.append(", start=").append(start).append(", end=").append(end);
+            return builder.append('}').toString();
+        }
}
static class BlockImpl extends HtmlTagImpl implements Block {
@@ -83,7 +96,7 @@ abstract class HtmlTagImpl implements HtmlTag {
if (!isClosed()) {
super.end = end;
if (children != null) {
- for (BlockImpl child: children) {
+ for (BlockImpl child : children) {
child.closeAt(end);
}
children = Collections.unmodifiableList(children);
@@ -113,5 +126,16 @@ abstract class HtmlTagImpl implements HtmlTag {
//noinspection unchecked
return (List) (List extends Block>) children;
}
+
+        @Override
+        public String toString() {
+            // parent is printed by name only, children are printed in full
+            final String parentName = parent == null ? null : parent.name;
+            return new StringBuilder("BlockImpl{")
+                    .append("name='").append(name).append('\'')
+                    .append(", start=").append(start).append(", end=").append(end)
+                    .append(", parent=").append(parentName)
+                    .append(", children=").append(children).append('}').toString();
+        }
}
}
diff --git a/html-parser-impl/src/main/java/ru/noties/markwon/html/MarkwonHtmlParserImpl.java b/html-parser-impl/src/main/java/ru/noties/markwon/html/MarkwonHtmlParserImpl.java
index 4fcd4a5d..b6b731a8 100644
--- a/html-parser-impl/src/main/java/ru/noties/markwon/html/MarkwonHtmlParserImpl.java
+++ b/html-parser-impl/src/main/java/ru/noties/markwon/html/MarkwonHtmlParserImpl.java
@@ -24,7 +24,12 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
@NonNull
public static MarkwonHtmlParserImpl create() {
- return new MarkwonHtmlParserImpl();
+ return create(HtmlEmptyTagReplacement.create());
+ }
+
+ @NonNull
+ public static MarkwonHtmlParserImpl create(@NonNull HtmlEmptyTagReplacement inlineTagReplacement) {
+ return new MarkwonHtmlParserImpl(inlineTagReplacement);
}
// https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements
@@ -43,7 +48,7 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
private static final String TAG_LIST_ITEM = "li";
// todo: make it configurable
- private static final String IMG_REPLACEMENT = "\uFFFC";
+// the image replacement constant now lives in HtmlEmptyTagReplacement (see its replace method)
static {
INLINE_TAGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
@@ -96,10 +101,16 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
)));
}
+ private final HtmlEmptyTagReplacement emptyTagReplacement;
+
private final List inlineTags = new ArrayList<>(0);
private BlockImpl currentBlock = BlockImpl.root();
+ MarkwonHtmlParserImpl(@NonNull HtmlEmptyTagReplacement replacement) {
+ this.emptyTagReplacement = replacement; // queried for void and self-closing tags
+ }
+
@Override
public void processFragment(
@@ -203,17 +214,19 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
if (isVoidTag(name)
|| startTag.selfClosing) {
- // check if we have content to append as we must close this tag here
- processVoidTag(output, startTag);
+ final String replacement = emptyTagReplacement.replace(startTag);
+ if (replacement != null
+ && replacement.length() > 0) {
+ append(output, replacement);
+ }
+ // the thing is: we will keep this inline tag in the list,
+ // but in case of void-tag that has no replacement, there will be no
+ // possibility to set a span (requires at least one char)
inline.closeAt(output.length());
}
- // actually only check if there is content for void/self-closing tags
- // if none -> ignore it
- if (inline.start != inline.end) {
- inlineTags.add(inline);
- }
+ inlineTags.add(inline);
}
protected void processInlineTagEnd(
@@ -236,16 +249,14 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
final String name = startTag.normalName;
// block tags (all that are NOT inline -> blocks
- // I think there is only one strong rule -> paragraph cannot contain anything
+ // there is only one strong rule -> paragraph cannot contain anything
// except inline tags
- // also, closing paragraph with non-closed inlines -> doesn't close inlines
- // they are continued for _afterwards_
if (TAG_PARAGRAPH.equals(currentBlock.name)) {
// it must be closed here not matter what we are as here we _assume_
// that it's a block tag
- append(output, "\n");
currentBlock.closeAt(output.length());
+ append(output, "\n");
currentBlock = currentBlock.parent;
} else if (TAG_LIST_ITEM.equals(name)
&& TAG_LIST_ITEM.equals(currentBlock.name)) {
@@ -262,10 +273,23 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
final BlockImpl block = BlockImpl.create(name, start, currentBlock);
+ final boolean isVoid = isVoidTag(name) || startTag.selfClosing;
+ if (isVoid) {
+ final String replacement = emptyTagReplacement.replace(startTag);
+ if (replacement != null
+ && replacement.length() > 0) {
+ append(output, replacement);
+ }
+ block.closeAt(output.length());
+ }
+
//noinspection ConstantConditions
appendBlockChild(block.parent, block);
- this.currentBlock = block;
+ // if not void start filling-in children
+ if (!isVoid) {
+ this.currentBlock = block;
+ }
}
protected void processBlockTagEnd(
@@ -277,37 +301,16 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
final BlockImpl block = findOpenBlockTag(endTag.normalName);
if (block != null) {
+ block.closeAt(output.length());
+
if (TAG_PARAGRAPH.equals(name)) {
append(output, "\n");
}
- block.closeAt(output.length());
this.currentBlock = block.parent;
}
}
- protected void processVoidTag(
- @NonNull T output,
- @NonNull Token.StartTag startTag) {
-
- final String name = startTag.normalName;
-
- if ("br".equals(name)) {
- append(output, "\n");
- } else if ("img".equals(name)) {
- final String alt = startTag.attributes.getIgnoreCase("alt");
- if (alt == null
- || alt.length() == 0) {
- // no alt is provided
- append(output, IMG_REPLACEMENT);
- } else {
- append(output, alt);
- }
- }
-
- // other tags are ignored
- }
-
protected void processCharacter(
@NonNull T output,
@NonNull Token.Character character) {
diff --git a/html-parser-impl/src/test/java/ru/noties/markwon/html/MarkwonHtmlParserImplTest.java b/html-parser-impl/src/test/java/ru/noties/markwon/html/MarkwonHtmlParserImplTest.java
new file mode 100644
index 00000000..f61d697b
--- /dev/null
+++ b/html-parser-impl/src/test/java/ru/noties/markwon/html/MarkwonHtmlParserImplTest.java
@@ -0,0 +1,412 @@
+package ru.noties.markwon.html;
+
+import android.support.annotation.NonNull;
+import android.support.annotation.Nullable;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.robolectric.RobolectricTestRunner;
+import org.robolectric.annotation.Config;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import ru.noties.markwon.html.jsoup.parser.Token;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+@RunWith(RobolectricTestRunner.class)
+@Config(manifest = Config.NONE)
+public class MarkwonHtmlParserImplTest {
+
+    @Test
+    public void inlineTags() {
+
+        // all inline tags are correctly parsed: each one is captured with its name as content
+
+        // a simple replacement that will return tag name as replacement (for this test purposes)
+        final MarkwonHtmlParserImpl impl = new MarkwonHtmlParserImpl(new HtmlEmptyTagReplacement() {
+            @Nullable
+            @Override
+            public String replace(@NonNull Token.StartTag startTag) {
+                return startTag.normalName;
+            }
+        });
+
+        // all inline tags are parsed as ones
+        final List<String> tags = Arrays.asList(
+                "a", "abbr", "acronym",
+                "b", "bdo", "big", "br", "button",
+                "cite", "code",
+                "dfn",
+                "em",
+                "i", "img", "input",
+                "kbd",
+                "label",
+                "map",
+                "object",
+                "q",
+                "samp", "script", "select", "small", "span", "strong", "sub", "sup",
+                "textarea", "time", "tt",
+                "var"
+        );
+        // html is a sequence of `<tag>tag</tag>` entries, one for each tag
+        final StringBuilder html = new StringBuilder();
+        for (String tag : tags) {
+            html.append('<')
+                    .append(tag)
+                    .append('>')
+                    .append(tag)
+                    .append("</")
+                    .append(tag)
+                    .append('>');
+        }
+
+        final StringBuilder output = new StringBuilder();
+
+        impl.processFragment(output, html.toString());
+
+        final CaptureInlineTagsAction action = new CaptureInlineTagsAction();
+
+        impl.flushInlineTags(output.length(), action);
+
+        assertTrue(action.called);
+
+        final List<HtmlTag.Inline> inlines = action.tags;
+
+        if (tags.size() != inlines.size()) {
+            final Set<String> missing = new HashSet<>(tags);
+            for (HtmlTag.Inline inline : inlines) {
+                missing.remove(inline.name());
+            }
+            assertTrue("Missing inline tags: " + missing, false);
+        }
+        // each captured tag must be an expected one and must span exactly its own name
+        final Set<String> set = new HashSet<>(tags);
+
+        for (HtmlTag.Inline inline : inlines) {
+            assertTrue(set.remove(inline.name()));
+            assertEquals(inline.name(), output.substring(inline.start(), inline.end()));
+        }
+
+        assertEquals(0, set.size());
+    }
+
+    @Test
+    public void inlineVoidTags() {
+
+        // all inline void tags are correctly parsed
+
+        final List<String> tags = Arrays.asList(
+                "br",
+                "img", "input"
+        );
+        // no replacement is provided, so void tags must not add anything to the output
+        final MarkwonHtmlParserImpl impl = new MarkwonHtmlParserImpl(new HtmlEmptyTagReplacement() {
+            @Nullable
+            @Override
+            public String replace(@NonNull Token.StartTag startTag) {
+                return null;
+            }
+        });
+
+        final StringBuilder html = new StringBuilder();
+        for (String tag : tags) {
+            html.append('<')
+                    .append(tag)
+                    .append('>');
+        }
+
+        final StringBuilder output = new StringBuilder();
+
+        impl.processFragment(output, html.toString());
+
+        assertEquals(0, output.length());
+
+        final CaptureInlineTagsAction action = new CaptureInlineTagsAction();
+
+        impl.flushInlineTags(output.length(), action);
+
+        assertTrue(action.called);
+
+        final List<HtmlTag.Inline> inlines = action.tags;
+
+        assertEquals(inlines.toString(), tags.size(), inlines.size());
+
+        final Set<String> set = new HashSet<>(tags);
+        // tags without replacement are still reported, but as empty ones (start == end)
+        for (HtmlTag.Inline inline : inlines) {
+            assertEquals(inline.name(), inline.start(), inline.end());
+            assertTrue(inline.name(), inline.isEmpty());
+            assertTrue(set.remove(inline.name()));
+        }
+
+        assertEquals(set.toString(), 0, set.size());
+    }
+
+    @Test
+    public void blockVoidTags() {
+        // void tags flushed as blocks: reported as empty and nothing is added to the output
+        final MarkwonHtmlParserImpl impl = new MarkwonHtmlParserImpl(new HtmlEmptyTagReplacement() {
+            @Nullable
+            @Override
+            public String replace(@NonNull Token.StartTag startTag) {
+                return null;
+            }
+        });
+
+        final List<String> tags = Arrays.asList(
+                "area",
+                "base",
+                "col",
+                "embed",
+                "hr",
+                "keygen",
+                "link",
+                "meta",
+                "param",
+                "source",
+                "track",
+                "wbr"
+        );
+
+        final StringBuilder html = new StringBuilder();
+        for (String tag : tags) {
+            html.append('<')
+                    .append(tag)
+                    .append('>');
+        }
+
+        final StringBuilder output = new StringBuilder();
+
+        impl.processFragment(output, html.toString());
+
+        assertEquals(0, output.length());
+
+        final CaptureBlockTagsAction action = new CaptureBlockTagsAction();
+        impl.flushBlockTags(output.length(), action);
+
+        assertTrue(action.called);
+
+        final List<HtmlTag.Block> blocks = action.tags;
+
+        assertEquals(blocks.toString(), tags.size(), blocks.size());
+
+        final Set<String> set = new HashSet<>(tags);
+        // every expected tag must be reported exactly once and must be empty (start == end)
+        for (HtmlTag.Block block : blocks) {
+            assertEquals(block.name(), block.start(), block.end());
+            assertTrue(block.name(), block.isEmpty());
+            assertTrue(set.remove(block.name()));
+        }
+
+        assertEquals(set.toString(), 0, set.size());
+    }
+
+    @Test
+    public void selfClosingTags() {
+
+        // self-closing tags (grammatically) must be reported as empty blocks (no checks for real html)
+
+        final List<String> tags = Arrays.asList(
+                "one",
+                "two-two",
+                "three-three-three",
+                "four-four-four-four",
+                "FiveFiveFiveFiveFive"
+        );
+
+        final MarkwonHtmlParserImpl impl = new MarkwonHtmlParserImpl(new HtmlEmptyTagReplacement() {
+            @Nullable
+            @Override
+            public String replace(@NonNull Token.StartTag startTag) {
+                return null;
+            }
+        });
+
+        final StringBuilder html = new StringBuilder();
+        for (String tag : tags) {
+            html.append('<')
+                    .append(tag)
+                    .append(" />");
+        }
+
+        final StringBuilder output = new StringBuilder();
+
+        impl.processFragment(output, html.toString());
+
+        assertEquals(output.toString(), 0, output.length());
+
+        final CaptureBlockTagsAction action = new CaptureBlockTagsAction();
+
+        impl.flushBlockTags(output.length(), action);
+
+        assertTrue(action.called);
+
+        final List<HtmlTag.Block> blocks = action.tags;
+
+        assertEquals(blocks.toString(), tags.size(), blocks.size());
+
+        // tag names must be lower cased
+        final Set<String> set = new HashSet<>(tags.size());
+        for (String tag : tags) {
+            set.add(tag.toLowerCase());
+        }
+
+        for (HtmlTag.Block block : blocks) {
+            assertTrue(block.name(), block.isEmpty());
+            assertTrue(set.remove(block.name()));
+        }
+
+        assertEquals(set.toString(), 0, set.size());
+    }
+
+    @Test
+    public void blockTags() {
+
+        // the tags that will require a new line before them
+
+        final List<String> tags = Arrays.asList(
+                "address", "article", "aside",
+                "blockquote",
+                "canvas",
+                "dd", "div", "dl", "dt",
+                "fieldset", "figcaption", "figure", "footer", "form",
+                "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr",
+                "li",
+                "main",
+                "nav", "noscript",
+                "ol", "output",
+                "p", "pre",
+                "section",
+                "table", "tfoot",
+                "ul",
+                "video"
+        );
+
+        final MarkwonHtmlParserImpl impl = new MarkwonHtmlParserImpl(new HtmlEmptyTagReplacement() {
+            @Nullable
+            @Override
+            public String replace(@NonNull Token.StartTag startTag) {
+                return startTag.normalName;
+            }
+        });
+        // html is a sequence of `<tag>tag</tag>` entries, one for each tag
+        final StringBuilder html = new StringBuilder();
+        for (String tag : tags) {
+            html.append('<')
+                    .append(tag)
+                    .append('>')
+                    .append(tag)
+                    .append("</")
+                    .append(tag)
+                    .append('>');
+        }
+
+        final StringBuilder output = new StringBuilder();
+
+        impl.processFragment(output, html.toString());
+
+        final CaptureBlockTagsAction action = new CaptureBlockTagsAction();
+
+        impl.flushBlockTags(output.length(), action);
+
+        assertTrue(action.called);
+
+        final List<HtmlTag.Block> blocks = action.tags;
+        assertEquals(blocks.toString(), tags.size(), blocks.size());
+
+        final Set<String> set = new HashSet<>(tags);
+        // every block spans its own name; all blocks but the first are preceded by a new line
+        boolean first = true;
+        for (HtmlTag.Block block : blocks) {
+            assertEquals(block.name(), block.name(), output.substring(block.start(), block.end()));
+            if (first) {
+                first = false;
+            } else {
+                assertEquals('\n', output.charAt(block.start() - 1));
+            }
+            assertTrue(set.remove(block.name()));
+        }
+
+        assertEquals(set.toString(), 0, set.size());
+    }
+
+ @Test
+ public void multipleFragmentsContinuation() {
+ throw new RuntimeException(); // TODO: not implemented yet, always fails until the test is written
+ }
+
+ @Test
+ public void paragraphCannotContainAnythingButInlines() {
+ throw new RuntimeException(); // TODO: implement
+ }
+
+ // TODO: move to a test class for HtmlEmptyTagReplacement (presumably applies to the img/br tests below)
+ @Test
+ public void imageReplacementNoAlt() {
+ throw new RuntimeException(); // TODO: implement
+ }
+
+ @Test
+ public void brAddsNewLine() {
+ throw new RuntimeException(); // TODO: implement
+ }
+
+ @Test
+ public void imageReplacementAlt() {
+ throw new RuntimeException(); // TODO: implement
+ }
+
+ @Test
+ public void blockCloseClosesChildren() {
+ throw new RuntimeException(); // TODO: implement
+ }
+
+ @Test
+ public void allReturnedTagsAreClosed() {
+ throw new RuntimeException(); // TODO: implement
+ }
+
+ @Test
+ public void allTagsAreLowerCase() {
+ throw new RuntimeException(); // TODO: implement
+ }
+
+ @Test
+ public void previousListItemClosed() {
+ throw new RuntimeException(); // TODO: implement
+ }
+
+ @Test
+ public void nestedBlocks() {
+ throw new RuntimeException(); // TODO: implement
+ }
+
+ @Test
+ public void attributes() {
+ throw new RuntimeException(); // TODO: implement
+ }
+
+ private static class CaptureTagsAction implements MarkwonHtmlParser.FlushAction {
+ // records that the action was invoked and keeps a copy of the tags it received
+ boolean called;
+ List tags;
+ // NOTE(review): raw types here look like lost generic parameters - confirm against FlushAction
+ @Override
+ public void apply(@NonNull List tags) {
+ this.called = true;
+ this.tags = new ArrayList<>(tags);
+ }
+ }
+
+ private static class CaptureInlineTagsAction extends CaptureTagsAction { // used with flushInlineTags
+ }
+
+ private static class CaptureBlockTagsAction extends CaptureTagsAction { // used with flushBlockTags
+ }
+}
\ No newline at end of file