Improve empty tag replacement

This commit is contained in:
Dimitry Ivanov 2018-08-19 18:46:55 +03:00
parent 5c9ba0f252
commit c7c998db8f
9 changed files with 163 additions and 69 deletions

View File

@ -2,6 +2,10 @@ package ru.noties.markwon.html.api;
import android.support.annotation.NonNull; import android.support.annotation.NonNull;
/**
* @see MarkwonHtmlParser
* @since 2.0.0
*/
class MarkwonHtmlParserNoOp extends MarkwonHtmlParser { class MarkwonHtmlParserNoOp extends MarkwonHtmlParser {
@Override @Override

View File

@ -3,14 +3,15 @@ package ru.noties.markwon.html.impl;
import android.support.annotation.NonNull; import android.support.annotation.NonNull;
import android.support.annotation.Nullable; import android.support.annotation.Nullable;
import ru.noties.markwon.html.impl.jsoup.parser.Token; import ru.noties.markwon.html.api.HtmlTag;
/** /**
* This class will be used to append some text to output in order to * This class will be used to append some text to output in order to
* apply a Span for this tag. Please note that this class will be used for * apply a Span for this tag. Please note that this class will be used for
* _void_ tags and tags that are self-closed (even if HTML spec doesn't specify * _void_ tags and tags that are self-closed (even if HTML spec doesn't specify
* a tag as self-closed). This is due to the fact that underlying parser does not * a tag as self-closed). This is due to the fact that underlying parser does not
* validate context and does not check if a tag is correctly used. * validate context and does not check if a tag is correctly used. Plus it will be
* used for tags without content, for example: {@code <my-custom-element></my-custom-element>}
* *
* @since 2.0.0 * @since 2.0.0
*/ */
@ -28,15 +29,16 @@ public class HtmlEmptyTagReplacement {
* lead to `Inline` tag have start &amp; end the same value, thus not applicable for applying a Span) * lead to `Inline` tag have start &amp; end the same value, thus not applicable for applying a Span)
*/ */
@Nullable @Nullable
public String replace(@NonNull Token.StartTag startTag) { public String replace(@NonNull HtmlTag tag) {
final String replacement; final String replacement;
final String name = startTag.normalName; final String name = tag.name();
if ("br".equals(name)) { if ("br".equals(name)) {
replacement = "\n"; replacement = "\n";
} else if ("img".equals(name)) { } else if ("img".equals(name)) {
final String alt = startTag.attributes.getIgnoreCase("alt"); final String alt = tag.attributes().get("alt");
if (alt == null if (alt == null
|| alt.length() == 0) { || alt.length() == 0) {
// no alt is provided // no alt is provided
@ -50,5 +52,4 @@ public class HtmlEmptyTagReplacement {
return replacement; return replacement;
} }
} }

View File

@ -119,6 +119,11 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
private boolean isInsidePreTag; private boolean isInsidePreTag;
private Tokeniser tokeniser;
private CharacterReader reader;
MarkwonHtmlParserImpl( MarkwonHtmlParserImpl(
@NonNull HtmlEmptyTagReplacement replacement, @NonNull HtmlEmptyTagReplacement replacement,
@NonNull TrimmingAppender trimmingAppender) { @NonNull TrimmingAppender trimmingAppender) {
@ -126,13 +131,14 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
this.trimmingAppender = trimmingAppender; this.trimmingAppender = trimmingAppender;
} }
@Override @Override
public <T extends Appendable & CharSequence> void processFragment( public <T extends Appendable & CharSequence> void processFragment(
@NonNull T output, @NonNull T output,
@NonNull String htmlFragment) { @NonNull String htmlFragment) {
// todo: maybe there is a way to reuse tokeniser... // we might want to reuse tokeniser (at least when the same output is involved)
// as CharacterReader does a bit of initialization (cache etc) as its
// primary usage is parsing a document in one run (not parsing _fragments_)
final Tokeniser tokeniser = new Tokeniser(new CharacterReader(htmlFragment), ParseErrorList.noTracking()); final Tokeniser tokeniser = new Tokeniser(new CharacterReader(htmlFragment), ParseErrorList.noTracking());
while (true) { while (true) {
@ -239,7 +245,7 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
if (isVoidTag(name) if (isVoidTag(name)
|| startTag.selfClosing) { || startTag.selfClosing) {
final String replacement = emptyTagReplacement.replace(startTag); final String replacement = emptyTagReplacement.replace(inline);
if (replacement != null if (replacement != null
&& replacement.length() > 0) { && replacement.length() > 0) {
appendQuietly(output, replacement); appendQuietly(output, replacement);
@ -261,6 +267,12 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
// try to find it, if none found -> ignore // try to find it, if none found -> ignore
final HtmlTagImpl.InlineImpl openInline = findOpenInlineTag(endTag.normalName); final HtmlTagImpl.InlineImpl openInline = findOpenInlineTag(endTag.normalName);
if (openInline != null) { if (openInline != null) {
// okay, if this tag is empty -> call replacement
if (isEmpty(output, openInline)) {
appendEmptyTagReplacement(output, openInline);
}
// close open inline tag // close open inline tag
openInline.closeAt(output.length()); openInline.closeAt(output.length());
} }
@ -301,7 +313,7 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
final boolean isVoid = isVoidTag(name) || startTag.selfClosing; final boolean isVoid = isVoidTag(name) || startTag.selfClosing;
if (isVoid) { if (isVoid) {
final String replacement = emptyTagReplacement.replace(startTag); final String replacement = emptyTagReplacement.replace(block);
if (replacement != null if (replacement != null
&& replacement.length() > 0) { && replacement.length() > 0) {
appendQuietly(output, replacement); appendQuietly(output, replacement);
@ -331,6 +343,11 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
isInsidePreTag = false; isInsidePreTag = false;
} }
// okay, if this tag is empty -> call replacement
if (isEmpty(output, block)) {
appendEmptyTagReplacement(output, block);
}
block.closeAt(output.length()); block.closeAt(output.length());
if (TAG_PARAGRAPH.equals(name)) { if (TAG_PARAGRAPH.equals(name)) {
@ -434,4 +451,19 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
return map; return map;
} }
/**
 * Tells whether a tag is considered empty with regard to the supplied output.
 * A tag is treated as empty when its {@code start} equals the current length
 * of {@code output}.
 *
 * @param output the output whose current length is compared
 * @param tag    the tag whose {@code start} is compared
 * @return {@code true} if {@code output.length()} equals {@code tag.start}
 */
protected static <T extends Appendable & CharSequence> boolean isEmpty(
        @NonNull T output,
        @NonNull HtmlTagImpl tag) {
    return output.length() == tag.start;
}
/**
 * Asks {@code emptyTagReplacement} for a replacement of the supplied tag and,
 * when one is returned, appends it to the output via {@code appendQuietly}.
 *
 * @param output the output to append the replacement to
 * @param tag    the tag for which a replacement is requested
 */
protected <T extends Appendable & CharSequence> void appendEmptyTagReplacement(
        @NonNull T output,
        @NonNull HtmlTagImpl tag) {
    final String text = emptyTagReplacement.replace(tag);
    if (text == null) {
        // no replacement for this tag -> leave output untouched
        return;
    }
    appendQuietly(output, text);
}
} }

View File

@ -250,7 +250,8 @@ public class Attributes implements Iterable<Attribute>, Cloneable {
@Override @Override
public Attribute next() { public Attribute next() {
final Attribute attr = new Attribute(keys[i], vals[i], Attributes.this); final String val = vals[i];
final Attribute attr = new Attribute(keys[i], val == null ? "" : val, Attributes.this);
i++; i++;
return attr; return attr;
} }
@ -262,21 +263,21 @@ public class Attributes implements Iterable<Attribute>, Cloneable {
}; };
} }
/** // /**
Get the attributes as a List, for iteration. // Get the attributes as a List, for iteration.
@return a view of the attributes as an unmodifiable List. // @return a view of the attributes as an unmodifiable List.
*/ // */
public List<Attribute> asList() { // public List<Attribute> asList() {
ArrayList<Attribute> list = new ArrayList<>(size); // ArrayList<Attribute> list = new ArrayList<>(size);
for (int i = 0; i < size; i++) { // for (int i = 0; i < size; i++) {
// Attribute attr = vals[i] == null ? //// Attribute attr = vals[i] == null ?
// new BooleanAttribute(keys[i]) : // deprecated class, but maybe someone still wants it //// new BooleanAttribute(keys[i]) : // deprecated class, but maybe someone still wants it
// new Attribute(keys[i], vals[i], Attributes.this); //// new Attribute(keys[i], vals[i], Attributes.this);
// list.add(attr); //// list.add(attr);
list.add(new Attribute(keys[i], vals[i], Attributes.this)); // list.add(new Attribute(keys[i], vals[i], Attributes.this));
} // }
return Collections.unmodifiableList(list); // return Collections.unmodifiableList(list);
} // }
// /** // /**
// * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys // * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys

View File

@ -1,5 +1,7 @@
package ru.noties.markwon.html.impl.jsoup.parser; package ru.noties.markwon.html.impl.jsoup.parser;
import android.support.annotation.NonNull;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.io.StringReader; import java.io.StringReader;
@ -10,12 +12,12 @@ import ru.noties.markwon.html.impl.jsoup.UncheckedIOException;
import ru.noties.markwon.html.impl.jsoup.helper.Validate; import ru.noties.markwon.html.impl.jsoup.helper.Validate;
/** /**
CharacterReader consumes tokens off a string. Used internally by jsoup. API subject to changes. * CharacterReader consumes tokens off a string. Used internally by jsoup. API subject to changes.
*/ */
public final class CharacterReader { public final class CharacterReader {
static final char EOF = (char) -1; static final char EOF = (char) -1;
private static final int maxStringCacheLen = 12; private static final int maxStringCacheLen = 12;
static final int maxBufferLen = 1024 * 32; // visible for testing static final int maxBufferLen = 1024 * 4; // visible for testing
private static final int readAheadLimit = (int) (maxBufferLen * 0.75); private static final int readAheadLimit = (int) (maxBufferLen * 0.75);
private final char[] charBuf; private final char[] charBuf;
@ -25,13 +27,13 @@ public final class CharacterReader {
private int bufPos; private int bufPos;
private int readerPos; private int readerPos;
private int bufMark; private int bufMark;
private final String[] stringCache = new String[512]; // holds reused strings in this doc, to lessen garbage private final String[] stringCache = new String[128]; // holds reused strings in this doc, to lessen garbage
public CharacterReader(Reader input, int sz) { public CharacterReader(Reader input, int sz) {
Validate.notNull(input); Validate.notNull(input);
Validate.isTrue(input.markSupported()); Validate.isTrue(input.markSupported());
reader = input; reader = input;
charBuf = new char[sz > maxBufferLen ? maxBufferLen : sz]; charBuf = new char[maxBufferLen];
bufferUp(); bufferUp();
} }
@ -43,6 +45,15 @@ public final class CharacterReader {
this(new StringReader(input), input.length()); this(new StringReader(input), input.length());
} }
// public void swapInput(@NonNull String input) {
// reader = new StringReader(input);
// bufLength = 0;
// bufSplitPoint = 0;
// bufPos = 0;
// readerPos = 0;
// bufferUp();
// }
private void bufferUp() { private void bufferUp() {
if (bufPos < bufSplitPoint) if (bufPos < bufSplitPoint)
return; return;
@ -66,6 +77,7 @@ public final class CharacterReader {
/** /**
* Gets the current cursor position in the content. * Gets the current cursor position in the content.
*
* @return current position * @return current position
*/ */
public int pos() { public int pos() {
@ -74,6 +86,7 @@ public final class CharacterReader {
/** /**
* Tests if all the content has been read. * Tests if all the content has been read.
*
* @return true if nothing left to read. * @return true if nothing left to read.
*/ */
public boolean isEmpty() { public boolean isEmpty() {
@ -87,6 +100,7 @@ public final class CharacterReader {
/** /**
* Get the char at the current position. * Get the char at the current position.
*
* @return char * @return char
*/ */
public char current() { public char current() {
@ -122,6 +136,7 @@ public final class CharacterReader {
/** /**
* Returns the number of characters between the current position and the next instance of the input char * Returns the number of characters between the current position and the next instance of the input char
*
* @param c scan target * @param c scan target
* @return offset between current position and next instance of target. -1 if not found. * @return offset between current position and next instance of target. -1 if not found.
*/ */
@ -162,6 +177,7 @@ public final class CharacterReader {
/** /**
* Reads characters up to the specific char. * Reads characters up to the specific char.
*
* @param c the delimiter * @param c the delimiter
* @return the chars read * @return the chars read
*/ */
@ -189,6 +205,7 @@ public final class CharacterReader {
/** /**
* Read characters until the first of any delimiters is found. * Read characters until the first of any delimiters is found.
*
* @param chars delimiters to scan for * @param chars delimiters to scan for
* @return characters read up to the matched delimiter. * @return characters read up to the matched delimiter.
*/ */
@ -198,7 +215,8 @@ public final class CharacterReader {
final int remaining = bufLength; final int remaining = bufLength;
final char[] val = charBuf; final char[] val = charBuf;
OUTER: while (bufPos < remaining) { OUTER:
while (bufPos < remaining) {
for (char c : chars) { for (char c : chars) {
if (val[bufPos] == c) if (val[bufPos] == c)
break OUTER; break OUTER;

View File

@ -3,8 +3,10 @@ package ru.noties.markwon.html.impl;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import ru.noties.markwon.html.impl.jsoup.nodes.Attributes; import java.util.Collections;
import ru.noties.markwon.html.impl.jsoup.parser.Token;
import ru.noties.markwon.html.api.HtmlTag;
import ru.noties.markwon.html.impl.HtmlTagImpl.InlineImpl;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
@ -19,24 +21,27 @@ public class HtmlEmptyTagReplacementTest {
@Test @Test
public void imageReplacementNoAlt() { public void imageReplacementNoAlt() {
final Token.StartTag startTag = new Token.StartTag(); final HtmlTag.Inline img = new InlineImpl("img", -1, Collections.<String, String>emptyMap());
startTag.normalName = "img"; assertEquals("\uFFFC", replacement.replace(img));
assertEquals("\uFFFC", replacement.replace(startTag));
} }
@Test @Test
public void imageReplacementAlt() { public void imageReplacementAlt() {
final Token.StartTag startTag = new Token.StartTag(); final HtmlTag.Inline img = new InlineImpl(
startTag.normalName = "img"; "img",
startTag.attributes = new Attributes().put("alt", "alternative27"); -1,
assertEquals("alternative27", replacement.replace(startTag)); Collections.singletonMap("alt", "alternative27")
);
assertEquals("alternative27", replacement.replace(img));
} }
@Test @Test
public void brAddsNewLine() { public void brAddsNewLine() {
final Token.StartTag startTag = new Token.StartTag(); final HtmlTag.Inline br = new InlineImpl(
startTag.normalName = "br"; "br",
startTag.selfClosing = true; -1,
assertEquals("\n", replacement.replace(startTag)); Collections.<String, String>emptyMap()
);
assertEquals("\n", replacement.replace(br));
} }
} }

View File

@ -17,7 +17,6 @@ import java.util.Set;
import ru.noties.markwon.html.api.HtmlTag; import ru.noties.markwon.html.api.HtmlTag;
import ru.noties.markwon.html.api.MarkwonHtmlParser; import ru.noties.markwon.html.api.MarkwonHtmlParser;
import ru.noties.markwon.html.impl.jsoup.parser.Token;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
@ -36,8 +35,8 @@ public class MarkwonHtmlParserImplTest {
final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() { final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() {
@Nullable @Nullable
@Override @Override
public String replace(@NonNull Token.StartTag startTag) { public String replace(@NonNull HtmlTag tag) {
return startTag.normalName; return tag.name();
} }
}); });
@ -98,7 +97,7 @@ public class MarkwonHtmlParserImplTest {
final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() { final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() {
@Nullable @Nullable
@Override @Override
public String replace(@NonNull Token.StartTag startTag) { public String replace(@NonNull HtmlTag tag) {
return null; return null;
} }
}); });
@ -143,7 +142,7 @@ public class MarkwonHtmlParserImplTest {
final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() { final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() {
@Nullable @Nullable
@Override @Override
public String replace(@NonNull Token.StartTag startTag) { public String replace(@NonNull HtmlTag tag) {
return null; return null;
} }
}); });
@ -212,7 +211,7 @@ public class MarkwonHtmlParserImplTest {
final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() { final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() {
@Nullable @Nullable
@Override @Override
public String replace(@NonNull Token.StartTag startTag) { public String replace(@NonNull HtmlTag tag) {
return null; return null;
} }
}); });
@ -278,10 +277,9 @@ public class MarkwonHtmlParserImplTest {
); );
final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() { final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() {
@Nullable
@Override @Override
public String replace(@NonNull Token.StartTag startTag) { public String replace(@NonNull HtmlTag tag) {
return startTag.normalName; return tag.name();
} }
}); });
@ -473,8 +471,6 @@ public class MarkwonHtmlParserImplTest {
final StringBuilder output = new StringBuilder(); final StringBuilder output = new StringBuilder();
impl.processFragment(output, "<DiV><I>italic <eM>emphasis</Em> italic</i></dIv>"); impl.processFragment(output, "<DiV><I>italic <eM>emphasis</Em> italic</i></dIv>");
System.out.printf("output: `%s`%n", output);
final CaptureInlineTagsAction inlineTagsAction = new CaptureInlineTagsAction(); final CaptureInlineTagsAction inlineTagsAction = new CaptureInlineTagsAction();
final CaptureBlockTagsAction blockTagsAction = new CaptureBlockTagsAction(); final CaptureBlockTagsAction blockTagsAction = new CaptureBlockTagsAction();
@ -804,6 +800,39 @@ public class MarkwonHtmlParserImplTest {
assertEquals(Arrays.toString(split), 5, split.length); assertEquals(Arrays.toString(split), 5, split.length);
} }
// Parses a single inline tag whose attribute names use mixed case and checks
// that the captured attributes are reachable through lower-case keys; an
// attribute given without a value is expected to map to an empty string.
@Test
public void attributesAreLowerCase() {
    final MarkwonHtmlParserImpl parser = MarkwonHtmlParserImpl.create();
    final StringBuilder out = new StringBuilder();
    parser.processFragment(out, "<i CLASS=\"my-class\" dIsAbLeD @HeLLo=\"there\">");

    final CaptureInlineTagsAction captured = new CaptureInlineTagsAction();
    parser.flushInlineTags(out.length(), captured);

    // exactly one inline tag must have been reported
    assertTrue(captured.called);
    assertEquals(1, captured.tags.size());

    with(captured.tags.get(0), new Action<HtmlTag.Inline>() {
        @Override
        public void apply(@NonNull HtmlTag.Inline tag) {
            assertEquals("i", tag.name());
            with(tag.attributes(), new Action<Map<String, String>>() {
                @Override
                public void apply(@NonNull Map<String, String> attrs) {
                    assertEquals(3, attrs.size());
                    assertEquals("my-class", attrs.get("class"));
                    assertEquals("", attrs.get("disabled"));
                    assertEquals("there", attrs.get("@hello"));
                }
            });
        }
    });
}
private static class CaptureTagsAction<T> implements MarkwonHtmlParser.FlushAction<T> { private static class CaptureTagsAction<T> implements MarkwonHtmlParser.FlushAction<T> {
boolean called; boolean called;

View File

@ -22,13 +22,12 @@ import java.util.Iterator;
@SuppressWarnings({"WeakerAccess", "unused"}) @SuppressWarnings({"WeakerAccess", "unused"})
public class SpannableBuilder implements Appendable, CharSequence { public class SpannableBuilder implements Appendable, CharSequence {
/**
* @since 2.0.0
*/
public static void setSpans(@NonNull SpannableBuilder builder, @Nullable Object spans, int start, int end) { public static void setSpans(@NonNull SpannableBuilder builder, @Nullable Object spans, int start, int end) {
if (spans != null) { if (spans != null) {
// let's filter non-valid positions here, so there is no need to validate
// it whilst applying non-closed html tags
//
// setting a span for an invalid position can lead to silent fail (no exception, // setting a span for an invalid position can lead to silent fail (no exception,
// but execution is stopped) // but execution is stopped)
if (!isPositionValid(builder.length(), start, end)) { if (!isPositionValid(builder.length(), start, end)) {

View File

@ -22,6 +22,11 @@ public abstract class TagHandler {
TagHandler handler; TagHandler handler;
for (HtmlTag.Block child : block.children()) { for (HtmlTag.Block child : block.children()) {
if (!child.isClosed()) {
continue;
}
handler = configuration.htmlRenderer().tagHandler(child.name()); handler = configuration.htmlRenderer().tagHandler(child.name());
if (handler != null) { if (handler != null) {
handler.handle(configuration, builder, child); handler.handle(configuration, builder, child);