diff --git a/markwon-html-parser-api/src/main/java/ru/noties/markwon/html/api/MarkwonHtmlParserNoOp.java b/markwon-html-parser-api/src/main/java/ru/noties/markwon/html/api/MarkwonHtmlParserNoOp.java
index fb55c5f8..f50a8941 100644
--- a/markwon-html-parser-api/src/main/java/ru/noties/markwon/html/api/MarkwonHtmlParserNoOp.java
+++ b/markwon-html-parser-api/src/main/java/ru/noties/markwon/html/api/MarkwonHtmlParserNoOp.java
@@ -2,6 +2,10 @@ package ru.noties.markwon.html.api;
import android.support.annotation.NonNull;
+/**
+ * @see MarkwonHtmlParser
+ * @since 2.0.0
+ */
class MarkwonHtmlParserNoOp extends MarkwonHtmlParser {
@Override
diff --git a/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/HtmlEmptyTagReplacement.java b/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/HtmlEmptyTagReplacement.java
index 6e852b1c..c0d304dc 100644
--- a/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/HtmlEmptyTagReplacement.java
+++ b/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/HtmlEmptyTagReplacement.java
@@ -3,14 +3,15 @@ package ru.noties.markwon.html.impl;
import android.support.annotation.NonNull;
import android.support.annotation.Nullable;
-import ru.noties.markwon.html.impl.jsoup.parser.Token;
+import ru.noties.markwon.html.api.HtmlTag;
/**
* This class will be used to append some text to output in order to
* apply a Span for this tag. Please note that this class will be used for
* _void_ tags and tags that are self-closed (even if HTML spec doesn\'t specify
* a tag as self-closed). This is due to the fact that underlying parser does not
- * validate context and does not check if a tag is correctly used.
+ * validate context and does not check if a tag is correctly used. Plus it will be
+ * used for tags without content, for example: {@code }
*
* @since 2.0.0
*/
@@ -28,15 +29,16 @@ public class HtmlEmptyTagReplacement {
* lead to `Inline` tag have start & end the same value, thus not applicable for applying a Span)
*/
@Nullable
- public String replace(@NonNull Token.StartTag startTag) {
+ public String replace(@NonNull HtmlTag tag) {
final String replacement;
- final String name = startTag.normalName;
+ final String name = tag.name();
+
if ("br".equals(name)) {
replacement = "\n";
} else if ("img".equals(name)) {
- final String alt = startTag.attributes.getIgnoreCase("alt");
+ final String alt = tag.attributes().get("alt");
if (alt == null
|| alt.length() == 0) {
// no alt is provided
@@ -50,5 +52,4 @@ public class HtmlEmptyTagReplacement {
return replacement;
}
-
}
diff --git a/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/MarkwonHtmlParserImpl.java b/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/MarkwonHtmlParserImpl.java
index 7ac819f9..791147d2 100644
--- a/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/MarkwonHtmlParserImpl.java
+++ b/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/MarkwonHtmlParserImpl.java
@@ -119,6 +119,11 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
private boolean isInsidePreTag;
+ private Tokeniser tokeniser;
+
+ private CharacterReader reader;
+
+
MarkwonHtmlParserImpl(
@NonNull HtmlEmptyTagReplacement replacement,
@NonNull TrimmingAppender trimmingAppender) {
@@ -126,13 +131,14 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
this.trimmingAppender = trimmingAppender;
}
-
@Override
public void processFragment(
@NonNull T output,
@NonNull String htmlFragment) {
- // todo: maybe there is a way to reuse tokeniser...
+ // we might want to reuse tokeniser (at least when the same output is involved)
+ // as CharacterReader does a bit of initialization (cache etc) as it's
+ // primary usage is parsing a document in one run (not parsing _fragments_)
final Tokeniser tokeniser = new Tokeniser(new CharacterReader(htmlFragment), ParseErrorList.noTracking());
while (true) {
@@ -239,7 +245,7 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
if (isVoidTag(name)
|| startTag.selfClosing) {
- final String replacement = emptyTagReplacement.replace(startTag);
+ final String replacement = emptyTagReplacement.replace(inline);
if (replacement != null
&& replacement.length() > 0) {
appendQuietly(output, replacement);
@@ -261,6 +267,12 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
// try to find it, if none found -> ignore
final HtmlTagImpl.InlineImpl openInline = findOpenInlineTag(endTag.normalName);
if (openInline != null) {
+
+ // okay, if this tag is empty -> call replacement
+ if (isEmpty(output, openInline)) {
+ appendEmptyTagReplacement(output, openInline);
+ }
+
// close open inline tag
openInline.closeAt(output.length());
}
@@ -301,7 +313,7 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
final boolean isVoid = isVoidTag(name) || startTag.selfClosing;
if (isVoid) {
- final String replacement = emptyTagReplacement.replace(startTag);
+ final String replacement = emptyTagReplacement.replace(block);
if (replacement != null
&& replacement.length() > 0) {
appendQuietly(output, replacement);
@@ -331,6 +343,11 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
isInsidePreTag = false;
}
+ // okay, if this tag is empty -> call replacement
+ if (isEmpty(output, block)) {
+ appendEmptyTagReplacement(output, block);
+ }
+
block.closeAt(output.length());
if (TAG_PARAGRAPH.equals(name)) {
@@ -434,4 +451,19 @@ public class MarkwonHtmlParserImpl extends MarkwonHtmlParser {
return map;
}
+
+ protected static boolean isEmpty(
+ @NonNull T output,
+ @NonNull HtmlTagImpl tag) {
+ return tag.start == output.length();
+ }
+
+ protected void appendEmptyTagReplacement(
+ @NonNull T output,
+ @NonNull HtmlTagImpl tag) {
+ final String replacement = emptyTagReplacement.replace(tag);
+ if (replacement != null) {
+ appendQuietly(output, replacement);
+ }
+ }
}
diff --git a/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/jsoup/nodes/Attributes.java b/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/jsoup/nodes/Attributes.java
index 9735e8f4..71494812 100644
--- a/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/jsoup/nodes/Attributes.java
+++ b/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/jsoup/nodes/Attributes.java
@@ -250,7 +250,8 @@ public class Attributes implements Iterable, Cloneable {
@Override
public Attribute next() {
- final Attribute attr = new Attribute(keys[i], vals[i], Attributes.this);
+ final String val = vals[i];
+ final Attribute attr = new Attribute(keys[i], val == null ? "" : val, Attributes.this);
i++;
return attr;
}
@@ -262,21 +263,21 @@ public class Attributes implements Iterable, Cloneable {
};
}
- /**
- Get the attributes as a List, for iteration.
- @return an view of the attributes as an unmodifialbe List.
- */
- public List asList() {
- ArrayList list = new ArrayList<>(size);
- for (int i = 0; i < size; i++) {
-// Attribute attr = vals[i] == null ?
-// new BooleanAttribute(keys[i]) : // deprecated class, but maybe someone still wants it
-// new Attribute(keys[i], vals[i], Attributes.this);
-// list.add(attr);
- list.add(new Attribute(keys[i], vals[i], Attributes.this));
- }
- return Collections.unmodifiableList(list);
- }
+// /**
+// Get the attributes as a List, for iteration.
+// @return an view of the attributes as an unmodifialbe List.
+// */
+// public List asList() {
+// ArrayList list = new ArrayList<>(size);
+// for (int i = 0; i < size; i++) {
+//// Attribute attr = vals[i] == null ?
+//// new BooleanAttribute(keys[i]) : // deprecated class, but maybe someone still wants it
+//// new Attribute(keys[i], vals[i], Attributes.this);
+//// list.add(attr);
+// list.add(new Attribute(keys[i], vals[i], Attributes.this));
+// }
+// return Collections.unmodifiableList(list);
+// }
// /**
// * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys
diff --git a/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/jsoup/parser/CharacterReader.java b/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/jsoup/parser/CharacterReader.java
index 0ae673a9..f7b7d892 100644
--- a/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/jsoup/parser/CharacterReader.java
+++ b/markwon-html-parser-impl/src/main/java/ru/noties/markwon/html/impl/jsoup/parser/CharacterReader.java
@@ -1,5 +1,7 @@
package ru.noties.markwon.html.impl.jsoup.parser;
+import android.support.annotation.NonNull;
+
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
@@ -10,12 +12,12 @@ import ru.noties.markwon.html.impl.jsoup.UncheckedIOException;
import ru.noties.markwon.html.impl.jsoup.helper.Validate;
/**
- CharacterReader consumes tokens off a string. Used internally by jsoup. API subject to changes.
+ * CharacterReader consumes tokens off a string. Used internally by jsoup. API subject to changes.
*/
public final class CharacterReader {
static final char EOF = (char) -1;
private static final int maxStringCacheLen = 12;
- static final int maxBufferLen = 1024 * 32; // visible for testing
+ static final int maxBufferLen = 1024 * 4; // visible for testing
private static final int readAheadLimit = (int) (maxBufferLen * 0.75);
private final char[] charBuf;
@@ -25,13 +27,13 @@ public final class CharacterReader {
private int bufPos;
private int readerPos;
private int bufMark;
- private final String[] stringCache = new String[512]; // holds reused strings in this doc, to lessen garbage
+ private final String[] stringCache = new String[128]; // holds reused strings in this doc, to lessen garbage
public CharacterReader(Reader input, int sz) {
Validate.notNull(input);
Validate.isTrue(input.markSupported());
reader = input;
- charBuf = new char[sz > maxBufferLen ? maxBufferLen : sz];
+ charBuf = new char[maxBufferLen];
bufferUp();
}
@@ -43,6 +45,15 @@ public final class CharacterReader {
this(new StringReader(input), input.length());
}
+// public void swapInput(@NonNull String input) {
+// reader = new StringReader(input);
+// bufLength = 0;
+// bufSplitPoint = 0;
+// bufPos = 0;
+// readerPos = 0;
+// bufferUp();
+// }
+
private void bufferUp() {
if (bufPos < bufSplitPoint)
return;
@@ -66,6 +77,7 @@ public final class CharacterReader {
/**
* Gets the current cursor position in the content.
+ *
* @return current position
*/
public int pos() {
@@ -74,6 +86,7 @@ public final class CharacterReader {
/**
* Tests if all the content has been read.
+ *
* @return true if nothing left to read.
*/
public boolean isEmpty() {
@@ -87,6 +100,7 @@ public final class CharacterReader {
/**
* Get the char at the current position.
+ *
* @return char
*/
public char current() {
@@ -122,6 +136,7 @@ public final class CharacterReader {
/**
* Returns the number of characters between the current position and the next instance of the input char
+ *
* @param c scan target
* @return offset between current position and next instance of target. -1 if not found.
*/
@@ -148,9 +163,9 @@ public final class CharacterReader {
for (int offset = bufPos; offset < bufLength; offset++) {
// scan to first instance of startchar:
if (startChar != charBuf[offset])
- while(++offset < bufLength && startChar != charBuf[offset]) { /* empty */ }
+ while (++offset < bufLength && startChar != charBuf[offset]) { /* empty */ }
int i = offset + 1;
- int last = i + seq.length()-1;
+ int last = i + seq.length() - 1;
if (offset < bufLength && last <= bufLength) {
for (int j = 1; i < last && seq.charAt(j) == charBuf[i]; i++, j++) { /* empty */ }
if (i == last) // found full sequence
@@ -162,6 +177,7 @@ public final class CharacterReader {
/**
* Reads characters up to the specific char.
+ *
* @param c the delimiter
* @return the chars read
*/
@@ -189,6 +205,7 @@ public final class CharacterReader {
/**
* Read characters until the first of any delimiters is found.
+ *
* @param chars delimiters to scan for
* @return characters read up to the matched delimiter.
*/
@@ -198,7 +215,8 @@ public final class CharacterReader {
final int remaining = bufLength;
final char[] val = charBuf;
- OUTER: while (bufPos < remaining) {
+ OUTER:
+ while (bufPos < remaining) {
for (char c : chars) {
if (val[bufPos] == c)
break OUTER;
@@ -206,7 +224,7 @@ public final class CharacterReader {
bufPos++;
}
- return bufPos > start ? cacheString(charBuf, stringCache, start, bufPos -start) : "";
+ return bufPos > start ? cacheString(charBuf, stringCache, start, bufPos - start) : "";
}
String consumeToAnySorted(final char... chars) {
@@ -221,7 +239,7 @@ public final class CharacterReader {
bufPos++;
}
- return bufPos > start ? cacheString(charBuf, stringCache, start, bufPos -start) : "";
+ return bufPos > start ? cacheString(charBuf, stringCache, start, bufPos - start) : "";
}
String consumeData() {
@@ -233,12 +251,12 @@ public final class CharacterReader {
while (bufPos < remaining) {
final char c = val[bufPos];
- if (c == '&'|| c == '<' || c == TokeniserState.nullChar)
+ if (c == '&' || c == '<' || c == TokeniserState.nullChar)
break;
bufPos++;
}
- return bufPos > start ? cacheString(charBuf, stringCache, start, bufPos -start) : "";
+ return bufPos > start ? cacheString(charBuf, stringCache, start, bufPos - start) : "";
}
String consumeTagName() {
@@ -250,12 +268,12 @@ public final class CharacterReader {
while (bufPos < remaining) {
final char c = val[bufPos];
- if (c == '\t'|| c == '\n'|| c == '\r'|| c == '\f'|| c == ' '|| c == '/'|| c == '>'|| c == TokeniserState.nullChar)
+ if (c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == ' ' || c == '/' || c == '>' || c == TokeniserState.nullChar)
break;
bufPos++;
}
- return bufPos > start ? cacheString(charBuf, stringCache, start, bufPos -start) : "";
+ return bufPos > start ? cacheString(charBuf, stringCache, start, bufPos - start) : "";
}
String consumeToEnd() {
@@ -338,7 +356,7 @@ public final class CharacterReader {
return false;
for (int offset = 0; offset < scanLength; offset++)
- if (seq.charAt(offset) != charBuf[bufPos +offset])
+ if (seq.charAt(offset) != charBuf[bufPos + offset])
return false;
return true;
}
@@ -423,7 +441,7 @@ public final class CharacterReader {
/**
* Caches short strings, as a flywheel pattern, to reduce GC load. Just for this doc, to prevent leaks.
- *
+ *
* Simplistic, and on hash collisions just falls back to creating a new string, vs a full HashMap with Entry list.
* That saves both having to create objects as hash keys, and running through the entry list, at the expense of
* some more duplicates.
diff --git a/markwon-html-parser-impl/src/test/java/ru/noties/markwon/html/impl/HtmlEmptyTagReplacementTest.java b/markwon-html-parser-impl/src/test/java/ru/noties/markwon/html/impl/HtmlEmptyTagReplacementTest.java
index b4ca8996..eab3a9f1 100644
--- a/markwon-html-parser-impl/src/test/java/ru/noties/markwon/html/impl/HtmlEmptyTagReplacementTest.java
+++ b/markwon-html-parser-impl/src/test/java/ru/noties/markwon/html/impl/HtmlEmptyTagReplacementTest.java
@@ -3,8 +3,10 @@ package ru.noties.markwon.html.impl;
import org.junit.Before;
import org.junit.Test;
-import ru.noties.markwon.html.impl.jsoup.nodes.Attributes;
-import ru.noties.markwon.html.impl.jsoup.parser.Token;
+import java.util.Collections;
+
+import ru.noties.markwon.html.api.HtmlTag;
+import ru.noties.markwon.html.impl.HtmlTagImpl.InlineImpl;
import static org.junit.Assert.assertEquals;
@@ -19,24 +21,27 @@ public class HtmlEmptyTagReplacementTest {
@Test
public void imageReplacementNoAlt() {
- final Token.StartTag startTag = new Token.StartTag();
- startTag.normalName = "img";
- assertEquals("\uFFFC", replacement.replace(startTag));
+ final HtmlTag.Inline img = new InlineImpl("img", -1, Collections.emptyMap());
+ assertEquals("\uFFFC", replacement.replace(img));
}
@Test
public void imageReplacementAlt() {
- final Token.StartTag startTag = new Token.StartTag();
- startTag.normalName = "img";
- startTag.attributes = new Attributes().put("alt", "alternative27");
- assertEquals("alternative27", replacement.replace(startTag));
+ final HtmlTag.Inline img = new InlineImpl(
+ "img",
+ -1,
+ Collections.singletonMap("alt", "alternative27")
+ );
+ assertEquals("alternative27", replacement.replace(img));
}
@Test
public void brAddsNewLine() {
- final Token.StartTag startTag = new Token.StartTag();
- startTag.normalName = "br";
- startTag.selfClosing = true;
- assertEquals("\n", replacement.replace(startTag));
+ final HtmlTag.Inline br = new InlineImpl(
+ "br",
+ -1,
+ Collections.emptyMap()
+ );
+ assertEquals("\n", replacement.replace(br));
}
}
\ No newline at end of file
diff --git a/markwon-html-parser-impl/src/test/java/ru/noties/markwon/html/impl/MarkwonHtmlParserImplTest.java b/markwon-html-parser-impl/src/test/java/ru/noties/markwon/html/impl/MarkwonHtmlParserImplTest.java
index 1dd20668..3edcfeb1 100644
--- a/markwon-html-parser-impl/src/test/java/ru/noties/markwon/html/impl/MarkwonHtmlParserImplTest.java
+++ b/markwon-html-parser-impl/src/test/java/ru/noties/markwon/html/impl/MarkwonHtmlParserImplTest.java
@@ -17,7 +17,6 @@ import java.util.Set;
import ru.noties.markwon.html.api.HtmlTag;
import ru.noties.markwon.html.api.MarkwonHtmlParser;
-import ru.noties.markwon.html.impl.jsoup.parser.Token;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -36,8 +35,8 @@ public class MarkwonHtmlParserImplTest {
final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() {
@Nullable
@Override
- public String replace(@NonNull Token.StartTag startTag) {
- return startTag.normalName;
+ public String replace(@NonNull HtmlTag tag) {
+ return tag.name();
}
});
@@ -98,7 +97,7 @@ public class MarkwonHtmlParserImplTest {
final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() {
@Nullable
@Override
- public String replace(@NonNull Token.StartTag startTag) {
+ public String replace(@NonNull HtmlTag tag) {
return null;
}
});
@@ -143,7 +142,7 @@ public class MarkwonHtmlParserImplTest {
final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() {
@Nullable
@Override
- public String replace(@NonNull Token.StartTag startTag) {
+ public String replace(@NonNull HtmlTag tag) {
return null;
}
});
@@ -212,7 +211,7 @@ public class MarkwonHtmlParserImplTest {
final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() {
@Nullable
@Override
- public String replace(@NonNull Token.StartTag startTag) {
+ public String replace(@NonNull HtmlTag tag) {
return null;
}
});
@@ -278,10 +277,9 @@ public class MarkwonHtmlParserImplTest {
);
final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create(new HtmlEmptyTagReplacement() {
- @Nullable
@Override
- public String replace(@NonNull Token.StartTag startTag) {
- return startTag.normalName;
+ public String replace(@NonNull HtmlTag tag) {
+ return tag.name();
}
});
@@ -473,8 +471,6 @@ public class MarkwonHtmlParserImplTest {
final StringBuilder output = new StringBuilder();
impl.processFragment(output, "italic emphasis italic
");
- System.out.printf("output: `%s`%n", output);
-
final CaptureInlineTagsAction inlineTagsAction = new CaptureInlineTagsAction();
final CaptureBlockTagsAction blockTagsAction = new CaptureBlockTagsAction();
@@ -804,6 +800,39 @@ public class MarkwonHtmlParserImplTest {
assertEquals(Arrays.toString(split), 5, split.length);
}
+ @Test
+ public void attributesAreLowerCase() {
+
+ final MarkwonHtmlParserImpl impl = MarkwonHtmlParserImpl.create();
+ final StringBuilder output = new StringBuilder();
+
+ impl.processFragment(output, "");
+
+ final CaptureInlineTagsAction action = new CaptureInlineTagsAction();
+ impl.flushInlineTags(output.length(), action);
+
+ assertTrue(action.called);
+ assertEquals(1, action.tags.size());
+
+ with(action.tags.get(0), new Action() {
+ @Override
+ public void apply(@NonNull HtmlTag.Inline inline) {
+
+ assertEquals("i", inline.name());
+
+ with(inline.attributes(), new Action