From 93a14b47313ec6c0f20d8c36f52938d658da0ed7 Mon Sep 17 00:00:00 2001 From: Dimitry Ivanov Date: Wed, 13 Nov 2019 13:38:59 +0300 Subject: [PATCH] Created inline-parser module --- markwon-inline-parser/README.md | 20 + markwon-inline-parser/build.gradle | 21 + markwon-inline-parser/gradle.properties | 4 + .../src/main/AndroidManifest.xml | 1 + .../AsteriskDelimiterProcessor.java | 7 + .../inlineparser/AutolinkInlineProcessor.java | 20 +- .../BackslashInlineProcessor.java | 22 +- .../BackticksInlineProcessor.java | 22 +- .../inlineparser/BangInlineProcessor.java | 21 +- .../CloseBracketInlineProcessor.java | 27 +- .../inlineparser/EntityInlineProcessor.java | 20 +- .../inlineparser/HtmlInlineProcessor.java | 23 +- .../inlineparser/InlineParserUtils.java | 77 ++ .../markwon/inlineparser/InlineProcessor.java | 148 ++ .../inlineparser/MarkwonInlineParser.java | 785 +++++------ .../MarkwonInlineParserContext.java | 65 + .../inlineparser/NewLineInlineProcessor.java | 18 +- .../OpenBracketInlineProcessor.java | 22 +- .../StaggeredDelimiterProcessor.java | 3 +- .../UnderscoreDelimiterProcessor.java | 7 + sample/build.gradle | 1 + .../markwon/sample/editor/EditorActivity.java | 94 +- .../markwon/sample/editor/inline/Inline.java | 429 ------ .../sample/editor/inline/InlineContext.java | 62 - .../editor/inline/InlineParserImpl.java | 1190 ----------------- settings.gradle | 1 + 26 files changed, 790 insertions(+), 2320 deletions(-) create mode 100644 markwon-inline-parser/README.md create mode 100644 markwon-inline-parser/build.gradle create mode 100644 markwon-inline-parser/gradle.properties create mode 100644 markwon-inline-parser/src/main/AndroidManifest.xml create mode 100644 markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AsteriskDelimiterProcessor.java rename sample/src/main/java/io/noties/markwon/sample/editor/inline/AutolinkInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AutolinkInlineProcessor.java (77%) rename 
sample/src/main/java/io/noties/markwon/sample/editor/inline/BackslashInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackslashInlineProcessor.java (57%) rename sample/src/main/java/io/noties/markwon/sample/editor/inline/BackticksInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackticksInlineProcessor.java (72%) rename sample/src/main/java/io/noties/markwon/sample/editor/inline/BangInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BangInlineProcessor.java (60%) rename sample/src/main/java/io/noties/markwon/sample/editor/inline/CloseBracketInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/CloseBracketInlineProcessor.java (88%) rename sample/src/main/java/io/noties/markwon/sample/editor/inline/EntityInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/EntityInlineProcessor.java (62%) rename sample/src/main/java/io/noties/markwon/sample/editor/inline/HtmlInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/HtmlInlineProcessor.java (69%) create mode 100644 markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineParserUtils.java create mode 100644 markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineProcessor.java rename sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserOriginal.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParser.java (56%) create mode 100644 markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParserContext.java rename sample/src/main/java/io/noties/markwon/sample/editor/inline/NewLineInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/NewLineInlineProcessor.java (80%) rename sample/src/main/java/io/noties/markwon/sample/editor/inline/OpenBracketInline.java => 
markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/OpenBracketInlineProcessor.java (52%) rename {sample/src/main/java/io/noties/markwon/sample/editor/inline => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser}/StaggeredDelimiterProcessor.java (97%) create mode 100644 markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/UnderscoreDelimiterProcessor.java delete mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/Inline.java delete mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineContext.java delete mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserImpl.java diff --git a/markwon-inline-parser/README.md b/markwon-inline-parser/README.md new file mode 100644 index 00000000..5b0e1335 --- /dev/null +++ b/markwon-inline-parser/README.md @@ -0,0 +1,20 @@ +# Inline parser + +**Experimental** due to usage of internal (but still visible) classes of commonmark-java: + +```java +import org.commonmark.internal.Bracket; +import org.commonmark.internal.util.Escaping; +import org.commonmark.internal.util.Html5Entities; +import org.commonmark.internal.util.Parsing; +import org.commonmark.internal.Bracket; +import org.commonmark.internal.Delimiter; +import org.commonmark.internal.ReferenceParser; +import org.commonmark.internal.inline.AsteriskDelimiterProcessor; +import org.commonmark.internal.inline.UnderscoreDelimiterProcessor; +import org.commonmark.internal.util.Escaping; +import org.commonmark.internal.Bracket; +import org.commonmark.internal.Delimiter; +``` + +`StaggeredDelimiterProcessor` class source is copied (required for InlineParser) \ No newline at end of file diff --git a/markwon-inline-parser/build.gradle b/markwon-inline-parser/build.gradle new file mode 100644 index 00000000..d7a2ed99 --- /dev/null +++ b/markwon-inline-parser/build.gradle @@ -0,0 +1,21 @@ +apply plugin: 'com.android.library' + +android { + + compileSdkVersion 
config['compile-sdk'] + buildToolsVersion config['build-tools'] + + defaultConfig { + minSdkVersion config['min-sdk'] + targetSdkVersion config['target-sdk'] + versionCode 1 + versionName version + } +} + +dependencies { + api deps['x-annotations'] + api deps['commonmark'] +} + +registerArtifact(this) \ No newline at end of file diff --git a/markwon-inline-parser/gradle.properties b/markwon-inline-parser/gradle.properties new file mode 100644 index 00000000..d386a6a9 --- /dev/null +++ b/markwon-inline-parser/gradle.properties @@ -0,0 +1,4 @@ +POM_NAME=Inline Parser +POM_ARTIFACT_ID=inline-parser +POM_DESCRIPTION=Markwon customizable commonmark-java InlineParser +POM_PACKAGING=aar \ No newline at end of file diff --git a/markwon-inline-parser/src/main/AndroidManifest.xml b/markwon-inline-parser/src/main/AndroidManifest.xml new file mode 100644 index 00000000..1a8bcbb5 --- /dev/null +++ b/markwon-inline-parser/src/main/AndroidManifest.xml @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AsteriskDelimiterProcessor.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AsteriskDelimiterProcessor.java new file mode 100644 index 00000000..3a8d570e --- /dev/null +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AsteriskDelimiterProcessor.java @@ -0,0 +1,7 @@ +package io.noties.markwon.inlineparser; + +/** + * @since 4.2.0-SNAPSHOT + */ +public class AsteriskDelimiterProcessor extends org.commonmark.internal.inline.AsteriskDelimiterProcessor { +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/AutolinkInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AutolinkInlineProcessor.java similarity index 77% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/AutolinkInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AutolinkInlineProcessor.java index 
beaa72c4..6351fe64 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/AutolinkInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AutolinkInlineProcessor.java @@ -1,15 +1,16 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.node.Link; import org.commonmark.node.Text; -import java.util.Collection; -import java.util.Collections; import java.util.regex.Pattern; -public class AutolinkInline extends Inline { +/** + * Parses autolinks, for example {@code <me@mydoma.in>} + * + * @since 4.2.0-SNAPSHOT + */ +public class AutolinkInlineProcessor extends InlineProcessor { private static final Pattern EMAIL_AUTOLINK = Pattern .compile("^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>"); @@ -17,14 +18,13 @@ public class AutolinkInline extends Inline { private static final Pattern AUTOLINK = Pattern .compile("^<[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*>"); - @NonNull @Override - public Collection characters() { - return Collections.singleton('<'); + public char specialCharacter() { + return '<'; } @Override - public boolean parse() { + protected boolean parse() { String m; if ((m = match(EMAIL_AUTOLINK)) != null) { String dest = m.substring(1, m.length() - 1); diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackslashInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackslashInlineProcessor.java similarity index 57% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/BackslashInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackslashInlineProcessor.java index 72b21060..e8f433ca 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackslashInline.java +++ 
b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackslashInlineProcessor.java @@ -1,21 +1,23 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.node.HardLineBreak; -import java.util.Collection; -import java.util.Collections; +import java.util.regex.Pattern; + +/** + * @since 4.2.0-SNAPSHOT + */ +public class BackslashInlineProcessor extends InlineProcessor { + + private static final Pattern ESCAPABLE = MarkwonInlineParser.ESCAPABLE; -public class BackslashInline extends Inline { - @NonNull @Override - public Collection characters() { - return Collections.singleton('\\'); + public char specialCharacter() { + return '\\'; } @Override - public boolean parse() { + protected boolean parse() { index++; if (peek() == '\n') { appendNode(new HardLineBreak()); diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackticksInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackticksInlineProcessor.java similarity index 72% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/BackticksInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackticksInlineProcessor.java index 4ead3d4d..f0c8da9c 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackticksInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackticksInlineProcessor.java @@ -1,27 +1,29 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.node.Code; -import java.util.Collection; -import java.util.Collections; import java.util.regex.Pattern; -public class BackticksInline extends Inline { +/** + * Parses inline code surrounded with {@code `} chars {@code `code`} + * + * @since 4.2.0-SNAPSHOT + */ +public class BackticksInlineProcessor extends 
InlineProcessor { private static final Pattern TICKS = Pattern.compile("`+"); private static final Pattern TICKS_HERE = Pattern.compile("^`+"); - @NonNull + private static final Pattern WHITESPACE = MarkwonInlineParser.WHITESPACE; + @Override - public Collection characters() { - return Collections.singleton('`'); + public char specialCharacter() { + return '`'; } @Override - public boolean parse() { + protected boolean parse() { String ticks = match(TICKS_HERE); if (ticks == null) { return false; diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BangInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BangInlineProcessor.java similarity index 60% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/BangInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BangInlineProcessor.java index 0416f40c..7b9995ac 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BangInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BangInlineProcessor.java @@ -1,22 +1,21 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.internal.Bracket; import org.commonmark.node.Text; -import java.util.Collection; -import java.util.Collections; - -public class BangInline extends Inline { - @NonNull +/** + * Parses markdown images {@code ![alt](#href)} + * + * @since 4.2.0-SNAPSHOT + */ +public class BangInlineProcessor extends InlineProcessor { @Override - public Collection characters() { - return Collections.singleton('!'); + public char specialCharacter() { + return '!'; } @Override - public boolean parse() { + protected boolean parse() { int startIndex = index; index++; if (peek() == '[') { diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/CloseBracketInline.java 
b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/CloseBracketInlineProcessor.java similarity index 88% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/CloseBracketInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/CloseBracketInlineProcessor.java index 78366685..d48f0da2 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/CloseBracketInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/CloseBracketInlineProcessor.java @@ -1,6 +1,4 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.internal.Bracket; import org.commonmark.internal.util.Escaping; @@ -8,18 +6,27 @@ import org.commonmark.node.Image; import org.commonmark.node.Link; import org.commonmark.node.Node; -import java.util.Collection; -import java.util.Collections; +import java.util.regex.Pattern; + +import static io.noties.markwon.inlineparser.InlineParserUtils.mergeChildTextNodes; + +/** + * Parses markdown link or image, relies on {@link OpenBracketInlineProcessor} + * to handle start of these elements + * + * @since 4.2.0-SNAPSHOT + */ +public class CloseBracketInlineProcessor extends InlineProcessor { + + private static final Pattern WHITESPACE = MarkwonInlineParser.WHITESPACE; -public class CloseBracketInline extends Inline { - @NonNull @Override - public Collection characters() { - return Collections.singleton(']'); + public char specialCharacter() { + return ']'; } @Override - public boolean parse() { + protected boolean parse() { index++; int startIndex = index; diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/EntityInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/EntityInlineProcessor.java similarity index 62% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/EntityInline.java rename to 
markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/EntityInlineProcessor.java index f7592aed..c1229bd8 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/EntityInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/EntityInlineProcessor.java @@ -1,26 +1,26 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.internal.util.Html5Entities; -import java.util.Collection; -import java.util.Collections; import java.util.regex.Pattern; -public class EntityInline extends Inline { +/** + * Parses HTML entities {@code &amp;} + * + * @since 4.2.0-SNAPSHOT + */ +public class EntityInlineProcessor extends InlineProcessor { private static final String ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; private static final Pattern ENTITY_HERE = Pattern.compile('^' + ENTITY, Pattern.CASE_INSENSITIVE); - @NonNull @Override - public Collection characters() { - return Collections.singleton('&'); + public char specialCharacter() { + return '&'; } @Override - public boolean parse() { + protected boolean parse() { String m; if ((m = match(ENTITY_HERE)) != null) { appendText(Html5Entities.entityToString(m)); diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/HtmlInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/HtmlInlineProcessor.java similarity index 69% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/HtmlInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/HtmlInlineProcessor.java index 34686ec2..2872491c 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/HtmlInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/HtmlInlineProcessor.java @@ -1,14 +1,16 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; 
+package io.noties.markwon.inlineparser; import org.commonmark.internal.util.Parsing; +import org.commonmark.node.HtmlInline; -import java.util.Collection; -import java.util.Collections; import java.util.regex.Pattern; -public class HtmlInline extends Inline { +/** + * Parses inline HTML tags + * + * @since 4.2.0-SNAPSHOT + */ +public class HtmlInlineProcessor extends InlineProcessor { private static final String HTMLCOMMENT = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->"; private static final String PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; @@ -18,17 +20,16 @@ public class HtmlInline extends Inline { + "|" + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; private static final Pattern HTML_TAG = Pattern.compile('^' + HTMLTAG, Pattern.CASE_INSENSITIVE); - @NonNull @Override - public Collection characters() { - return Collections.singleton('<'); + public char specialCharacter() { + return '<'; } @Override - public boolean parse() { + protected boolean parse() { String m = match(HTML_TAG); if (m != null) { - org.commonmark.node.HtmlInline node = new org.commonmark.node.HtmlInline(); + HtmlInline node = new HtmlInline(); node.setLiteral(m); appendNode(node); return true; diff --git a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineParserUtils.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineParserUtils.java new file mode 100644 index 00000000..544576ee --- /dev/null +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineParserUtils.java @@ -0,0 +1,77 @@ +package io.noties.markwon.inlineparser; + +import org.commonmark.node.Node; +import org.commonmark.node.Text; + +/** + * @since 4.2.0-SNAPSHOT + */ +public abstract class InlineParserUtils { + + public static void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) { + // No nodes between them + if (fromNode == toNode || fromNode.getNext() == toNode) { + return; + } + + mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious()); + } + + public static 
void mergeChildTextNodes(Node node) { + // No children or just one child node, no need for merging + if (node.getFirstChild() == node.getLastChild()) { + return; + } + + mergeTextNodesInclusive(node.getFirstChild(), node.getLastChild()); + } + + public static void mergeTextNodesInclusive(Node fromNode, Node toNode) { + Text first = null; + Text last = null; + int length = 0; + + Node node = fromNode; + while (node != null) { + if (node instanceof Text) { + Text text = (Text) node; + if (first == null) { + first = text; + } + length += text.getLiteral().length(); + last = text; + } else { + mergeIfNeeded(first, last, length); + first = null; + last = null; + length = 0; + } + if (node == toNode) { + break; + } + node = node.getNext(); + } + + mergeIfNeeded(first, last, length); + } + + public static void mergeIfNeeded(Text first, Text last, int textLength) { + if (first != null && last != null && first != last) { + StringBuilder sb = new StringBuilder(textLength); + sb.append(first.getLiteral()); + Node node = first.getNext(); + Node stop = last.getNext(); + while (node != stop) { + sb.append(((Text) node).getLiteral()); + Node unlink = node; + node = node.getNext(); + unlink.unlink(); + } + String literal = sb.toString(); + first.setLiteral(literal); + } + } + + private InlineParserUtils() { + } +} diff --git a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineProcessor.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineProcessor.java new file mode 100644 index 00000000..2462e324 --- /dev/null +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineProcessor.java @@ -0,0 +1,148 @@ +package io.noties.markwon.inlineparser; + +import androidx.annotation.NonNull; +import androidx.annotation.Nullable; + +import org.commonmark.internal.Bracket; +import org.commonmark.internal.Delimiter; +import org.commonmark.node.Link; +import org.commonmark.node.Node; +import org.commonmark.node.Text; + +import 
java.util.Map; +import java.util.regex.Pattern; + +/** + * @see AutolinkInlineProcessor + * @see BackslashInlineProcessor + * @see BackticksInlineProcessor + * @see BangInlineProcessor + * @see CloseBracketInlineProcessor + * @see EntityInlineProcessor + * @see HtmlInlineProcessor + * @see NewLineInlineProcessor + * @see OpenBracketInlineProcessor + * @see MarkwonInlineParser.FactoryBuilder#addInlineProcessor(InlineProcessor) + * @see MarkwonInlineParser.FactoryBuilder#excludeInlineProcessor(Class) + * @since 4.2.0-SNAPSHOT + */ +public abstract class InlineProcessor { + + /** + * Special character that triggers parsing attempt + */ + public abstract char specialCharacter(); + + /** + * @return boolean indicating if parsing succeeded + */ + protected abstract boolean parse(); + + + protected MarkwonInlineParserContext context; + protected Node block; + protected String input; + protected int index; + + public boolean parse(@NonNull MarkwonInlineParserContext context) { + this.context = context; + this.block = context.block(); + this.input = context.input(); + this.index = context.index(); + + final boolean result = parse(); + + // synchronize index + context.setIndex(index); + + return result; + } + + protected Bracket lastBracket() { + return context.lastBracket(); + } + + protected Delimiter lastDelimiter() { + return context.lastDelimiter(); + } + + @NonNull + protected Map referenceMap() { + return context.referenceMap(); + } + + protected void addBracket(Bracket bracket) { + context.addBracket(bracket); + } + + protected void removeLastBracket() { + context.removeLastBracket(); + } + + protected void spnl() { + context.setIndex(index); + context.spnl(); + index = context.index(); + } + + @Nullable + protected String match(@NonNull Pattern re) { + // before trying to match, we must notify context about our index (which we store additionally here) + context.setIndex(index); + + final String result = context.match(re); + + // after match we must reflect index 
change here + this.index = context.index(); + + return result; + } + + @Nullable + protected String parseLinkDestination() { + context.setIndex(index); + final String result = context.parseLinkDestination(); + this.index = context.index(); + return result; + } + + @Nullable + protected String parseLinkTitle() { + context.setIndex(index); + final String result = context.parseLinkTitle(); + this.index = context.index(); + return result; + } + + protected int parseLinkLabel() { + context.setIndex(index); + final int result = context.parseLinkLabel(); + this.index = context.index(); + return result; + } + + protected void processDelimiters(Delimiter stackBottom) { + context.setIndex(index); + context.processDelimiters(stackBottom); + this.index = context.index(); + } + + protected void appendNode(@NonNull Node node) { + context.appendNode(node); + } + + @NonNull + protected Text appendText(@NonNull CharSequence text, int beginIndex, int endIndex) { + return context.appendText(text, beginIndex, endIndex); + } + + @NonNull + protected Text appendText(@NonNull CharSequence text) { + return context.appendText(text); + } + + protected char peek() { + context.setIndex(index); + return context.peek(); + } +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserOriginal.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParser.java similarity index 56% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserOriginal.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParser.java index ee45884d..5bdda362 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserOriginal.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParser.java @@ -1,27 +1,21 @@ -package io.noties.markwon.sample.editor.inline; +package io.noties.markwon.inlineparser; import androidx.annotation.NonNull; 
+import androidx.annotation.Nullable; import org.commonmark.internal.Bracket; import org.commonmark.internal.Delimiter; import org.commonmark.internal.ReferenceParser; -import org.commonmark.internal.inline.AsteriskDelimiterProcessor; -import org.commonmark.internal.inline.UnderscoreDelimiterProcessor; import org.commonmark.internal.util.Escaping; -import org.commonmark.internal.util.Html5Entities; -import org.commonmark.internal.util.Parsing; -import org.commonmark.node.Code; -import org.commonmark.node.HardLineBreak; -import org.commonmark.node.HtmlInline; -import org.commonmark.node.Image; import org.commonmark.node.Link; import org.commonmark.node.Node; -import org.commonmark.node.SoftLineBreak; import org.commonmark.node.Text; import org.commonmark.parser.InlineParser; +import org.commonmark.parser.InlineParserContext; import org.commonmark.parser.InlineParserFactory; import org.commonmark.parser.delimiter.DelimiterProcessor; +import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; import java.util.HashMap; @@ -31,28 +25,66 @@ import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; -public class InlineParserOriginal implements InlineParser, ReferenceParser { +import static io.noties.markwon.inlineparser.InlineParserUtils.mergeChildTextNodes; +import static io.noties.markwon.inlineparser.InlineParserUtils.mergeTextNodesBetweenExclusive; + +/** + * @see #factoryBuilder() + * @see FactoryBuilder + * @since 4.2.0-SNAPSHOT + */ +public class MarkwonInlineParser implements InlineParser, ReferenceParser, MarkwonInlineParserContext { + + public interface FactoryBuilder { + + /** + * @see InlineProcessor + */ + @NonNull + FactoryBuilder addInlineProcessor(@NonNull InlineProcessor processor); + + /** + * @see AsteriskDelimiterProcessor + * @see UnderscoreDelimiterProcessor + */ + @NonNull + FactoryBuilder addDelimiterProcessor(@NonNull DelimiterProcessor processor); + + /** + * Indicate if markdown references are 
enabled. {@code referencesEnabled=true} if {@link #includeDefaults()} + * was called + */ + @NonNull + FactoryBuilder referencesEnabled(boolean referencesEnabled); + + /** + * Includes all default delimiter and inline processors, and sets {@code referencesEnabled=true}. + * Useful with subsequent calls to {@link #excludeInlineProcessor(Class)} or {@link #excludeDelimiterProcessor(Class)} + */ + @NonNull + FactoryBuilder includeDefaults(); + + @NonNull + FactoryBuilder excludeInlineProcessor(@NonNull Class processor); + + @NonNull + FactoryBuilder excludeDelimiterProcessor(@NonNull Class processor); + + @NonNull + InlineParserFactory build(); + } @NonNull - public static InlineParserFactory factory() { - return context -> new InlineParserOriginal(context.getCustomDelimiterProcessors()); + public static FactoryBuilder factoryBuilder() { + return new FactoryBuilderImpl(); } private static final String ESCAPED_CHAR = "\\\\" + Escaping.ESCAPABLE; - private static final String HTMLCOMMENT = "|"; - private static final String PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; - private static final String DECLARATION = "]*>"; - private static final String CDATA = ""; - private static final String HTMLTAG = "(?:" + Parsing.OPENTAG + "|" + Parsing.CLOSETAG + "|" + HTMLCOMMENT - + "|" + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; - private static final String ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; private static final String ASCII_PUNCTUATION = "!\"#\\$%&'\\(\\)\\*\\+,\\-\\./:;<=>\\?@\\[\\\\\\]\\^_`\\{\\|\\}~"; private static final Pattern PUNCTUATION = Pattern .compile("^[" + ASCII_PUNCTUATION + "\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]"); - private static final Pattern HTML_TAG = Pattern.compile('^' + HTMLTAG, Pattern.CASE_INSENSITIVE); - private static final Pattern LINK_TITLE = Pattern.compile( "^(?:\"(" + ESCAPED_CHAR + "|[^\"\\x00])*\"" + '|' + @@ -64,43 +96,29 @@ public class InlineParserOriginal implements InlineParser, 
ReferenceParser { private static final Pattern LINK_LABEL = Pattern.compile("^\\[(?:[^\\\\\\[\\]]|\\\\.)*\\]"); - private static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); - - private static final Pattern ENTITY_HERE = Pattern.compile('^' + ENTITY, Pattern.CASE_INSENSITIVE); - - private static final Pattern TICKS = Pattern.compile("`+"); - - private static final Pattern TICKS_HERE = Pattern.compile("^`+"); - - private static final Pattern EMAIL_AUTOLINK = Pattern - .compile("^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>"); - - private static final Pattern AUTOLINK = Pattern - .compile("^<[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*>"); - private static final Pattern SPNL = Pattern.compile("^ *(?:\n *)?"); private static final Pattern UNICODE_WHITESPACE_CHAR = Pattern.compile("^[\\p{Zs}\t\r\n\f]"); - private static final Pattern WHITESPACE = Pattern.compile("\\s+"); - - private static final Pattern FINAL_SPACE = Pattern.compile(" *$"); - private static final Pattern LINE_END = Pattern.compile("^ *(?:\n|$)"); + static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); + static final Pattern WHITESPACE = Pattern.compile("\\s+"); + + private final boolean referencesEnabled; + private final BitSet specialCharacters; - private final BitSet delimiterCharacters; + private final Map> inlineProcessors; private final Map delimiterProcessors; + private Node block; + private String input; + private int index; + /** * Link references by ID, needs to be built up using parseReference before calling parse. */ - private Map referenceMap = new HashMap<>(); - - private Node block; - - private String input; - private int index; + private Map referenceMap = new HashMap<>(1); /** * Top delimiter (emphasis, strong emphasis or custom emphasis). 
(Brackets are on a separate stack, different @@ -113,37 +131,49 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { */ private Bracket lastBracket; - public InlineParserOriginal(List delimiterProcessors) { + // might we construct these in factory? + public MarkwonInlineParser( + boolean referencesEnabled, + @NonNull List inlineProcessors, + @NonNull List delimiterProcessors) { + this.referencesEnabled = referencesEnabled; + this.inlineProcessors = calculateInlines(inlineProcessors); this.delimiterProcessors = calculateDelimiterProcessors(delimiterProcessors); - this.delimiterCharacters = calculateDelimiterCharacters(this.delimiterProcessors.keySet()); - this.specialCharacters = calculateSpecialCharacters(delimiterCharacters); + this.specialCharacters = calculateSpecialCharacters( + this.inlineProcessors.keySet(), + this.delimiterProcessors.keySet()); } - public static BitSet calculateDelimiterCharacters(Set characters) { - BitSet bitSet = new BitSet(); - for (Character character : characters) { - bitSet.set(character); + @NonNull + private static Map> calculateInlines(@NonNull List inlines) { + final Map> map = new HashMap<>(inlines.size()); + List list; + for (InlineProcessor inlineProcessor : inlines) { + final char character = inlineProcessor.specialCharacter(); + list = map.get(character); + if (list == null) { + list = new ArrayList<>(1); + map.put(character, list); + } + list.add(inlineProcessor); + } + return map; + } + + @NonNull + private static BitSet calculateSpecialCharacters(Set inlineCharacters, Set delimiterCharacters) { + final BitSet bitSet = new BitSet(); + for (Character c : inlineCharacters) { + bitSet.set(c); + } + for (Character c : delimiterCharacters) { + bitSet.set(c); } return bitSet; } - public static BitSet calculateSpecialCharacters(BitSet delimiterCharacters) { - BitSet bitSet = new BitSet(); - bitSet.or(delimiterCharacters); - bitSet.set('\n'); - bitSet.set('`'); - bitSet.set('['); - bitSet.set(']'); - 
bitSet.set('\\'); - bitSet.set('!'); - bitSet.set('<'); - bitSet.set('&'); - return bitSet; - } - - public static Map calculateDelimiterProcessors(List delimiterProcessors) { + private static Map calculateDelimiterProcessors(List delimiterProcessors) { Map map = new HashMap<>(); - addDelimiterProcessors(Arrays.asList(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map); addDelimiterProcessors(delimiterProcessors, map); return map; } @@ -206,6 +236,11 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { */ @Override public int parseReference(String s) { + + if (!referencesEnabled) { + return 0; + } + this.input = s; this.index = 0; String dest; @@ -275,17 +310,22 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { return index - startIndex; } - private Text appendText(CharSequence text, int beginIndex, int endIndex) { + @Override + @NonNull + public Text appendText(@NonNull CharSequence text, int beginIndex, int endIndex) { return appendText(text.subSequence(beginIndex, endIndex)); } - private Text appendText(CharSequence text) { + @Override + @NonNull + public Text appendText(@NonNull CharSequence text) { Text node = new Text(text.toString()); appendNode(node); return node; } - private void appendNode(Node node) { + @Override + public void appendNode(@NonNull Node node) { block.appendChild(node); } @@ -295,46 +335,33 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { * On failure, return false. 
*/ private boolean parseInline() { - boolean res; - char c = peek(); + + final char c = peek(); + if (c == '\0') { return false; } - switch (c) { - case '\n': - res = parseNewline(); - break; - case '\\': - res = parseBackslash(); - break; - case '`': - res = parseBackticks(); - break; - case '[': - res = parseOpenBracket(); - break; - case '!': - res = parseBang(); - break; - case ']': - res = parseCloseBracket(); - break; - case '<': - res = parseAutolink() || parseHtmlInline(); - break; - case '&': - res = parseEntity(); - break; - default: - boolean isDelimiter = delimiterCharacters.get(c); - if (isDelimiter) { - DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c); - res = parseDelimiters(delimiterProcessor, c); - } else { - res = parseString(); + + boolean res = false; + + final List inlines = this.inlineProcessors.get(c); + + if (inlines != null) { + for (InlineProcessor inline : inlines) { + res = inline.parse(this); + if (res) { + break; } - break; + } + } else { + final DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c); + if (delimiterProcessor != null) { + res = parseDelimiters(delimiterProcessor, c); + } else { + res = parseString(); + } } + if (!res) { index++; // When we get here, it's only for a single special character that turned out to not have a special meaning. @@ -349,7 +376,9 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { /** * If RE matches at current index in the input, advance index and return the match; otherwise return null. */ - private String match(Pattern re) { + @Override + @Nullable + public String match(@NonNull Pattern re) { if (index >= input.length()) { return null; } @@ -367,7 +396,8 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { /** * Returns the char at the current input index, or {@code '\0'} in case there are no more characters. 
*/ - private char peek() { + @Override + public char peek() { if (index < input.length()) { return input.charAt(index); } else { @@ -375,87 +405,66 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { } } + @NonNull + @Override + public Node block() { + return block; + } + + @NonNull + @Override + public String input() { + return input; + } + + @Override + public int index() { + return index; + } + + @Override + public void setIndex(int index) { + this.index = index; + } + + @Override + public Bracket lastBracket() { + return lastBracket; + } + + @Override + public Delimiter lastDelimiter() { + return lastDelimiter; + } + + @NonNull + @Override + public Map referenceMap() { + return referenceMap; + } + + @Override + public void addBracket(Bracket bracket) { + if (lastBracket != null) { + lastBracket.bracketAfter = true; + } + lastBracket = bracket; + } + + @Override + public void removeLastBracket() { + lastBracket = lastBracket.previous; + } + /** * Parse zero or more space characters, including at most one newline. */ - private boolean spnl() { + @Override + public boolean spnl() { match(SPNL); return true; } - /** - * Parse a newline. If it was preceded by two spaces, return a hard line break; otherwise a soft line break. - */ - private boolean parseNewline() { - index++; // assume we're at a \n - - Node lastChild = block.getLastChild(); - // Check previous text for trailing spaces. - // The "endsWith" is an optimization to avoid an RE match in the common case. - if (lastChild != null && lastChild instanceof Text && ((Text) lastChild).getLiteral().endsWith(" ")) { - Text text = (Text) lastChild; - String literal = text.getLiteral(); - Matcher matcher = FINAL_SPACE.matcher(literal); - int spaces = matcher.find() ? matcher.end() - matcher.start() : 0; - if (spaces > 0) { - text.setLiteral(literal.substring(0, literal.length() - spaces)); - } - appendNode(spaces >= 2 ? 
new HardLineBreak() : new SoftLineBreak()); - } else { - appendNode(new SoftLineBreak()); - } - - // gobble leading spaces in next line - while (peek() == ' ') { - index++; - } - return true; - } - - /** - * Parse a backslash-escaped special character, adding either the escaped character, a hard line break - * (if the backslash is followed by a newline), or a literal backslash to the block's children. - */ - private boolean parseBackslash() { - index++; - if (peek() == '\n') { - appendNode(new HardLineBreak()); - index++; - } else if (index < input.length() && ESCAPABLE.matcher(input.substring(index, index + 1)).matches()) { - appendText(input, index, index + 1); - index++; - } else { - appendText("\\"); - } - return true; - } - - /** - * Attempt to parse backticks, adding either a backtick code span or a literal sequence of backticks. - */ - private boolean parseBackticks() { - String ticks = match(TICKS_HERE); - if (ticks == null) { - return false; - } - int afterOpenTicks = index; - String matched; - while ((matched = match(TICKS)) != null) { - if (matched.equals(ticks)) { - Code node = new Code(); - String content = input.substring(afterOpenTicks, index - ticks.length()); - String literal = WHITESPACE.matcher(content.trim()).replaceAll(" "); - node.setLiteral(literal); - appendNode(node); - return true; - } - } - // If we got here, we didn't match a closing backtick sequence. - index = afterOpenTicks; - appendText(ticks); - return true; - } - /** * Attempt to parse delimiters like emphasis, strong emphasis or custom delimiters. */ @@ -481,174 +490,12 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { return true; } - /** - * Add open bracket to delimiter stack and add a text node to block's children. 
- */ - private boolean parseOpenBracket() { - int startIndex = index; - index++; - - Text node = appendText("["); - - // Add entry to stack for this opener - addBracket(Bracket.link(node, startIndex, lastBracket, lastDelimiter)); - - return true; - } - - /** - * If next character is [, and ! delimiter to delimiter stack and add a text node to block's children. - * Otherwise just add a text node. - */ - private boolean parseBang() { - int startIndex = index; - index++; - if (peek() == '[') { - index++; - - Text node = appendText("!["); - - // Add entry to stack for this opener - addBracket(Bracket.image(node, startIndex + 1, lastBracket, lastDelimiter)); - } else { - appendText("!"); - } - return true; - } - - /** - * Try to match close bracket against an opening in the delimiter stack. Add either a link or image, or a - * plain [ character, to block's children. If there is a matching delimiter, remove it from the delimiter stack. - */ - private boolean parseCloseBracket() { - index++; - int startIndex = index; - - // Get previous `[` or `![` - Bracket opener = lastBracket; - if (opener == null) { - // No matching opener, just return a literal. - appendText("]"); - return true; - } - - if (!opener.allowed) { - // Matching opener but it's not allowed, just return a literal. 
- appendText("]"); - removeLastBracket(); - return true; - } - - // Check to see if we have a link/image - - String dest = null; - String title = null; - boolean isLinkOrImage = false; - - // Maybe a inline link like `[foo](/uri "title")` - if (peek() == '(') { - index++; - spnl(); - if ((dest = parseLinkDestination()) != null) { - spnl(); - // title needs a whitespace before - if (WHITESPACE.matcher(input.substring(index - 1, index)).matches()) { - title = parseLinkTitle(); - spnl(); - } - if (peek() == ')') { - index++; - isLinkOrImage = true; - } else { - index = startIndex; - } - } - } - - // Maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]` - if (!isLinkOrImage) { - - // See if there's a link label like `[bar]` or `[]` - int beforeLabel = index; - int labelLength = parseLinkLabel(); - String ref = null; - if (labelLength > 2) { - ref = input.substring(beforeLabel, beforeLabel + labelLength); - } else if (!opener.bracketAfter) { - // If the second label is empty `[foo][]` or missing `[foo]`, then the first label is the reference. - // But it can only be a reference when there's no (unescaped) bracket in it. - // If there is, we don't even need to try to look up the reference. This is an optimization. - ref = input.substring(opener.index, startIndex); - } - - if (ref != null) { - Link link = referenceMap.get(Escaping.normalizeReference(ref)); - if (link != null) { - dest = link.getDestination(); - title = link.getTitle(); - isLinkOrImage = true; - } - } - } - - if (isLinkOrImage) { - // If we got here, open is a potential opener - Node linkOrImage = opener.image ? 
new Image(dest, title) : new Link(dest, title); - - Node node = opener.node.getNext(); - while (node != null) { - Node next = node.getNext(); - linkOrImage.appendChild(node); - node = next; - } - appendNode(linkOrImage); - - // Process delimiters such as emphasis inside link/image - processDelimiters(opener.previousDelimiter); - mergeChildTextNodes(linkOrImage); - // We don't need the corresponding text node anymore, we turned it into a link/image node - opener.node.unlink(); - removeLastBracket(); - - // Links within links are not allowed. We found this link, so there can be no other link around it. - if (!opener.image) { - Bracket bracket = lastBracket; - while (bracket != null) { - if (!bracket.image) { - // Disallow link opener. It will still get matched, but will not result in a link. - bracket.allowed = false; - } - bracket = bracket.previous; - } - } - - return true; - - } else { // no link or image - - appendText("]"); - removeLastBracket(); - - index = startIndex; - return true; - } - } - - private void addBracket(Bracket bracket) { - if (lastBracket != null) { - lastBracket.bracketAfter = true; - } - lastBracket = bracket; - } - - private void removeLastBracket() { - lastBracket = lastBracket.previous; - } - /** * Attempt to parse link destination, returning the string or null if no match. */ - private String parseLinkDestination() { + @Override + @Nullable + public String parseLinkDestination() { String res = match(LINK_DESTINATION_BRACES); if (res != null) { // chop off surrounding <..>: if (res.length() == 2) { @@ -705,7 +552,9 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { /** * Attempt to parse link title (sans quotes), returning the string or null if no match. 
*/ - private String parseLinkTitle() { + @Override + @Nullable + public String parseLinkTitle() { String title = match(LINK_TITLE); if (title != null) { // chop off quotes from title and unescape: @@ -718,7 +567,8 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { /** * Attempt to parse a link label, returning number of characters parsed. */ - private int parseLinkLabel() { + @Override + public int parseLinkLabel() { String m = match(LINK_LABEL); // Spec says "A link label can have at most 999 characters inside the square brackets" if (m == null || m.length() > 1001) { @@ -728,56 +578,6 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { } } - /** - * Attempt to parse an autolink (URL or email in pointy brackets). - */ - private boolean parseAutolink() { - String m; - if ((m = match(EMAIL_AUTOLINK)) != null) { - String dest = m.substring(1, m.length() - 1); - Link node = new Link("mailto:" + dest, null); - node.appendChild(new Text(dest)); - appendNode(node); - return true; - } else if ((m = match(AUTOLINK)) != null) { - String dest = m.substring(1, m.length() - 1); - Link node = new Link(dest, null); - node.appendChild(new Text(dest)); - appendNode(node); - return true; - } else { - return false; - } - } - - /** - * Attempt to parse inline HTML. - */ - private boolean parseHtmlInline() { - String m = match(HTML_TAG); - if (m != null) { - HtmlInline node = new HtmlInline(); - node.setLiteral(m); - appendNode(node); - return true; - } else { - return false; - } - } - - /** - * Attempt to parse an entity, return Entity object if successful. - */ - private boolean parseEntity() { - String m; - if ((m = match(ENTITY_HERE)) != null) { - appendText(Html5Entities.entityToString(m)); - return true; - } else { - return false; - } - } - /** * Parse a run of ordinary characters, or a single character with a special meaning in markdown, as a plain string. 
*/ @@ -849,7 +649,8 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { return new DelimiterData(delimiterCount, canOpen, canClose); } - private void processDelimiters(Delimiter stackBottom) { + @Override + public void processDelimiters(Delimiter stackBottom) { Map openersBottom = new HashMap<>(); @@ -981,70 +782,6 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { } } - private void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) { - // No nodes between them - if (fromNode == toNode || fromNode.getNext() == toNode) { - return; - } - - mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious()); - } - - private void mergeChildTextNodes(Node node) { - // No children or just one child node, no need for merging - if (node.getFirstChild() == node.getLastChild()) { - return; - } - - mergeTextNodesInclusive(node.getFirstChild(), node.getLastChild()); - } - - private void mergeTextNodesInclusive(Node fromNode, Node toNode) { - Text first = null; - Text last = null; - int length = 0; - - Node node = fromNode; - while (node != null) { - if (node instanceof Text) { - Text text = (Text) node; - if (first == null) { - first = text; - } - length += text.getLiteral().length(); - last = text; - } else { - mergeIfNeeded(first, last, length); - first = null; - last = null; - length = 0; - } - if (node == toNode) { - break; - } - node = node.getNext(); - } - - mergeIfNeeded(first, last, length); - } - - private void mergeIfNeeded(Text first, Text last, int textLength) { - if (first != null && last != null && first != last) { - StringBuilder sb = new StringBuilder(textLength); - sb.append(first.getLiteral()); - Node node = first.getNext(); - Node stop = last.getNext(); - while (node != stop) { - sb.append(((Text) node).getLiteral()); - Node unlink = node; - node = node.getNext(); - unlink.unlink(); - } - String literal = sb.toString(); - first.setLiteral(literal); - } - } - private static class DelimiterData { 
final int count; @@ -1057,4 +794,120 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { this.canClose = canClose; } } + + static class FactoryBuilderImpl implements FactoryBuilder { + + private final List inlineProcessors = new ArrayList<>(3); + private final List delimiterProcessors = new ArrayList<>(3); + private boolean referencesEnabled; + + @NonNull + @Override + public FactoryBuilder addInlineProcessor(@NonNull InlineProcessor processor) { + this.inlineProcessors.add(processor); + return this; + } + + @NonNull + @Override + public FactoryBuilder addDelimiterProcessor(@NonNull DelimiterProcessor processor) { + this.delimiterProcessors.add(processor); + return this; + } + + @NonNull + @Override + public FactoryBuilder referencesEnabled(boolean referencesEnabled) { + this.referencesEnabled = referencesEnabled; + return this; + } + + @NonNull + @Override + public FactoryBuilder includeDefaults() { + + // by default enabled + this.referencesEnabled = true; + + this.inlineProcessors.addAll(Arrays.asList( + new AutolinkInlineProcessor(), + new BackslashInlineProcessor(), + new BackticksInlineProcessor(), + new BangInlineProcessor(), + new CloseBracketInlineProcessor(), + new EntityInlineProcessor(), + new HtmlInlineProcessor(), + new NewLineInlineProcessor(), + new OpenBracketInlineProcessor())); + + this.delimiterProcessors.addAll(Arrays.asList( + new AsteriskDelimiterProcessor(), + new UnderscoreDelimiterProcessor())); + + return this; + } + + @NonNull + @Override + public FactoryBuilder excludeInlineProcessor(@NonNull Class type) { + for (int i = 0, size = inlineProcessors.size(); i < size; i++) { + if (type.equals(inlineProcessors.get(i).getClass())) { + inlineProcessors.remove(i); + break; + } + } + return this; + } + + @NonNull + @Override + public FactoryBuilder excludeDelimiterProcessor(@NonNull Class type) { + for (int i = 0, size = delimiterProcessors.size(); i < size; i++) { + if 
(type.equals(delimiterProcessors.get(i).getClass())) { + delimiterProcessors.remove(i); + break; + } + } + return this; + } + + @NonNull + @Override + public InlineParserFactory build() { + return new InlineParserFactoryImpl(referencesEnabled, inlineProcessors, delimiterProcessors); + } + } + + static class InlineParserFactoryImpl implements InlineParserFactory { + + private final boolean referencesEnabled; + private final List inlineProcessors; + private final List delimiterProcessors; + + InlineParserFactoryImpl( + boolean referencesEnabled, + @NonNull List inlineProcessors, + @NonNull List delimiterProcessors) { + this.referencesEnabled = referencesEnabled; + this.inlineProcessors = inlineProcessors; + this.delimiterProcessors = delimiterProcessors; + } + + @Override + public InlineParser create(InlineParserContext inlineParserContext) { + final List delimiterProcessors; + final List customDelimiterProcessors = inlineParserContext.getCustomDelimiterProcessors(); + final int size = customDelimiterProcessors != null + ? 
customDelimiterProcessors.size() + : 0; + if (size > 0) { + delimiterProcessors = new ArrayList<>(size + this.delimiterProcessors.size()); + delimiterProcessors.addAll(this.delimiterProcessors); + delimiterProcessors.addAll(customDelimiterProcessors); + } else { + delimiterProcessors = this.delimiterProcessors; + } + return new MarkwonInlineParser(referencesEnabled, inlineProcessors, delimiterProcessors); + } + } } diff --git a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParserContext.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParserContext.java new file mode 100644 index 00000000..726ff4eb --- /dev/null +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParserContext.java @@ -0,0 +1,65 @@ +package io.noties.markwon.inlineparser; + +import androidx.annotation.NonNull; +import androidx.annotation.Nullable; + +import org.commonmark.internal.Bracket; +import org.commonmark.internal.Delimiter; +import org.commonmark.node.Link; +import org.commonmark.node.Node; +import org.commonmark.node.Text; + +import java.util.Map; +import java.util.regex.Pattern; + +public interface MarkwonInlineParserContext { + + @NonNull + Node block(); + + @NonNull + String input(); + + int index(); + + void setIndex(int index); + + Bracket lastBracket(); + + Delimiter lastDelimiter(); + + @NonNull + Map referenceMap(); + + void addBracket(Bracket bracket); + + void removeLastBracket(); + + boolean spnl(); + + /** + * Returns the char at the current input index, or {@code '\0'} in case there are no more characters. 
+ */ + char peek(); + + @Nullable + String match(@NonNull Pattern re); + + void appendNode(@NonNull Node node); + + @NonNull + Text appendText(@NonNull CharSequence text, int beginIndex, int endIndex); + + @NonNull + Text appendText(@NonNull CharSequence text); + + @Nullable + String parseLinkDestination(); + + @Nullable + String parseLinkTitle(); + + int parseLinkLabel(); + + void processDelimiters(Delimiter stackBottom); +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/NewLineInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/NewLineInlineProcessor.java similarity index 80% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/NewLineInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/NewLineInlineProcessor.java index 6c18ab64..4f08a74f 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/NewLineInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/NewLineInlineProcessor.java @@ -1,29 +1,27 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.node.HardLineBreak; import org.commonmark.node.Node; import org.commonmark.node.SoftLineBreak; import org.commonmark.node.Text; -import java.util.Collection; -import java.util.Collections; import java.util.regex.Matcher; import java.util.regex.Pattern; -public class NewLineInline extends Inline { +/** + * @since 4.2.0-SNAPSHOT + */ +public class NewLineInlineProcessor extends InlineProcessor { private static final Pattern FINAL_SPACE = Pattern.compile(" *$"); - @NonNull @Override - public Collection characters() { - return Collections.singleton('\n'); + public char specialCharacter() { + return '\n'; } @Override - public boolean parse() { + protected boolean parse() { index++; // assume we're at a \n Node lastChild = block.getLastChild(); diff --git 
a/sample/src/main/java/io/noties/markwon/sample/editor/inline/OpenBracketInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/OpenBracketInlineProcessor.java similarity index 52% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/OpenBracketInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/OpenBracketInlineProcessor.java index c4fe20d9..02edf9bb 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/OpenBracketInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/OpenBracketInlineProcessor.java @@ -1,23 +1,21 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.internal.Bracket; import org.commonmark.node.Text; -import java.util.Collection; -import java.util.Collections; - -public class OpenBracketInline extends Inline { - @NonNull +/** + * Parses markdown links {@code [link](#href)} + * + * @since 4.2.0-SNAPSHOT + */ +public class OpenBracketInlineProcessor extends InlineProcessor { @Override - public Collection characters() { - return Collections.singleton('['); + public char specialCharacter() { + return '['; } @Override - public boolean parse() { - + protected boolean parse() { int startIndex = index; index++; diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/StaggeredDelimiterProcessor.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/StaggeredDelimiterProcessor.java similarity index 97% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/StaggeredDelimiterProcessor.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/StaggeredDelimiterProcessor.java index 7765ca54..c2a92c3d 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/StaggeredDelimiterProcessor.java +++ 
b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/StaggeredDelimiterProcessor.java @@ -1,4 +1,4 @@ -package io.noties.markwon.sample.editor.inline; +package io.noties.markwon.inlineparser; import org.commonmark.node.Text; import org.commonmark.parser.delimiter.DelimiterProcessor; @@ -17,7 +17,6 @@ class StaggeredDelimiterProcessor implements DelimiterProcessor { this.delim = delim; } - @Override public char getOpeningCharacter() { return delim; diff --git a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/UnderscoreDelimiterProcessor.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/UnderscoreDelimiterProcessor.java new file mode 100644 index 00000000..83f7771f --- /dev/null +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/UnderscoreDelimiterProcessor.java @@ -0,0 +1,7 @@ +package io.noties.markwon.inlineparser; + +/** + * @since 4.2.0-SNAPSHOT + */ +public class UnderscoreDelimiterProcessor extends org.commonmark.internal.inline.UnderscoreDelimiterProcessor { +} diff --git a/sample/build.gradle b/sample/build.gradle index a7dec247..d2a9e27f 100644 --- a/sample/build.gradle +++ b/sample/build.gradle @@ -41,6 +41,7 @@ dependencies { implementation project(':markwon-ext-tasklist') implementation project(':markwon-html') implementation project(':markwon-image') + implementation project(':markwon-inline-parser') implementation project(':markwon-linkify') implementation project(':markwon-recycler') implementation project(':markwon-recycler-table') diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/EditorActivity.java b/sample/src/main/java/io/noties/markwon/sample/editor/EditorActivity.java index c16053fd..17387418 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/EditorActivity.java +++ b/sample/src/main/java/io/noties/markwon/sample/editor/EditorActivity.java @@ -18,18 +18,12 @@ import android.widget.TextView; import androidx.annotation.NonNull; import 
androidx.annotation.Nullable; -import org.commonmark.internal.inline.AsteriskDelimiterProcessor; -import org.commonmark.internal.inline.UnderscoreDelimiterProcessor; -import org.commonmark.node.Link; -import org.commonmark.node.Text; +import org.commonmark.parser.InlineParserFactory; import org.commonmark.parser.Parser; import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; import java.util.List; import java.util.concurrent.Executors; -import java.util.regex.Pattern; import io.noties.markwon.AbstractMarkwonPlugin; import io.noties.markwon.Markwon; @@ -43,17 +37,12 @@ import io.noties.markwon.editor.PersistedSpans; import io.noties.markwon.editor.handler.EmphasisEditHandler; import io.noties.markwon.editor.handler.StrongEmphasisEditHandler; import io.noties.markwon.ext.strikethrough.StrikethroughPlugin; +import io.noties.markwon.inlineparser.BangInlineProcessor; +import io.noties.markwon.inlineparser.EntityInlineProcessor; +import io.noties.markwon.inlineparser.HtmlInlineProcessor; +import io.noties.markwon.inlineparser.MarkwonInlineParser; import io.noties.markwon.linkify.LinkifyPlugin; import io.noties.markwon.sample.R; -import io.noties.markwon.sample.editor.inline.AutolinkInline; -import io.noties.markwon.sample.editor.inline.BackslashInline; -import io.noties.markwon.sample.editor.inline.BackticksInline; -import io.noties.markwon.sample.editor.inline.CloseBracketInline; -import io.noties.markwon.sample.editor.inline.EntityInline; -import io.noties.markwon.sample.editor.inline.HtmlInline; -import io.noties.markwon.sample.editor.inline.Inline; -import io.noties.markwon.sample.editor.inline.InlineParserImpl; -import io.noties.markwon.sample.editor.inline.NewLineInline; public class EditorActivity extends Activity { @@ -187,66 +176,15 @@ public class EditorActivity extends Activity { // for links to be clickable editText.setMovementMethod(LinkMovementMethod.getInstance()); - // provider? 
- final InlineParserImpl.Builder inlineParserFactoryBuilder = InlineParserImpl.builder() - .addDelimiterProcessor(new AsteriskDelimiterProcessor()) - .addDelimiterProcessor(new UnderscoreDelimiterProcessor()) - .addInlineProcessor(new AutolinkInline()) - .addInlineProcessor(new BackslashInline()) - .addInlineProcessor(new BackticksInline()) -// .addInlineProcessor(new BangInline()) // no images then - .addInlineProcessor(new CloseBracketInline()) - .addInlineProcessor(new EntityInline()) - .addInlineProcessor(new HtmlInline()) - .addInlineProcessor(new NewLineInline()) - .addInlineProcessor(new Inline() { - - private final Pattern RE = Pattern.compile("\\d+"); - - @NonNull - @Override - public Collection characters() { - return Collections.singleton('#'); - } - - @Override - public boolean parse() { - final String id = match(RE); - if (id != null) { - final Link link = new Link("https://github.com/noties/Markwon/issues/" + id, null); - final Text text = new Text("#" + id); - link.appendChild(text); - appendNode(link); - return true; - } - return false; - } - }) - .addInlineProcessor(new Inline() { - - private final Pattern RE = Pattern.compile("\\w+"); - - @NonNull - @Override - public Collection characters() { - return Collections.singleton('#'); - } - - @Override - public boolean parse() { - final String s = match(RE); - if (s != null) { - final Link link = new Link("https://noties.io", null); - final Text text = new Text("#" + s); - link.appendChild(text); - appendNode(link); - return true; - } - return false; - } - }) -// .addInlineProcessor(new OpenBracketInline()) - ; + final InlineParserFactory inlineParserFactory = MarkwonInlineParser.factoryBuilder() + .includeDefaults() + // no inline images will be parsed + .excludeInlineProcessor(BangInlineProcessor.class) + // no html tags will be parsed + .excludeInlineProcessor(HtmlInlineProcessor.class) + // no entities will be parsed (aka `&` etc) + .excludeInlineProcessor(EntityInlineProcessor.class) + .build(); 
final Markwon markwon = Markwon.builder(this) .usePlugin(StrikethroughPlugin.create()) @@ -254,9 +192,11 @@ public class EditorActivity extends Activity { .usePlugin(new AbstractMarkwonPlugin() { @Override public void configureParser(@NonNull Parser.Builder builder) { + // disable all commonmark-java blocks, only inlines will be parsed // builder.enabledBlockTypes(Collections.emptySet()); - builder.inlineParserFactory(inlineParserFactoryBuilder.build()); + + builder.inlineParserFactory(inlineParserFactory); } }) .build(); diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/Inline.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/Inline.java deleted file mode 100644 index de9326a6..00000000 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/Inline.java +++ /dev/null @@ -1,429 +0,0 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; - -import org.commonmark.internal.Bracket; -import org.commonmark.internal.Delimiter; -import org.commonmark.internal.util.Escaping; -import org.commonmark.node.Link; -import org.commonmark.node.Node; -import org.commonmark.node.Text; -import org.commonmark.parser.delimiter.DelimiterProcessor; - -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public abstract class Inline { - - private static final String ESCAPED_CHAR = "\\\\" + Escaping.ESCAPABLE; - - protected static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); - - protected static final Pattern WHITESPACE = Pattern.compile("\\s+"); - - protected static final Pattern SPNL = Pattern.compile("^ *(?:\n *)?"); - - protected static final Pattern LINK_TITLE = Pattern.compile( - "^(?:\"(" + ESCAPED_CHAR + "|[^\"\\x00])*\"" + - '|' + - "'(" + ESCAPED_CHAR + "|[^'\\x00])*'" + - '|' + - "\\((" + ESCAPED_CHAR + "|[^)\\x00])*\\))"); - - protected static final Pattern 
LINK_DESTINATION_BRACES = Pattern.compile("^(?:[<](?:[^<> \\t\\n\\\\]|\\\\.)*[>])"); - - protected static final Pattern LINK_LABEL = Pattern.compile("^\\[(?:[^\\\\\\[\\]]|\\\\.)*\\]"); - - - protected InlineContext context; - protected Node block; - protected int index; - protected String input; - - protected void bind( - @NonNull InlineContext context, - @NonNull Node block, - @NonNull String input, - int index) { - this.context = context; - this.block = block; - this.input = input; - this.index = index; - } - - @NonNull - public abstract Collection characters(); - - public abstract boolean parse(); - - /** - * If RE matches at current index in the input, advance index and return the match; otherwise return null. - */ - protected String match(Pattern re) { - if (index >= input.length()) { - return null; - } - Matcher matcher = re.matcher(input); - matcher.region(index, input.length()); - boolean m = matcher.find(); - if (m) { - index = matcher.end(); - return matcher.group(); - } else { - return null; - } - } - - protected void appendNode(Node node) { - block.appendChild(node); - } - - protected Text appendText(CharSequence text, int beginIndex, int endIndex) { - return appendText(text.subSequence(beginIndex, endIndex)); - } - - protected Text appendText(CharSequence text) { - Text node = new Text(text.toString()); - appendNode(node); - return node; - } - - /** - * Returns the char at the current input index, or {@code '\0'} in case there are no more characters. 
- */ - protected char peek() { - if (index < input.length()) { - return input.charAt(index); - } else { - return '\0'; - } - } - - protected void addBracket(Bracket bracket) { - final Bracket lastBracket = context.lastBracket(); - if (lastBracket != null) { - lastBracket.bracketAfter = true; - } - context.lastBracket(bracket); - } - - protected void removeLastBracket() { - final InlineContext context = this.context; - context.lastBracket(context.lastBracket().previous); - } - - protected Bracket lastBracket() { - return context.lastBracket(); - } - - protected Delimiter lastDelimiter() { - return context.lastDelimiter(); - } - - protected Map referenceMap() { - return context.referenceMap(); - } - - protected Map delimiterProcessors() { - return context.delimiterProcessors(); - } - - /** - * Parse zero or more space characters, including at most one newline. - */ - protected boolean spnl() { - match(SPNL); - return true; - } - - /** - * Attempt to parse link destination, returning the string or null if no match. 
- */ - protected String parseLinkDestination() { - String res = match(LINK_DESTINATION_BRACES); - if (res != null) { // chop off surrounding <..>: - if (res.length() == 2) { - return ""; - } else { - return Escaping.unescapeString(res.substring(1, res.length() - 1)); - } - } else { - int startIndex = index; - parseLinkDestinationWithBalancedParens(); - return Escaping.unescapeString(input.substring(startIndex, index)); - } - } - - protected void parseLinkDestinationWithBalancedParens() { - int parens = 0; - while (true) { - char c = peek(); - switch (c) { - case '\0': - return; - case '\\': - // check if we have an escapable character - if (index + 1 < input.length() && ESCAPABLE.matcher(input.substring(index + 1, index + 2)).matches()) { - // skip over the escaped character (after switch) - index++; - break; - } - // otherwise, we treat this as a literal backslash - break; - case '(': - parens++; - break; - case ')': - if (parens == 0) { - return; - } else { - parens--; - } - break; - case ' ': - // ASCII space - return; - default: - // or control character - if (Character.isISOControl(c)) { - return; - } - } - index++; - } - } - - /** - * Attempt to parse link title (sans quotes), returning the string or null if no match. - */ - protected String parseLinkTitle() { - String title = match(LINK_TITLE); - if (title != null) { - // chop off quotes from title and unescape: - return Escaping.unescapeString(title.substring(1, title.length() - 1)); - } else { - return null; - } - } - - /** - * Attempt to parse a link label, returning number of characters parsed. 
- */ - protected int parseLinkLabel() { - String m = match(LINK_LABEL); - // Spec says "A link label can have at most 999 characters inside the square brackets" - if (m == null || m.length() > 1001) { - return 0; - } else { - return m.length(); - } - } - - protected void processDelimiters(Delimiter stackBottom) { - - Map openersBottom = new HashMap<>(); - - // find first closer above stackBottom: - Delimiter closer = lastDelimiter(); - while (closer != null && closer.previous != stackBottom) { - closer = closer.previous; - } - // move forward, looking for closers, and handling each - while (closer != null) { - char delimiterChar = closer.delimiterChar; - - DelimiterProcessor delimiterProcessor = delimiterProcessors().get(delimiterChar); - if (!closer.canClose || delimiterProcessor == null) { - closer = closer.next; - continue; - } - - char openingDelimiterChar = delimiterProcessor.getOpeningCharacter(); - - // Found delimiter closer. Now look back for first matching opener. - int useDelims = 0; - boolean openerFound = false; - boolean potentialOpenerFound = false; - Delimiter opener = closer.previous; - while (opener != null && opener != stackBottom && opener != openersBottom.get(delimiterChar)) { - if (opener.canOpen && opener.delimiterChar == openingDelimiterChar) { - potentialOpenerFound = true; - useDelims = delimiterProcessor.getDelimiterUse(opener, closer); - if (useDelims > 0) { - openerFound = true; - break; - } - } - opener = opener.previous; - } - - if (!openerFound) { - if (!potentialOpenerFound) { - // Set lower bound for future searches for openers. - // Only do this when we didn't even have a potential - // opener (one that matches the character and can open). - // If an opener was rejected because of the number of - // delimiters (e.g. because of the "multiple of 3" rule), - // we want to consider it next time because the number - // of delimiters can change as we continue processing. 
- openersBottom.put(delimiterChar, closer.previous); - if (!closer.canOpen) { - // We can remove a closer that can't be an opener, - // once we've seen there's no matching opener: - removeDelimiterKeepNode(closer); - } - } - closer = closer.next; - continue; - } - - Text openerNode = opener.node; - Text closerNode = closer.node; - - // Remove number of used delimiters from stack and inline nodes. - opener.length -= useDelims; - closer.length -= useDelims; - openerNode.setLiteral( - openerNode.getLiteral().substring(0, - openerNode.getLiteral().length() - useDelims)); - closerNode.setLiteral( - closerNode.getLiteral().substring(0, - closerNode.getLiteral().length() - useDelims)); - - removeDelimitersBetween(opener, closer); - // The delimiter processor can re-parent the nodes between opener and closer, - // so make sure they're contiguous already. Exclusive because we want to keep opener/closer themselves. - mergeTextNodesBetweenExclusive(openerNode, closerNode); - delimiterProcessor.process(openerNode, closerNode, useDelims); - - // No delimiter characters left to process, so we can remove delimiter and the now empty node. 
- if (opener.length == 0) { - removeDelimiterAndNode(opener); - } - - if (closer.length == 0) { - Delimiter next = closer.next; - removeDelimiterAndNode(closer); - closer = next; - } - } - - // remove all delimiters - Delimiter lastDelimiter; - while ((lastDelimiter = lastDelimiter()) != null) { - if (lastDelimiter != stackBottom) { - removeDelimiterKeepNode(lastDelimiter); - } else { - break; - } - } -// while (lastDelimiter != null && lastDelimiter != stackBottom) { -// removeDelimiterKeepNode(lastDelimiter); -// } - } - - private void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) { - // No nodes between them - if (fromNode == toNode || fromNode.getNext() == toNode) { - return; - } - - mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious()); - } - - protected void mergeChildTextNodes(Node node) { - // No children or just one child node, no need for merging - if (node.getFirstChild() == node.getLastChild()) { - return; - } - - mergeTextNodesInclusive(node.getFirstChild(), node.getLastChild()); - } - - protected void mergeTextNodesInclusive(Node fromNode, Node toNode) { - Text first = null; - Text last = null; - int length = 0; - - Node node = fromNode; - while (node != null) { - if (node instanceof Text) { - Text text = (Text) node; - if (first == null) { - first = text; - } - length += text.getLiteral().length(); - last = text; - } else { - mergeIfNeeded(first, last, length); - first = null; - last = null; - length = 0; - } - if (node == toNode) { - break; - } - node = node.getNext(); - } - - mergeIfNeeded(first, last, length); - } - - protected void mergeIfNeeded(Text first, Text last, int textLength) { - if (first != null && last != null && first != last) { - StringBuilder sb = new StringBuilder(textLength); - sb.append(first.getLiteral()); - Node node = first.getNext(); - Node stop = last.getNext(); - while (node != stop) { - sb.append(((Text) node).getLiteral()); - Node unlink = node; - node = node.getNext(); - unlink.unlink(); - } - 
String literal = sb.toString(); - first.setLiteral(literal); - } - } - - protected void removeDelimitersBetween(Delimiter opener, Delimiter closer) { - Delimiter delimiter = closer.previous; - while (delimiter != null && delimiter != opener) { - Delimiter previousDelimiter = delimiter.previous; - removeDelimiterKeepNode(delimiter); - delimiter = previousDelimiter; - } - } - - /** - * Remove the delimiter and the corresponding text node. For used delimiters, e.g. `*` in `*foo*`. - */ - protected void removeDelimiterAndNode(Delimiter delim) { - Text node = delim.node; - node.unlink(); - removeDelimiter(delim); - } - - /** - * Remove the delimiter but keep the corresponding node as text. For unused delimiters such as `_` in `foo_bar`. - */ - protected void removeDelimiterKeepNode(Delimiter delim) { - removeDelimiter(delim); - } - - protected void removeDelimiter(Delimiter delim) { - if (delim.previous != null) { - delim.previous.next = delim.next; - } - if (delim.next == null) { - // top of stack -// lastDelimiter = delim.previous; - context.lastDelimiter(delim.previous); - } else { - delim.next.previous = delim.previous; - } - } -} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineContext.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineContext.java deleted file mode 100644 index 0c3b88b7..00000000 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineContext.java +++ /dev/null @@ -1,62 +0,0 @@ -package io.noties.markwon.sample.editor.inline; - -import org.commonmark.internal.Bracket; -import org.commonmark.internal.Delimiter; -import org.commonmark.node.Link; -import org.commonmark.parser.delimiter.DelimiterProcessor; - -import java.util.Map; - -public class InlineContext { - - /** - * Top delimiter (emphasis, strong emphasis or custom emphasis). (Brackets are on a separate stack, different - * from the algorithm described in the spec.) 
- */ - private Delimiter lastDelimiter; - - /** - * Top opening bracket ([ or ![)). - */ - private Bracket lastBracket; - - /** - * Link references by ID, needs to be built up using parseReference before calling parse. - */ - private Map referenceMap; - - private Map delimiterProcessors; - - - public Delimiter lastDelimiter() { - return lastDelimiter; - } - - public void lastDelimiter(Delimiter lastDelimiter) { - this.lastDelimiter = lastDelimiter; - } - - public Bracket lastBracket() { - return lastBracket; - } - - public void lastBracket(Bracket lastBracket) { - this.lastBracket = lastBracket; - } - - public Map referenceMap() { - return referenceMap; - } - - public void referenceMap(Map referenceMap) { - this.referenceMap = referenceMap; - } - - public Map delimiterProcessors() { - return delimiterProcessors; - } - - public void delimiterProcessors(Map delimiterProcessors) { - this.delimiterProcessors = delimiterProcessors; - } -} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserImpl.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserImpl.java deleted file mode 100644 index 12704f7e..00000000 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserImpl.java +++ /dev/null @@ -1,1190 +0,0 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; - -import org.commonmark.internal.Delimiter; -import org.commonmark.internal.ReferenceParser; -import org.commonmark.internal.util.Escaping; -import org.commonmark.node.Link; -import org.commonmark.node.Node; -import org.commonmark.node.Text; -import org.commonmark.parser.InlineParser; -import org.commonmark.parser.InlineParserFactory; -import org.commonmark.parser.delimiter.DelimiterProcessor; - -import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.regex.Matcher; -import 
java.util.regex.Pattern; - -import io.noties.debug.Debug; - -public class InlineParserImpl implements InlineParser, ReferenceParser { - - public interface Builder { - - @NonNull - Builder addInlineProcessor(@NonNull Inline inline); - - @NonNull - Builder addDelimiterProcessor(@NonNull DelimiterProcessor delimiterProcessor); - - @NonNull - InlineParserFactory build(); - } - - @NonNull - public static Builder builder() { - return new BuilderImpl(); - } - -// @NonNull -// public static InlineParserFactory factory() { -//// return context -> new InlineParserImpl(context.getCustomDelimiterProcessors()); -// } -// -// public static InlineParserFactory factory(Inline... inlines) { -// return context -> new InlineParserImpl(Arrays.asList(inlines), context.getCustomDelimiterProcessors()); -// } - - private static final String ESCAPED_CHAR = "\\\\" + Escaping.ESCAPABLE; -// private static final String HTMLCOMMENT = "|"; -// private static final String PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; -// private static final String DECLARATION = "]*>"; -// private static final String CDATA = ""; -// private static final String HTMLTAG = "(?:" + Parsing.OPENTAG + "|" + Parsing.CLOSETAG + "|" + HTMLCOMMENT -// + "|" + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; -// private static final String ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; - - private static final String ASCII_PUNCTUATION = "!\"#\\$%&'\\(\\)\\*\\+,\\-\\./:;<=>\\?@\\[\\\\\\]\\^_`\\{\\|\\}~"; - private static final Pattern PUNCTUATION = Pattern - .compile("^[" + ASCII_PUNCTUATION + "\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]"); - -// private static final Pattern HTML_TAG = Pattern.compile('^' + HTMLTAG, Pattern.CASE_INSENSITIVE); - - private static final Pattern LINK_TITLE = Pattern.compile( - "^(?:\"(" + ESCAPED_CHAR + "|[^\"\\x00])*\"" + - '|' + - "'(" + ESCAPED_CHAR + "|[^'\\x00])*'" + - '|' + - "\\((" + ESCAPED_CHAR + "|[^)\\x00])*\\))"); - - private static final Pattern 
LINK_DESTINATION_BRACES = Pattern.compile("^(?:[<](?:[^<> \\t\\n\\\\]|\\\\.)*[>])"); - - private static final Pattern LINK_LABEL = Pattern.compile("^\\[(?:[^\\\\\\[\\]]|\\\\.)*\\]"); - - private static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); - -// private static final Pattern ENTITY_HERE = Pattern.compile('^' + ENTITY, Pattern.CASE_INSENSITIVE); -// -// private static final Pattern TICKS = Pattern.compile("`+"); -// -// private static final Pattern TICKS_HERE = Pattern.compile("^`+"); -// -// private static final Pattern EMAIL_AUTOLINK = Pattern -// .compile("^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>"); -// -// private static final Pattern AUTOLINK = Pattern -// .compile("^<[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*>"); - - private static final Pattern SPNL = Pattern.compile("^ *(?:\n *)?"); - - private static final Pattern UNICODE_WHITESPACE_CHAR = Pattern.compile("^[\\p{Zs}\t\r\n\f]"); - -// private static final Pattern WHITESPACE = Pattern.compile("\\s+"); -// -// private static final Pattern FINAL_SPACE = Pattern.compile(" *$"); - - private static final Pattern LINE_END = Pattern.compile("^ *(?:\n|$)"); - - private final BitSet specialCharacters; - private final BitSet delimiterCharacters; - private final Map delimiterProcessors; - - /** - * Link references by ID, needs to be built up using parseReference before calling parse. - */ -// private Map referenceMap = new HashMap<>(); - - private Node block; - - private String input; - private int index; - - /** - * Top delimiter (emphasis, strong emphasis or custom emphasis). (Brackets are on a separate stack, different - * from the algorithm described in the spec.) - */ -// private Delimiter lastDelimiter; - - /** - * Top opening bracket ([ or ![)). 
- */ -// private Bracket lastBracket; - - private final Map> inlines; - - private InlineContext inlineContext; - - - public InlineParserImpl(List inlines, List delimiterProcessors) { - this.delimiterProcessors = calculateDelimiterProcessors(delimiterProcessors); - this.delimiterCharacters = calculateDelimiterCharacters(this.delimiterProcessors.keySet()); - this.specialCharacters = calculateSpecialCharacters(delimiterCharacters); - // we must also put into special characters (otherwise won't be triggered) - this.inlines = calculateInlines(specialCharacters, inlines); - } - - @NonNull - private static Map> calculateInlines(@NonNull BitSet specialCharacters, @NonNull List inlines) { - final Map> map = new HashMap<>(inlines.size()); - List list; - for (Inline inline : inlines) { - for (Character character : inline.characters()) { - specialCharacters.set(character); - list = map.get(character); - if (list == null) { - list = new ArrayList<>(1); - map.put(character, list); - } - list.add(inline); - } - } - return map; - } - - public static BitSet calculateDelimiterCharacters(Set characters) { - BitSet bitSet = new BitSet(); - for (Character character : characters) { - bitSet.set(character); - } - return bitSet; - } - - public static BitSet calculateSpecialCharacters(BitSet delimiterCharacters) { - BitSet bitSet = new BitSet(); - bitSet.or(delimiterCharacters); - bitSet.set('\n'); - bitSet.set('`'); - bitSet.set('['); - bitSet.set(']'); - bitSet.set('\\'); - bitSet.set('!'); - bitSet.set('<'); - bitSet.set('&'); - return bitSet; - } - - public static Map calculateDelimiterProcessors(List delimiterProcessors) { - Map map = new HashMap<>(); -// addDelimiterProcessors(Arrays.asList(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map); - addDelimiterProcessors(delimiterProcessors, map); - return map; - } - - private static void addDelimiterProcessors(Iterable delimiterProcessors, Map map) { - for (DelimiterProcessor delimiterProcessor : 
delimiterProcessors) { - char opening = delimiterProcessor.getOpeningCharacter(); - char closing = delimiterProcessor.getClosingCharacter(); - if (opening == closing) { - DelimiterProcessor old = map.get(opening); - if (old != null && old.getOpeningCharacter() == old.getClosingCharacter()) { - StaggeredDelimiterProcessor s; - if (old instanceof StaggeredDelimiterProcessor) { - s = (StaggeredDelimiterProcessor) old; - } else { - s = new StaggeredDelimiterProcessor(opening); - s.add(old); - } - s.add(delimiterProcessor); - map.put(opening, s); - } else { - addDelimiterProcessorForChar(opening, delimiterProcessor, map); - } - } else { - addDelimiterProcessorForChar(opening, delimiterProcessor, map); - addDelimiterProcessorForChar(closing, delimiterProcessor, map); - } - } - } - - private static void addDelimiterProcessorForChar(char delimiterChar, DelimiterProcessor toAdd, Map delimiterProcessors) { - DelimiterProcessor existing = delimiterProcessors.put(delimiterChar, toAdd); - if (existing != null) { - throw new IllegalArgumentException("Delimiter processor conflict with delimiter char '" + delimiterChar + "'"); - } - } - - /** - * Parse content in block into inline children, using reference map to resolve references. 
- */ - @Override - public void parse(String content, Node block) { - this.block = block; - this.input = content.trim(); - this.index = 0; -// this.lastDelimiter = null; -// this.lastBracket = null; - this.inlineContext = createInlineContext(); - - Debug.i(input); -// Debug.i(inlines); - - boolean moreToParse; - do { - moreToParse = parseInline(); - } while (moreToParse); - - // todo: must be somehow shared - processDelimiters(null); - mergeChildTextNodes(block); - } - - private InlineContext createInlineContext() { - final InlineContext context = new InlineContext(); - context.delimiterProcessors(delimiterProcessors); - context.referenceMap(new HashMap<>()); - return context; - } - - /** - * Attempt to parse a link reference, modifying the internal reference map. - */ - @Override - public int parseReference(String s) { - this.input = s; - this.index = 0; - String dest; - String title; - int matchChars; - int startIndex = index; - - // label: - matchChars = parseLinkLabel(); - if (matchChars == 0) { - return 0; - } - - String rawLabel = input.substring(0, matchChars); - - // colon: - if (peek() != ':') { - return 0; - } - index++; - - // link url - spnl(); - - dest = parseLinkDestination(); - if (dest == null || dest.length() == 0) { - return 0; - } - - int beforeTitle = index; - spnl(); - title = parseLinkTitle(); - if (title == null) { - // rewind before spaces - index = beforeTitle; - } - - boolean atLineEnd = true; - if (index != input.length() && match(LINE_END) == null) { - if (title == null) { - atLineEnd = false; - } else { - // the potential title we found is not at the line end, - // but it could still be a legal link reference if we - // discard the title - title = null; - // rewind before spaces - index = beforeTitle; - // and instead check if the link URL is at the line end - atLineEnd = match(LINE_END) != null; - } - } - - if (!atLineEnd) { - return 0; - } - - String normalizedLabel = Escaping.normalizeReference(rawLabel); - if 
(normalizedLabel.isEmpty()) { - return 0; - } - - final Map referenceMap = inlineContext.referenceMap(); - - if (!referenceMap.containsKey(normalizedLabel)) { - Link link = new Link(dest, title); - referenceMap.put(normalizedLabel, link); - } - return index - startIndex; - } - - private Text appendText(CharSequence text, int beginIndex, int endIndex) { - return appendText(text.subSequence(beginIndex, endIndex)); - } - - private Text appendText(CharSequence text) { - Text node = new Text(text.toString()); - appendNode(node); - return node; - } - - private void appendNode(Node node) { - block.appendChild(node); - } - - /** - * Parse the next inline element in subject, advancing input index. - * On success, add the result to block's children and return true. - * On failure, return false. - */ - private boolean parseInline() { - char c = peek(); - if (c == '\0') { - return false; - } - - boolean res = false; - - final List inlines = this.inlines.get(c); - - Debug.i("char: '%s', inlines: %s", c, inlines); - - if (inlines != null) { - for (Inline inline : inlines) { - res = processInline(inline); - Debug.i("char: '%s', res: %s, inline: %s", c, res, inline); - if (res) { - break; - } - } - } else { - boolean isDelimiter = delimiterCharacters.get(c); - if (isDelimiter) { - DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c); - res = parseDelimiters(delimiterProcessor, c); - } else { - res = parseString(); - } - } - -// switch (c) { -// case '\n': -// res = parseNewline(); -// break; -// case '\\': -// res = parseBackslash(); -// break; -// case '`': -// res = parseBackticks(); -// break; -// case '[': -// res = parseOpenBracket(); -// break; -// case '!': -// res = parseBang(); -// break; -// case ']': -// res = parseCloseBracket(); -// break; -// case '<': -// res = parseAutolink() || parseHtmlInline(); -// break; -// case '&': -// res = parseEntity(); -// break; -// default: -// boolean isDelimiter = delimiterCharacters.get(c); -// if (isDelimiter) { -// 
DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c); -// res = parseDelimiters(delimiterProcessor, c); -// } else { -// res = parseString(); -// } -// break; -// } - if (!res) { - index++; - // When we get here, it's only for a single special character that turned out to not have a special meaning. - // So we shouldn't have a single surrogate here, hence it should be ok to turn it into a String. - String literal = String.valueOf(c); - appendText(literal); - } - - return true; - } - - private boolean processInline(@NonNull Inline inline) { - inline.bind(inlineContext, block, input, index); - final boolean result = inline.parse(); - index = inline.index; - return result; - } - - /** - * If RE matches at current index in the input, advance index and return the match; otherwise return null. - */ - private String match(Pattern re) { - if (index >= input.length()) { - return null; - } - Matcher matcher = re.matcher(input); - matcher.region(index, input.length()); - boolean m = matcher.find(); - if (m) { - index = matcher.end(); - return matcher.group(); - } else { - return null; - } - } - - /** - * Returns the char at the current input index, or {@code '\0'} in case there are no more characters. - */ - private char peek() { - if (index < input.length()) { - return input.charAt(index); - } else { - return '\0'; - } - } - - /** - * Parse zero or more space characters, including at most one newline. - */ - private boolean spnl() { - match(SPNL); - return true; - } - -// /** -// * Parse a newline. If it was preceded by two spaces, return a hard line break; otherwise a soft line break. -// */ -// private boolean parseNewline() { -// index++; // assume we're at a \n -// -// Node lastChild = block.getLastChild(); -// // Check previous text for trailing spaces. -// // The "endsWith" is an optimization to avoid an RE match in the common case. 
-// if (lastChild != null && lastChild instanceof Text && ((Text) lastChild).getLiteral().endsWith(" ")) { -// Text text = (Text) lastChild; -// String literal = text.getLiteral(); -// Matcher matcher = FINAL_SPACE.matcher(literal); -// int spaces = matcher.find() ? matcher.end() - matcher.start() : 0; -// if (spaces > 0) { -// text.setLiteral(literal.substring(0, literal.length() - spaces)); -// } -// appendNode(spaces >= 2 ? new HardLineBreak() : new SoftLineBreak()); -// } else { -// appendNode(new SoftLineBreak()); -// } -// -// // gobble leading spaces in next line -// while (peek() == ' ') { -// index++; -// } -// return true; -// } - -// /** -// * Parse a backslash-escaped special character, adding either the escaped character, a hard line break -// * (if the backslash is followed by a newline), or a literal backslash to the block's children. -// */ -// private boolean parseBackslash() { -// index++; -// if (peek() == '\n') { -// appendNode(new HardLineBreak()); -// index++; -// } else if (index < input.length() && ESCAPABLE.matcher(input.substring(index, index + 1)).matches()) { -// appendText(input, index, index + 1); -// index++; -// } else { -// appendText("\\"); -// } -// return true; -// } - -// /** -// * Attempt to parse backticks, adding either a backtick code span or a literal sequence of backticks. -// */ -// private boolean parseBackticks() { -// String ticks = match(TICKS_HERE); -// if (ticks == null) { -// return false; -// } -// int afterOpenTicks = index; -// String matched; -// while ((matched = match(TICKS)) != null) { -// if (matched.equals(ticks)) { -// Code node = new Code(); -// String content = input.substring(afterOpenTicks, index - ticks.length()); -// String literal = WHITESPACE.matcher(content.trim()).replaceAll(" "); -// node.setLiteral(literal); -// appendNode(node); -// return true; -// } -// } -// // If we got here, we didn't match a closing backtick sequence. 
-// index = afterOpenTicks; -// appendText(ticks); -// return true; -// } - - /** - * Attempt to parse delimiters like emphasis, strong emphasis or custom delimiters. - */ - private boolean parseDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) { - DelimiterData res = scanDelimiters(delimiterProcessor, delimiterChar); - if (res == null) { - return false; - } - int length = res.count; - int startIndex = index; - - index += length; - Text node = appendText(input, startIndex, index); - - // Add entry to stack for this opener - - final Delimiter lastDelimiter = new Delimiter(node, delimiterChar, res.canOpen, res.canClose, inlineContext.lastDelimiter()); - lastDelimiter.length = length; - lastDelimiter.originalLength = length; - if (lastDelimiter.previous != null) { - lastDelimiter.previous.next = lastDelimiter; - } - inlineContext.lastDelimiter(lastDelimiter); - - return true; - } - -// /** -// * Add open bracket to delimiter stack and add a text node to block's children. -// */ -// private boolean parseOpenBracket() { -// int startIndex = index; -// index++; -// -// Text node = appendText("["); -// -// // Add entry to stack for this opener -// addBracket(Bracket.link(node, startIndex, lastBracket, lastDelimiter)); -// -// return true; -// } - -// /** -// * If next character is [, and ! delimiter to delimiter stack and add a text node to block's children. -// * Otherwise just add a text node. -// */ -// private boolean parseBang() { -// int startIndex = index; -// index++; -// if (peek() == '[') { -// index++; -// -// Text node = appendText("!["); -// -// // Add entry to stack for this opener -// addBracket(Bracket.image(node, startIndex + 1, lastBracket, lastDelimiter)); -// } else { -// appendText("!"); -// } -// return true; -// } - -// /** -// * Try to match close bracket against an opening in the delimiter stack. Add either a link or image, or a -// * plain [ character, to block's children. 
If there is a matching delimiter, remove it from the delimiter stack. -// */ -// private boolean parseCloseBracket() { -// index++; -// int startIndex = index; -// -// // Get previous `[` or `![` -// Bracket opener = lastBracket; -// if (opener == null) { -// // No matching opener, just return a literal. -// appendText("]"); -// return true; -// } -// -// if (!opener.allowed) { -// // Matching opener but it's not allowed, just return a literal. -// appendText("]"); -// removeLastBracket(); -// return true; -// } -// -// // Check to see if we have a link/image -// -// String dest = null; -// String title = null; -// boolean isLinkOrImage = false; -// -// // Maybe a inline link like `[foo](/uri "title")` -// if (peek() == '(') { -// index++; -// spnl(); -// if ((dest = parseLinkDestination()) != null) { -// spnl(); -// // title needs a whitespace before -// if (WHITESPACE.matcher(input.substring(index - 1, index)).matches()) { -// title = parseLinkTitle(); -// spnl(); -// } -// if (peek() == ')') { -// index++; -// isLinkOrImage = true; -// } else { -// index = startIndex; -// } -// } -// } -// -// // Maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]` -// if (!isLinkOrImage) { -// -// // See if there's a link label like `[bar]` or `[]` -// int beforeLabel = index; -// int labelLength = parseLinkLabel(); -// String ref = null; -// if (labelLength > 2) { -// ref = input.substring(beforeLabel, beforeLabel + labelLength); -// } else if (!opener.bracketAfter) { -// // If the second label is empty `[foo][]` or missing `[foo]`, then the first label is the reference. -// // But it can only be a reference when there's no (unescaped) bracket in it. -// // If there is, we don't even need to try to look up the reference. This is an optimization. 
-// ref = input.substring(opener.index, startIndex); -// } -// -// if (ref != null) { -// Link link = referenceMap.get(Escaping.normalizeReference(ref)); -// if (link != null) { -// dest = link.getDestination(); -// title = link.getTitle(); -// isLinkOrImage = true; -// } -// } -// } -// -// if (isLinkOrImage) { -// // If we got here, open is a potential opener -// Node linkOrImage = opener.image ? new Image(dest, title) : new Link(dest, title); -// -// Node node = opener.node.getNext(); -// while (node != null) { -// Node next = node.getNext(); -// linkOrImage.appendChild(node); -// node = next; -// } -// appendNode(linkOrImage); -// -// // Process delimiters such as emphasis inside link/image -// processDelimiters(opener.previousDelimiter); -// mergeChildTextNodes(linkOrImage); -// // We don't need the corresponding text node anymore, we turned it into a link/image node -// opener.node.unlink(); -// removeLastBracket(); -// -// // Links within links are not allowed. We found this link, so there can be no other link around it. -// if (!opener.image) { -// Bracket bracket = lastBracket; -// while (bracket != null) { -// if (!bracket.image) { -// // Disallow link opener. It will still get matched, but will not result in a link. -// bracket.allowed = false; -// } -// bracket = bracket.previous; -// } -// } -// -// return true; -// -// } else { // no link or image -// -// appendText("]"); -// removeLastBracket(); -// -// index = startIndex; -// return true; -// } -// } - -// private void addBracket(Bracket bracket) { -// if (lastBracket != null) { -// lastBracket.bracketAfter = true; -// } -// lastBracket = bracket; -// } -// -// private void removeLastBracket() { -// lastBracket = lastBracket.previous; -// } - - /** - * Attempt to parse link destination, returning the string or null if no match. 
- */ - private String parseLinkDestination() { - String res = match(LINK_DESTINATION_BRACES); - if (res != null) { // chop off surrounding <..>: - if (res.length() == 2) { - return ""; - } else { - return Escaping.unescapeString(res.substring(1, res.length() - 1)); - } - } else { - int startIndex = index; - parseLinkDestinationWithBalancedParens(); - return Escaping.unescapeString(input.substring(startIndex, index)); - } - } - - private void parseLinkDestinationWithBalancedParens() { - int parens = 0; - while (true) { - char c = peek(); - switch (c) { - case '\0': - return; - case '\\': - // check if we have an escapable character - if (index + 1 < input.length() && ESCAPABLE.matcher(input.substring(index + 1, index + 2)).matches()) { - // skip over the escaped character (after switch) - index++; - break; - } - // otherwise, we treat this as a literal backslash - break; - case '(': - parens++; - break; - case ')': - if (parens == 0) { - return; - } else { - parens--; - } - break; - case ' ': - // ASCII space - return; - default: - // or control character - if (Character.isISOControl(c)) { - return; - } - } - index++; - } - } - - /** - * Attempt to parse link title (sans quotes), returning the string or null if no match. - */ - private String parseLinkTitle() { - String title = match(LINK_TITLE); - if (title != null) { - // chop off quotes from title and unescape: - return Escaping.unescapeString(title.substring(1, title.length() - 1)); - } else { - return null; - } - } - - /** - * Attempt to parse a link label, returning number of characters parsed. - */ - private int parseLinkLabel() { - String m = match(LINK_LABEL); - // Spec says "A link label can have at most 999 characters inside the square brackets" - if (m == null || m.length() > 1001) { - return 0; - } else { - return m.length(); - } - } - -// /** -// * Attempt to parse an autolink (URL or email in pointy brackets). 
-// */ -// private boolean parseAutolink() { -// String m; -// if ((m = match(EMAIL_AUTOLINK)) != null) { -// String dest = m.substring(1, m.length() - 1); -// Link node = new Link("mailto:" + dest, null); -// node.appendChild(new Text(dest)); -// appendNode(node); -// return true; -// } else if ((m = match(AUTOLINK)) != null) { -// String dest = m.substring(1, m.length() - 1); -// Link node = new Link(dest, null); -// node.appendChild(new Text(dest)); -// appendNode(node); -// return true; -// } else { -// return false; -// } -// } - -// /** -// * Attempt to parse inline HTML. -// */ -// private boolean parseHtmlInline() { -// String m = match(HTML_TAG); -// if (m != null) { -// HtmlInline node = new HtmlInline(); -// node.setLiteral(m); -// appendNode(node); -// return true; -// } else { -// return false; -// } -// } - -// /** -// * Attempt to parse an entity, return Entity object if successful. -// */ -// private boolean parseEntity() { -// String m; -// if ((m = match(ENTITY_HERE)) != null) { -// appendText(Html5Entities.entityToString(m)); -// return true; -// } else { -// return false; -// } -// } - - /** - * Parse a run of ordinary characters, or a single character with a special meaning in markdown, as a plain string. - */ - private boolean parseString() { - int begin = index; - int length = input.length(); - while (index != length) { - if (specialCharacters.get(input.charAt(index))) { - break; - } - index++; - } - if (begin != index) { - appendText(input, begin, index); - return true; - } else { - return false; - } - } - - /** - * Scan a sequence of characters with code delimiterChar, and return information about the number of delimiters - * and whether they are positioned such that they can open and/or close emphasis or strong emphasis. 
- * - * @return information about delimiter run, or {@code null} - */ - private DelimiterData scanDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) { - int startIndex = index; - - int delimiterCount = 0; - while (peek() == delimiterChar) { - delimiterCount++; - index++; - } - - if (delimiterCount < delimiterProcessor.getMinLength()) { - index = startIndex; - return null; - } - - String before = startIndex == 0 ? "\n" : - input.substring(startIndex - 1, startIndex); - - char charAfter = peek(); - String after = charAfter == '\0' ? "\n" : - String.valueOf(charAfter); - - // We could be more lazy here, in most cases we don't need to do every match case. - boolean beforeIsPunctuation = PUNCTUATION.matcher(before).matches(); - boolean beforeIsWhitespace = UNICODE_WHITESPACE_CHAR.matcher(before).matches(); - boolean afterIsPunctuation = PUNCTUATION.matcher(after).matches(); - boolean afterIsWhitespace = UNICODE_WHITESPACE_CHAR.matcher(after).matches(); - - boolean leftFlanking = !afterIsWhitespace && - (!afterIsPunctuation || beforeIsWhitespace || beforeIsPunctuation); - boolean rightFlanking = !beforeIsWhitespace && - (!beforeIsPunctuation || afterIsWhitespace || afterIsPunctuation); - boolean canOpen; - boolean canClose; - if (delimiterChar == '_') { - canOpen = leftFlanking && (!rightFlanking || beforeIsPunctuation); - canClose = rightFlanking && (!leftFlanking || afterIsPunctuation); - } else { - canOpen = leftFlanking && delimiterChar == delimiterProcessor.getOpeningCharacter(); - canClose = rightFlanking && delimiterChar == delimiterProcessor.getClosingCharacter(); - } - - index = startIndex; - return new DelimiterData(delimiterCount, canOpen, canClose); - } - - private void processDelimiters(Delimiter stackBottom) { - - Map openersBottom = new HashMap<>(); - - // find first closer above stackBottom: - Delimiter closer = inlineContext.lastDelimiter(); - while (closer != null && closer.previous != stackBottom) { - closer = closer.previous; - } - 
// move forward, looking for closers, and handling each - while (closer != null) { - char delimiterChar = closer.delimiterChar; - - DelimiterProcessor delimiterProcessor = delimiterProcessors.get(delimiterChar); - if (!closer.canClose || delimiterProcessor == null) { - closer = closer.next; - continue; - } - - char openingDelimiterChar = delimiterProcessor.getOpeningCharacter(); - - // Found delimiter closer. Now look back for first matching opener. - int useDelims = 0; - boolean openerFound = false; - boolean potentialOpenerFound = false; - Delimiter opener = closer.previous; - while (opener != null && opener != stackBottom && opener != openersBottom.get(delimiterChar)) { - if (opener.canOpen && opener.delimiterChar == openingDelimiterChar) { - potentialOpenerFound = true; - useDelims = delimiterProcessor.getDelimiterUse(opener, closer); - if (useDelims > 0) { - openerFound = true; - break; - } - } - opener = opener.previous; - } - - if (!openerFound) { - if (!potentialOpenerFound) { - // Set lower bound for future searches for openers. - // Only do this when we didn't even have a potential - // opener (one that matches the character and can open). - // If an opener was rejected because of the number of - // delimiters (e.g. because of the "multiple of 3" rule), - // we want to consider it next time because the number - // of delimiters can change as we continue processing. - openersBottom.put(delimiterChar, closer.previous); - if (!closer.canOpen) { - // We can remove a closer that can't be an opener, - // once we've seen there's no matching opener: - removeDelimiterKeepNode(closer); - } - } - closer = closer.next; - continue; - } - - Text openerNode = opener.node; - Text closerNode = closer.node; - - // Remove number of used delimiters from stack and inline nodes. 
- opener.length -= useDelims; - closer.length -= useDelims; - openerNode.setLiteral( - openerNode.getLiteral().substring(0, - openerNode.getLiteral().length() - useDelims)); - closerNode.setLiteral( - closerNode.getLiteral().substring(0, - closerNode.getLiteral().length() - useDelims)); - - removeDelimitersBetween(opener, closer); - // The delimiter processor can re-parent the nodes between opener and closer, - // so make sure they're contiguous already. Exclusive because we want to keep opener/closer themselves. - mergeTextNodesBetweenExclusive(openerNode, closerNode); - delimiterProcessor.process(openerNode, closerNode, useDelims); - - // No delimiter characters left to process, so we can remove delimiter and the now empty node. - if (opener.length == 0) { - removeDelimiterAndNode(opener); - } - - if (closer.length == 0) { - Delimiter next = closer.next; - removeDelimiterAndNode(closer); - closer = next; - } - } - - // remove all delimiters - Delimiter lastDelimiter; - while (((lastDelimiter = inlineContext.lastDelimiter())) != null) { - if (lastDelimiter != stackBottom) { - removeDelimiterKeepNode(lastDelimiter); - } else { - break; - } - } -// while (lastDelimiter != null && lastDelimiter != stackBottom) { -// removeDelimiterKeepNode(lastDelimiter); -// } - } - - private void removeDelimitersBetween(Delimiter opener, Delimiter closer) { - Delimiter delimiter = closer.previous; - while (delimiter != null && delimiter != opener) { - Delimiter previousDelimiter = delimiter.previous; - removeDelimiterKeepNode(delimiter); - delimiter = previousDelimiter; - } - } - - /** - * Remove the delimiter and the corresponding text node. For used delimiters, e.g. `*` in `*foo*`. - */ - private void removeDelimiterAndNode(Delimiter delim) { - Text node = delim.node; - node.unlink(); - removeDelimiter(delim); - } - - /** - * Remove the delimiter but keep the corresponding node as text. For unused delimiters such as `_` in `foo_bar`. 
- */ - private void removeDelimiterKeepNode(Delimiter delim) { - removeDelimiter(delim); - } - - private void removeDelimiter(Delimiter delim) { - if (delim.previous != null) { - delim.previous.next = delim.next; - } - if (delim.next == null) { - // top of stack -// lastDelimiter = delim.previous; - inlineContext.lastDelimiter(delim.previous); - } else { - delim.next.previous = delim.previous; - } - } - - private void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) { - // No nodes between them - if (fromNode == toNode || fromNode.getNext() == toNode) { - return; - } - - mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious()); - } - - private void mergeChildTextNodes(Node node) { - // No children or just one child node, no need for merging - if (node.getFirstChild() == node.getLastChild()) { - return; - } - - mergeTextNodesInclusive(node.getFirstChild(), node.getLastChild()); - } - - private void mergeTextNodesInclusive(Node fromNode, Node toNode) { - Text first = null; - Text last = null; - int length = 0; - - Node node = fromNode; - while (node != null) { - if (node instanceof Text) { - Text text = (Text) node; - if (first == null) { - first = text; - } - length += text.getLiteral().length(); - last = text; - } else { - mergeIfNeeded(first, last, length); - first = null; - last = null; - length = 0; - } - if (node == toNode) { - break; - } - node = node.getNext(); - } - - mergeIfNeeded(first, last, length); - } - - private void mergeIfNeeded(Text first, Text last, int textLength) { - if (first != null && last != null && first != last) { - StringBuilder sb = new StringBuilder(textLength); - sb.append(first.getLiteral()); - Node node = first.getNext(); - Node stop = last.getNext(); - while (node != stop) { - sb.append(((Text) node).getLiteral()); - Node unlink = node; - node = node.getNext(); - unlink.unlink(); - } - String literal = sb.toString(); - first.setLiteral(literal); - } - } - - private static class DelimiterData { - - final int count; 
- final boolean canClose; - final boolean canOpen; - - DelimiterData(int count, boolean canOpen, boolean canClose) { - this.count = count; - this.canOpen = canOpen; - this.canClose = canClose; - } - } - - private static class BuilderImpl implements Builder { - - private final List inlines = new ArrayList<>(); - private final List delimiterProcessors = new ArrayList<>(); - - @NonNull - @Override - public Builder addInlineProcessor(@NonNull Inline inline) { - inlines.add(inline); - return this; - } - - @NonNull - @Override - public Builder addDelimiterProcessor(@NonNull DelimiterProcessor delimiterProcessor) { - delimiterProcessors.add(delimiterProcessor); - return this; - } - - @NonNull - @Override - public InlineParserFactory build() { - return inlineParserContext -> { - final List processors; - final List custom = inlineParserContext.getCustomDelimiterProcessors(); - if (custom != null && !custom.isEmpty()) { - processors = new ArrayList<>(delimiterProcessors); - processors.addAll(custom); - } else { - processors = delimiterProcessors; - } - return new InlineParserImpl(inlines, processors); - }; - } - } -} diff --git a/settings.gradle b/settings.gradle index f684fbb7..7ca10d2a 100644 --- a/settings.gradle +++ b/settings.gradle @@ -10,6 +10,7 @@ include ':app', ':sample', ':markwon-image', ':markwon-image-glide', ':markwon-image-picasso', + ':markwon-inline-parser', ':markwon-linkify', ':markwon-recycler', ':markwon-recycler-table',