From e95defb67c704607aef0b3cbe31b2eeeccfd633e Mon Sep 17 00:00:00 2001 From: Dimitry Ivanov Date: Tue, 12 Nov 2019 17:32:54 +0300 Subject: [PATCH 1/4] It is alive --- .../markwon/sample/editor/EditorActivity.java | 166 ++- .../sample/editor/inline/AutolinkInline.java | 45 + .../sample/editor/inline/BackslashInline.java | 31 + .../sample/editor/inline/BackticksInline.java | 46 + .../sample/editor/inline/BangInline.java | 34 + .../editor/inline/CloseBracketInline.java | 135 ++ .../sample/editor/inline/EntityInline.java | 32 + .../sample/editor/inline/HtmlInline.java | 39 + .../markwon/sample/editor/inline/Inline.java | 429 ++++++ .../sample/editor/inline/InlineContext.java | 62 + .../editor/inline/InlineParserImpl.java | 1190 +++++++++++++++++ .../editor/inline/InlineParserOriginal.java | 1060 +++++++++++++++ .../sample/editor/inline/NewLineInline.java | 51 + .../editor/inline/OpenBracketInline.java | 31 + .../inline/StaggeredDelimiterProcessor.java | 76 ++ 15 files changed, 3383 insertions(+), 44 deletions(-) create mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/AutolinkInline.java create mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/BackslashInline.java create mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/BackticksInline.java create mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/BangInline.java create mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/CloseBracketInline.java create mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/EntityInline.java create mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/HtmlInline.java create mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/Inline.java create mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineContext.java create mode 100644 
sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserImpl.java create mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserOriginal.java create mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/NewLineInline.java create mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/OpenBracketInline.java create mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/StaggeredDelimiterProcessor.java diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/EditorActivity.java b/sample/src/main/java/io/noties/markwon/sample/editor/EditorActivity.java index f7389827..c16053fd 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/EditorActivity.java +++ b/sample/src/main/java/io/noties/markwon/sample/editor/EditorActivity.java @@ -18,11 +18,18 @@ import android.widget.TextView; import androidx.annotation.NonNull; import androidx.annotation.Nullable; +import org.commonmark.internal.inline.AsteriskDelimiterProcessor; +import org.commonmark.internal.inline.UnderscoreDelimiterProcessor; +import org.commonmark.node.Link; +import org.commonmark.node.Text; import org.commonmark.parser.Parser; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.concurrent.Executors; +import java.util.regex.Pattern; import io.noties.markwon.AbstractMarkwonPlugin; import io.noties.markwon.Markwon; @@ -38,6 +45,15 @@ import io.noties.markwon.editor.handler.StrongEmphasisEditHandler; import io.noties.markwon.ext.strikethrough.StrikethroughPlugin; import io.noties.markwon.linkify.LinkifyPlugin; import io.noties.markwon.sample.R; +import io.noties.markwon.sample.editor.inline.AutolinkInline; +import io.noties.markwon.sample.editor.inline.BackslashInline; +import io.noties.markwon.sample.editor.inline.BackticksInline; +import io.noties.markwon.sample.editor.inline.CloseBracketInline; +import 
io.noties.markwon.sample.editor.inline.EntityInline; +import io.noties.markwon.sample.editor.inline.HtmlInline; +import io.noties.markwon.sample.editor.inline.Inline; +import io.noties.markwon.sample.editor.inline.InlineParserImpl; +import io.noties.markwon.sample.editor.inline.NewLineInline; public class EditorActivity extends Activity { @@ -102,52 +118,52 @@ public class EditorActivity extends Activity { private void additional_edit_span() { // An additional span is used to highlight strong-emphasis -final MarkwonEditor editor = MarkwonEditor.builder(Markwon.create(this)) - .useEditHandler(new AbstractEditHandler() { - @Override - public void configurePersistedSpans(@NonNull PersistedSpans.Builder builder) { - // Here we define which span is _persisted_ in EditText, it is not removed - // from EditText between text changes, but instead - reused (by changing - // position). Consider it as a cache for spans. We could use `StrongEmphasisSpan` - // here also, but I chose Bold to indicate that this span is not the same - // as in off-screen rendered markdown - builder.persistSpan(Bold.class, Bold::new); - } + final MarkwonEditor editor = MarkwonEditor.builder(Markwon.create(this)) + .useEditHandler(new AbstractEditHandler() { + @Override + public void configurePersistedSpans(@NonNull PersistedSpans.Builder builder) { + // Here we define which span is _persisted_ in EditText, it is not removed + // from EditText between text changes, but instead - reused (by changing + // position). Consider it as a cache for spans. 
We could use `StrongEmphasisSpan` + // here also, but I chose Bold to indicate that this span is not the same + // as in off-screen rendered markdown + builder.persistSpan(Bold.class, Bold::new); + } - @Override - public void handleMarkdownSpan( - @NonNull PersistedSpans persistedSpans, - @NonNull Editable editable, - @NonNull String input, - @NonNull StrongEmphasisSpan span, - int spanStart, - int spanTextLength) { - // Unfortunately we cannot hardcode delimiters length here (aka spanTextLength + 4) - // because multiple inline markdown nodes can refer to the same text. - // For example, `**_~~hey~~_**` - we will receive `**_~~` in this method, - // and thus will have to manually find actual position in raw user input - final MarkwonEditorUtils.Match match = - MarkwonEditorUtils.findDelimited(input, spanStart, "**", "__"); - if (match != null) { - editable.setSpan( - // we handle StrongEmphasisSpan and represent it with Bold in EditText - // we still could use StrongEmphasisSpan, but it must be accessed - // via persistedSpans - persistedSpans.get(Bold.class), - match.start(), - match.end(), - Spanned.SPAN_EXCLUSIVE_EXCLUSIVE - ); - } - } + @Override + public void handleMarkdownSpan( + @NonNull PersistedSpans persistedSpans, + @NonNull Editable editable, + @NonNull String input, + @NonNull StrongEmphasisSpan span, + int spanStart, + int spanTextLength) { + // Unfortunately we cannot hardcode delimiters length here (aka spanTextLength + 4) + // because multiple inline markdown nodes can refer to the same text. 
+ // For example, `**_~~hey~~_**` - we will receive `**_~~` in this method, + // and thus will have to manually find actual position in raw user input + final MarkwonEditorUtils.Match match = + MarkwonEditorUtils.findDelimited(input, spanStart, "**", "__"); + if (match != null) { + editable.setSpan( + // we handle StrongEmphasisSpan and represent it with Bold in EditText + // we still could use StrongEmphasisSpan, but it must be accessed + // via persistedSpans + persistedSpans.get(Bold.class), + match.start(), + match.end(), + Spanned.SPAN_EXCLUSIVE_EXCLUSIVE + ); + } + } - @NonNull - @Override - public Class markdownSpanType() { - return StrongEmphasisSpan.class; - } - }) - .build(); + @NonNull + @Override + public Class markdownSpanType() { + return StrongEmphasisSpan.class; + } + }) + .build(); editText.addTextChangedListener(MarkwonEditorTextWatcher.withProcess(editor)); } @@ -171,6 +187,67 @@ final MarkwonEditor editor = MarkwonEditor.builder(Markwon.create(this)) // for links to be clickable editText.setMovementMethod(LinkMovementMethod.getInstance()); + // provider? 
+ final InlineParserImpl.Builder inlineParserFactoryBuilder = InlineParserImpl.builder() + .addDelimiterProcessor(new AsteriskDelimiterProcessor()) + .addDelimiterProcessor(new UnderscoreDelimiterProcessor()) + .addInlineProcessor(new AutolinkInline()) + .addInlineProcessor(new BackslashInline()) + .addInlineProcessor(new BackticksInline()) +// .addInlineProcessor(new BangInline()) // no images then + .addInlineProcessor(new CloseBracketInline()) + .addInlineProcessor(new EntityInline()) + .addInlineProcessor(new HtmlInline()) + .addInlineProcessor(new NewLineInline()) + .addInlineProcessor(new Inline() { + + private final Pattern RE = Pattern.compile("\\d+"); + + @NonNull + @Override + public Collection characters() { + return Collections.singleton('#'); + } + + @Override + public boolean parse() { + final String id = match(RE); + if (id != null) { + final Link link = new Link("https://github.com/noties/Markwon/issues/" + id, null); + final Text text = new Text("#" + id); + link.appendChild(text); + appendNode(link); + return true; + } + return false; + } + }) + .addInlineProcessor(new Inline() { + + private final Pattern RE = Pattern.compile("\\w+"); + + @NonNull + @Override + public Collection characters() { + return Collections.singleton('#'); + } + + @Override + public boolean parse() { + final String s = match(RE); + if (s != null) { + final Link link = new Link("https://noties.io", null); + final Text text = new Text("#" + s); + link.appendChild(text); + appendNode(link); + return true; + } + return false; + } + }) +// .addInlineProcessor(new OpenBracketInline()) + ; + final Markwon markwon = Markwon.builder(this) .usePlugin(StrikethroughPlugin.create()) .usePlugin(LinkifyPlugin.create()) @@ -179,6 +256,7 @@ final MarkwonEditor editor = MarkwonEditor.builder(Markwon.create(this)) public void configureParser(@NonNull Parser.Builder builder) { // disable all commonmark-java blocks, only inlines will be parsed // 
builder.enabledBlockTypes(Collections.emptySet()); + builder.inlineParserFactory(inlineParserFactoryBuilder.build()); } }) .build(); diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/AutolinkInline.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/AutolinkInline.java new file mode 100644 index 00000000..beaa72c4 --- /dev/null +++ b/sample/src/main/java/io/noties/markwon/sample/editor/inline/AutolinkInline.java @@ -0,0 +1,45 @@ +package io.noties.markwon.sample.editor.inline; + +import androidx.annotation.NonNull; + +import org.commonmark.node.Link; +import org.commonmark.node.Text; + +import java.util.Collection; +import java.util.Collections; +import java.util.regex.Pattern; + +public class AutolinkInline extends Inline { + + private static final Pattern EMAIL_AUTOLINK = Pattern + .compile("^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>"); + + private static final Pattern AUTOLINK = Pattern + .compile("^<[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*>"); + + @NonNull + @Override + public Collection characters() { + return Collections.singleton('<'); + } + + @Override + public boolean parse() { + String m; + if ((m = match(EMAIL_AUTOLINK)) != null) { + String dest = m.substring(1, m.length() - 1); + Link node = new Link("mailto:" + dest, null); + node.appendChild(new Text(dest)); + appendNode(node); + return true; + } else if ((m = match(AUTOLINK)) != null) { + String dest = m.substring(1, m.length() - 1); + Link node = new Link(dest, null); + node.appendChild(new Text(dest)); + appendNode(node); + return true; + } else { + return false; + } + } +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackslashInline.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackslashInline.java new file mode 100644 index 00000000..72b21060 --- /dev/null +++ 
b/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackslashInline.java @@ -0,0 +1,31 @@ +package io.noties.markwon.sample.editor.inline; + +import androidx.annotation.NonNull; + +import org.commonmark.node.HardLineBreak; + +import java.util.Collection; +import java.util.Collections; + +public class BackslashInline extends Inline { + @NonNull + @Override + public Collection characters() { + return Collections.singleton('\\'); + } + + @Override + public boolean parse() { + index++; + if (peek() == '\n') { + appendNode(new HardLineBreak()); + index++; + } else if (index < input.length() && ESCAPABLE.matcher(input.substring(index, index + 1)).matches()) { + appendText(input, index, index + 1); + index++; + } else { + appendText("\\"); + } + return true; + } +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackticksInline.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackticksInline.java new file mode 100644 index 00000000..4ead3d4d --- /dev/null +++ b/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackticksInline.java @@ -0,0 +1,46 @@ +package io.noties.markwon.sample.editor.inline; + +import androidx.annotation.NonNull; + +import org.commonmark.node.Code; + +import java.util.Collection; +import java.util.Collections; +import java.util.regex.Pattern; + +public class BackticksInline extends Inline { + + private static final Pattern TICKS = Pattern.compile("`+"); + + private static final Pattern TICKS_HERE = Pattern.compile("^`+"); + + @NonNull + @Override + public Collection characters() { + return Collections.singleton('`'); + } + + @Override + public boolean parse() { + String ticks = match(TICKS_HERE); + if (ticks == null) { + return false; + } + int afterOpenTicks = index; + String matched; + while ((matched = match(TICKS)) != null) { + if (matched.equals(ticks)) { + Code node = new Code(); + String content = input.substring(afterOpenTicks, index - ticks.length()); + String literal = 
WHITESPACE.matcher(content.trim()).replaceAll(" "); + node.setLiteral(literal); + appendNode(node); + return true; + } + } + // If we got here, we didn't match a closing backtick sequence. + index = afterOpenTicks; + appendText(ticks); + return true; + } +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BangInline.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/BangInline.java new file mode 100644 index 00000000..0416f40c --- /dev/null +++ b/sample/src/main/java/io/noties/markwon/sample/editor/inline/BangInline.java @@ -0,0 +1,34 @@ +package io.noties.markwon.sample.editor.inline; + +import androidx.annotation.NonNull; + +import org.commonmark.internal.Bracket; +import org.commonmark.node.Text; + +import java.util.Collection; +import java.util.Collections; + +public class BangInline extends Inline { + @NonNull + @Override + public Collection characters() { + return Collections.singleton('!'); + } + + @Override + public boolean parse() { + int startIndex = index; + index++; + if (peek() == '[') { + index++; + + Text node = appendText("!["); + + // Add entry to stack for this opener + addBracket(Bracket.image(node, startIndex + 1, lastBracket(), lastDelimiter())); + } else { + appendText("!"); + } + return true; + } +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/CloseBracketInline.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/CloseBracketInline.java new file mode 100644 index 00000000..78366685 --- /dev/null +++ b/sample/src/main/java/io/noties/markwon/sample/editor/inline/CloseBracketInline.java @@ -0,0 +1,135 @@ +package io.noties.markwon.sample.editor.inline; + +import androidx.annotation.NonNull; + +import org.commonmark.internal.Bracket; +import org.commonmark.internal.util.Escaping; +import org.commonmark.node.Image; +import org.commonmark.node.Link; +import org.commonmark.node.Node; + +import java.util.Collection; +import java.util.Collections; + +public class 
CloseBracketInline extends Inline { + @NonNull + @Override + public Collection characters() { + return Collections.singleton(']'); + } + + @Override + public boolean parse() { + index++; + int startIndex = index; + + // Get previous `[` or `![` + Bracket opener = lastBracket(); + if (opener == null) { + // No matching opener, just return a literal. + appendText("]"); + return true; + } + + if (!opener.allowed) { + // Matching opener but it's not allowed, just return a literal. + appendText("]"); + removeLastBracket(); + return true; + } + + // Check to see if we have a link/image + + String dest = null; + String title = null; + boolean isLinkOrImage = false; + + // Maybe a inline link like `[foo](/uri "title")` + if (peek() == '(') { + index++; + spnl(); + if ((dest = parseLinkDestination()) != null) { + spnl(); + // title needs a whitespace before + if (WHITESPACE.matcher(input.substring(index - 1, index)).matches()) { + title = parseLinkTitle(); + spnl(); + } + if (peek() == ')') { + index++; + isLinkOrImage = true; + } else { + index = startIndex; + } + } + } + + // Maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]` + if (!isLinkOrImage) { + + // See if there's a link label like `[bar]` or `[]` + int beforeLabel = index; + int labelLength = parseLinkLabel(); + String ref = null; + if (labelLength > 2) { + ref = input.substring(beforeLabel, beforeLabel + labelLength); + } else if (!opener.bracketAfter) { + // If the second label is empty `[foo][]` or missing `[foo]`, then the first label is the reference. + // But it can only be a reference when there's no (unescaped) bracket in it. + // If there is, we don't even need to try to look up the reference. This is an optimization. 
+ ref = input.substring(opener.index, startIndex); + } + + if (ref != null) { + Link link = referenceMap().get(Escaping.normalizeReference(ref)); + if (link != null) { + dest = link.getDestination(); + title = link.getTitle(); + isLinkOrImage = true; + } + } + } + + if (isLinkOrImage) { + // If we got here, open is a potential opener + Node linkOrImage = opener.image ? new Image(dest, title) : new Link(dest, title); + + Node node = opener.node.getNext(); + while (node != null) { + Node next = node.getNext(); + linkOrImage.appendChild(node); + node = next; + } + appendNode(linkOrImage); + + // Process delimiters such as emphasis inside link/image + processDelimiters(opener.previousDelimiter); + mergeChildTextNodes(linkOrImage); + // We don't need the corresponding text node anymore, we turned it into a link/image node + opener.node.unlink(); + removeLastBracket(); + + // Links within links are not allowed. We found this link, so there can be no other link around it. + if (!opener.image) { + Bracket bracket = lastBracket(); + while (bracket != null) { + if (!bracket.image) { + // Disallow link opener. It will still get matched, but will not result in a link. 
+ bracket.allowed = false; + } + bracket = bracket.previous; + } + } + + return true; + + } else { // no link or image + + appendText("]"); + removeLastBracket(); + + index = startIndex; + return true; + } + } +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/EntityInline.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/EntityInline.java new file mode 100644 index 00000000..f7592aed --- /dev/null +++ b/sample/src/main/java/io/noties/markwon/sample/editor/inline/EntityInline.java @@ -0,0 +1,32 @@ +package io.noties.markwon.sample.editor.inline; + +import androidx.annotation.NonNull; + +import org.commonmark.internal.util.Html5Entities; + +import java.util.Collection; +import java.util.Collections; +import java.util.regex.Pattern; + +public class EntityInline extends Inline { + + private static final String ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; + private static final Pattern ENTITY_HERE = Pattern.compile('^' + ENTITY, Pattern.CASE_INSENSITIVE); + + @NonNull + @Override + public Collection characters() { + return Collections.singleton('&'); + } + + @Override + public boolean parse() { + String m; + if ((m = match(ENTITY_HERE)) != null) { + appendText(Html5Entities.entityToString(m)); + return true; + } else { + return false; + } + } +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/HtmlInline.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/HtmlInline.java new file mode 100644 index 00000000..34686ec2 --- /dev/null +++ b/sample/src/main/java/io/noties/markwon/sample/editor/inline/HtmlInline.java @@ -0,0 +1,39 @@ +package io.noties.markwon.sample.editor.inline; + +import androidx.annotation.NonNull; + +import org.commonmark.internal.util.Parsing; + +import java.util.Collection; +import java.util.Collections; +import java.util.regex.Pattern; + +public class HtmlInline extends Inline { + + private static final String HTMLCOMMENT = "|"; + private static 
final String PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; + private static final String DECLARATION = "]*>"; + private static final String CDATA = ""; + private static final String HTMLTAG = "(?:" + Parsing.OPENTAG + "|" + Parsing.CLOSETAG + "|" + HTMLCOMMENT + + "|" + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; + private static final Pattern HTML_TAG = Pattern.compile('^' + HTMLTAG, Pattern.CASE_INSENSITIVE); + + @NonNull + @Override + public Collection characters() { + return Collections.singleton('<'); + } + + @Override + public boolean parse() { + String m = match(HTML_TAG); + if (m != null) { + org.commonmark.node.HtmlInline node = new org.commonmark.node.HtmlInline(); + node.setLiteral(m); + appendNode(node); + return true; + } else { + return false; + } + } +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/Inline.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/Inline.java new file mode 100644 index 00000000..de9326a6 --- /dev/null +++ b/sample/src/main/java/io/noties/markwon/sample/editor/inline/Inline.java @@ -0,0 +1,429 @@ +package io.noties.markwon.sample.editor.inline; + +import androidx.annotation.NonNull; + +import org.commonmark.internal.Bracket; +import org.commonmark.internal.Delimiter; +import org.commonmark.internal.util.Escaping; +import org.commonmark.node.Link; +import org.commonmark.node.Node; +import org.commonmark.node.Text; +import org.commonmark.parser.delimiter.DelimiterProcessor; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public abstract class Inline { + + private static final String ESCAPED_CHAR = "\\\\" + Escaping.ESCAPABLE; + + protected static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); + + protected static final Pattern WHITESPACE = Pattern.compile("\\s+"); + + protected static final Pattern SPNL = Pattern.compile("^ *(?:\n *)?"); + + 
protected static final Pattern LINK_TITLE = Pattern.compile(
+            "^(?:\"(" + ESCAPED_CHAR + "|[^\"\\x00])*\"" +
+                    '|' +
+                    "'(" + ESCAPED_CHAR + "|[^'\\x00])*'" +
+                    '|' +
+                    "\\((" + ESCAPED_CHAR + "|[^)\\x00])*\\))");
+
+    protected static final Pattern LINK_DESTINATION_BRACES = Pattern.compile("^(?:[<](?:[^<> \\t\\n\\\\]|\\\\.)*[>])");
+
+    protected static final Pattern LINK_LABEL = Pattern.compile("^\\[(?:[^\\\\\\[\\]]|\\\\.)*\\]");
+
+
+    protected InlineContext context;
+    protected Node block;
+    protected int index;
+    protected String input;
+
+    /**
+     * Stores the state this processor works on: the shared context, the node that receives
+     * parsed children, the text being parsed and the position to start from.
+     * NOTE(review): presumably called by the inline parser right before {@link #parse()} - confirm.
+     */
+    protected void bind(
+            @NonNull InlineContext context,
+            @NonNull Node block,
+            @NonNull String input,
+            int index) {
+        this.context = context;
+        this.block = block;
+        this.input = input;
+        this.index = index;
+    }
+
+    /**
+     * Characters handled by this processor. The subclasses in this package all expect
+     * {@link #index} to point at one of these characters when {@link #parse()} starts.
+     */
+    @NonNull
+    public abstract Collection<Character> characters();
+
+    /**
+     * Attempts to parse at the current {@link #index}.
+     *
+     * @return {@code true} if input was consumed and nodes were appended, {@code false} otherwise
+     */
+    public abstract boolean parse();
+
+    /**
+     * If RE matches at current index in the input, advance index and return the match; otherwise return null.
+     * <p>
+     * The search uses {@link Matcher#find()} over the region starting at the current index, so a pattern
+     * that is not anchored with {@code ^} may match further ahead and skip input.
+     */
+    protected String match(Pattern re) {
+        if (index >= input.length()) {
+            return null;
+        }
+        Matcher matcher = re.matcher(input);
+        matcher.region(index, input.length());
+        boolean m = matcher.find();
+        if (m) {
+            index = matcher.end();
+            return matcher.group();
+        } else {
+            return null;
+        }
+    }
+
+    /**
+     * Appends the node as the last child of the current block.
+     */
+    protected void appendNode(Node node) {
+        block.appendChild(node);
+    }
+
+    /**
+     * Appends {@code text[beginIndex, endIndex)} as a new text node and returns that node.
+     */
+    protected Text appendText(CharSequence text, int beginIndex, int endIndex) {
+        return appendText(text.subSequence(beginIndex, endIndex));
+    }
+
+    /**
+     * Appends the text as a new text node and returns that node.
+     */
+    protected Text appendText(CharSequence text) {
+        Text node = new Text(text.toString());
+        appendNode(node);
+        return node;
+    }
+
+    /**
+     * Returns the char at the current input index, or {@code '\0'} in case there are no more characters.
+     */
+    protected char peek() {
+        if (index < input.length()) {
+            return input.charAt(index);
+        } else {
+            return '\0';
+        }
+    }
+
+    /**
+     * Pushes the bracket on the bracket stack and marks the previous top, if any, as having
+     * a bracket after it.
+     */
+    protected void addBracket(Bracket bracket) {
+        final Bracket lastBracket = context.lastBracket();
+        if (lastBracket != null) {
+            lastBracket.bracketAfter = true;
+        }
+        context.lastBracket(bracket);
+    }
+
+    /**
+     * Pops the top of the bracket stack. The stack must not be empty.
+     */
+    protected void removeLastBracket() {
+        final InlineContext context = this.context;
+        context.lastBracket(context.lastBracket().previous);
+    }
+
+    protected Bracket lastBracket() {
+        return context.lastBracket();
+    }
+
+    protected Delimiter lastDelimiter() {
+        return context.lastDelimiter();
+    }
+
+    protected Map<String, Link> referenceMap() {
+        return context.referenceMap();
+    }
+
+    protected Map<Character, DelimiterProcessor> delimiterProcessors() {
+        return context.delimiterProcessors();
+    }
+
+    /**
+     * Parse zero or more space characters, including at most one newline.
+     */
+    protected boolean spnl() {
+        match(SPNL);
+        return true;
+    }
+
+    /**
+     * Attempt to parse link destination, returning the string or null if no match.
+     */
+    protected String parseLinkDestination() {
+        String res = match(LINK_DESTINATION_BRACES);
+        if (res != null) { // chop off surrounding <..>:
+            if (res.length() == 2) {
+                return "";
+            } else {
+                return Escaping.unescapeString(res.substring(1, res.length() - 1));
+            }
+        } else {
+            int startIndex = index;
+            parseLinkDestinationWithBalancedParens();
+            return Escaping.unescapeString(input.substring(startIndex, index));
+        }
+    }
+
+    /**
+     * Advances the index over an unbracketed link destination. Stops at end of input, at a
+     * space or control character, or at a {@code )} that has no matching {@code (}.
+     */
+    protected void parseLinkDestinationWithBalancedParens() {
+        int parens = 0;
+        while (true) {
+            char c = peek();
+            switch (c) {
+                case '\0':
+                    return;
+                case '\\':
+                    // check if we have an escapable character
+                    if (index + 1 < input.length() && ESCAPABLE.matcher(input.substring(index + 1, index + 2)).matches()) {
+                        // skip over the escaped character (after switch)
+                        index++;
+                        break;
+                    }
+                    // otherwise, we treat this as a literal backslash
+                    break;
+                case '(':
+                    parens++;
+                    break;
+                case ')':
+                    if (parens == 0) {
+                        return;
+                    } else {
+                        parens--;
+                    }
+                    break;
+                case ' ':
+                    // ASCII space
+                    return;
+                default:
+                    // or control character
+                    if (Character.isISOControl(c)) {
+                        return;
+                    }
+            }
+            index++;
+        }
+    }
+
+    /**
+     * Attempt to parse link title (sans quotes), returning the string or null if no match.
+     */
+    protected String parseLinkTitle() {
+        String title = match(LINK_TITLE);
+        if (title != null) {
+            // chop off quotes from title and unescape:
+            return Escaping.unescapeString(title.substring(1, title.length() - 1));
+        } else {
+            return null;
+        }
+    }
+
+    /**
+     * Attempt to parse a link label, returning number of characters parsed.
+     */
+    protected int parseLinkLabel() {
+        String m = match(LINK_LABEL);
+        // Spec says "A link label can have at most 999 characters inside the square brackets"
+        if (m == null || m.length() > 1001) {
+            return 0;
+        } else {
+            return m.length();
+        }
+    }
+
+    /**
+     * Walks the delimiter stack above {@code stackBottom}, pairs closers with openers through
+     * the registered delimiter processors, and finally removes all delimiters above
+     * {@code stackBottom} from the stack.
+     *
+     * @param stackBottom delimiter below which the stack is left untouched; may be {@code null}
+     */
+    protected void processDelimiters(Delimiter stackBottom) {
+
+        Map<Character, Delimiter> openersBottom = new HashMap<>();
+
+        // find first closer above stackBottom:
+        Delimiter closer = lastDelimiter();
+        while (closer != null && closer.previous != stackBottom) {
+            closer = closer.previous;
+        }
+        // move forward, looking for closers, and handling each
+        while (closer != null) {
+            char delimiterChar = closer.delimiterChar;
+
+            DelimiterProcessor delimiterProcessor = delimiterProcessors().get(delimiterChar);
+            if (!closer.canClose || delimiterProcessor == null) {
+                closer = closer.next;
+                continue;
+            }
+
+            char openingDelimiterChar = delimiterProcessor.getOpeningCharacter();
+
+            // Found delimiter closer. Now look back for first matching opener.
+            int useDelims = 0;
+            boolean openerFound = false;
+            boolean potentialOpenerFound = false;
+            Delimiter opener = closer.previous;
+            while (opener != null && opener != stackBottom && opener != openersBottom.get(delimiterChar)) {
+                if (opener.canOpen && opener.delimiterChar == openingDelimiterChar) {
+                    potentialOpenerFound = true;
+                    useDelims = delimiterProcessor.getDelimiterUse(opener, closer);
+                    if (useDelims > 0) {
+                        openerFound = true;
+                        break;
+                    }
+                }
+                opener = opener.previous;
+            }
+
+            if (!openerFound) {
+                if (!potentialOpenerFound) {
+                    // Set lower bound for future searches for openers.
+                    // Only do this when we didn't even have a potential
+                    // opener (one that matches the character and can open).
+                    // If an opener was rejected because of the number of
+                    // delimiters (e.g. because of the "multiple of 3" rule),
+                    // we want to consider it next time because the number
+                    // of delimiters can change as we continue processing.
+                    openersBottom.put(delimiterChar, closer.previous);
+                    if (!closer.canOpen) {
+                        // We can remove a closer that can't be an opener,
+                        // once we've seen there's no matching opener:
+                        removeDelimiterKeepNode(closer);
+                    }
+                }
+                closer = closer.next;
+                continue;
+            }
+
+            Text openerNode = opener.node;
+            Text closerNode = closer.node;
+
+            // Remove number of used delimiters from stack and inline nodes.
+            opener.length -= useDelims;
+            closer.length -= useDelims;
+            openerNode.setLiteral(
+                    openerNode.getLiteral().substring(0,
+                            openerNode.getLiteral().length() - useDelims));
+            closerNode.setLiteral(
+                    closerNode.getLiteral().substring(0,
+                            closerNode.getLiteral().length() - useDelims));
+
+            removeDelimitersBetween(opener, closer);
+            // The delimiter processor can re-parent the nodes between opener and closer,
+            // so make sure they're contiguous already. Exclusive because we want to keep opener/closer themselves.
+            mergeTextNodesBetweenExclusive(openerNode, closerNode);
+            delimiterProcessor.process(openerNode, closerNode, useDelims);
+
+            // No delimiter characters left to process, so we can remove delimiter and the now empty node.
+            if (opener.length == 0) {
+                removeDelimiterAndNode(opener);
+            }
+
+            if (closer.length == 0) {
+                Delimiter next = closer.next;
+                removeDelimiterAndNode(closer);
+                closer = next;
+            }
+        }
+
+        // remove all delimiters
+        Delimiter lastDelimiter;
+        while ((lastDelimiter = lastDelimiter()) != null) {
+            if (lastDelimiter != stackBottom) {
+                removeDelimiterKeepNode(lastDelimiter);
+            } else {
+                break;
+            }
+        }
+//        while (lastDelimiter != null && lastDelimiter != stackBottom) {
+//            removeDelimiterKeepNode(lastDelimiter);
+//        }
+    }
+
+    /**
+     * Merges adjacent text nodes strictly between the two given nodes.
+     */
+    private void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) {
+        // No nodes between them
+        if (fromNode == toNode || fromNode.getNext() == toNode) {
+            return;
+        }
+
+        mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious());
+    }
+
+    /**
+     * Merges adjacent text nodes among the direct children of the given node.
+     */
+    protected void mergeChildTextNodes(Node node) {
+        // No children or just one child node, no need for merging
+        if (node.getFirstChild() == node.getLastChild()) {
+            return;
+        }
+
+        mergeTextNodesInclusive(node.getFirstChild(), node.getLastChild());
+    }
+
+    /**
+     * Merges every run of adjacent text nodes in the sibling range {@code fromNode..toNode}
+     * (both ends included) into the first node of the run.
+     */
+    protected void mergeTextNodesInclusive(Node fromNode, Node toNode) {
+        Text first = null;
+        Text last = null;
+        int length = 0;
+
+        Node node = fromNode;
+        while (node != null) {
+            if (node instanceof Text) {
+                Text text = (Text) node;
+                if (first == null) {
+                    first = text;
+                }
+                length += text.getLiteral().length();
+                last = text;
+            } else {
+                // a non-text node ends the current run
+                mergeIfNeeded(first, last, length);
+                first = null;
+                last = null;
+                length = 0;
+            }
+            if (node == toNode) {
+                break;
+            }
+            node = node.getNext();
+        }
+
+        mergeIfNeeded(first, last, length);
+    }
+
+    /**
+     * Concatenates the literals of the siblings {@code first..last} into {@code first} and
+     * unlinks the others. Does nothing when the run has fewer than two nodes.
+     *
+     * @param textLength combined literal length, used to size the buffer
+     */
+    protected void mergeIfNeeded(Text first, Text last, int textLength) {
+        if (first != null && last != null && first != last) {
+            StringBuilder sb = new StringBuilder(textLength);
+            sb.append(first.getLiteral());
+            Node node = first.getNext();
+            Node stop = last.getNext();
+            while (node != stop) {
+                sb.append(((Text) node).getLiteral());
+                Node unlink = node;
+                node = node.getNext();
+                unlink.unlink();
+            }
+            String literal = sb.toString();
+            first.setLiteral(literal);
+        }
+    }
+
+    /**
+     * Removes every delimiter strictly between opener and closer from the stack, keeping
+     * their text nodes.
+     */
+    protected void removeDelimitersBetween(Delimiter opener, Delimiter closer) {
+        Delimiter delimiter = closer.previous;
+        while (delimiter != null && delimiter != opener) {
+            Delimiter previousDelimiter = delimiter.previous;
+            removeDelimiterKeepNode(delimiter);
+            delimiter = previousDelimiter;
+        }
+    }
+
+    /**
+     * Remove the delimiter and the corresponding text node. For used delimiters, e.g. `*` in `*foo*`.
+     */
+    protected void removeDelimiterAndNode(Delimiter delim) {
+        Text node = delim.node;
+        node.unlink();
+        removeDelimiter(delim);
+    }
+
+    /**
+     * Remove the delimiter but keep the corresponding node as text. For unused delimiters such as `_` in `foo_bar`.
+     */
+    protected void removeDelimiterKeepNode(Delimiter delim) {
+        removeDelimiter(delim);
+    }
+
+    /**
+     * Unlinks the delimiter from the doubly linked delimiter stack; when it was the top of
+     * the stack, the context's last delimiter is moved to its predecessor.
+     */
+    protected void removeDelimiter(Delimiter delim) {
+        if (delim.previous != null) {
+            delim.previous.next = delim.next;
+        }
+        if (delim.next == null) {
+            // top of stack
+//            lastDelimiter = delim.previous;
+            context.lastDelimiter(delim.previous);
+        } else {
+            delim.next.previous = delim.previous;
+        }
+    }
+}
diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineContext.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineContext.java
new file mode 100644
index 00000000..0c3b88b7
--- /dev/null
+++ b/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineContext.java
@@ -0,0 +1,62 @@
+package io.noties.markwon.sample.editor.inline;
+
+import org.commonmark.internal.Bracket;
+import org.commonmark.internal.Delimiter;
+import org.commonmark.node.Link;
+import org.commonmark.parser.delimiter.DelimiterProcessor;
+
+import java.util.Map;
+
+/**
+ * Mutable state shared by the inline processors: the delimiter and bracket stacks, the link
+ * reference map and the delimiter processors keyed by delimiter character.
+ */
+public class InlineContext {
+
+    /**
+     * Top delimiter (emphasis, strong emphasis or custom emphasis). (Brackets are on a separate stack, different
+     * from the algorithm described in the spec.)
+     */
+    private Delimiter lastDelimiter;
+
+    /**
+     * Top opening bracket (<code>[</code> or <code>![)</code>).
+     */
+    private Bracket lastBracket;
+
+    /**
+     * Link references by ID, needs to be built up using parseReference before calling parse.
+     */
+    private Map<String, Link> referenceMap;
+
+    private Map<Character, DelimiterProcessor> delimiterProcessors;
+
+
+    public Delimiter lastDelimiter() {
+        return lastDelimiter;
+    }
+
+    public void lastDelimiter(Delimiter lastDelimiter) {
+        this.lastDelimiter = lastDelimiter;
+    }
+
+    public Bracket lastBracket() {
+        return lastBracket;
+    }
+
+    public void lastBracket(Bracket lastBracket) {
+        this.lastBracket = lastBracket;
+    }
+
+    public Map<String, Link> referenceMap() {
+        return referenceMap;
+    }
+
+    public void referenceMap(Map<String, Link> referenceMap) {
+        this.referenceMap = referenceMap;
+    }
+
+    public Map<Character, DelimiterProcessor> delimiterProcessors() {
+        return delimiterProcessors;
+    }
+
+    public void delimiterProcessors(Map<Character, DelimiterProcessor> delimiterProcessors) {
+        this.delimiterProcessors = delimiterProcessors;
+    }
+}
diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserImpl.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserImpl.java
new file mode 100644
index 00000000..12704f7e
--- /dev/null
+++ b/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserImpl.java
@@ -0,0 +1,1190 @@
+package io.noties.markwon.sample.editor.inline;
+
+import androidx.annotation.NonNull;
+
+import org.commonmark.internal.Delimiter;
+import org.commonmark.internal.ReferenceParser;
+import org.commonmark.internal.util.Escaping;
+import org.commonmark.node.Link;
+import org.commonmark.node.Node;
+import org.commonmark.node.Text;
+import org.commonmark.parser.InlineParser;
+import org.commonmark.parser.InlineParserFactory;
+import org.commonmark.parser.delimiter.DelimiterProcessor;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+ +import io.noties.debug.Debug; + +public class InlineParserImpl implements InlineParser, ReferenceParser { + + public interface Builder { + + @NonNull + Builder addInlineProcessor(@NonNull Inline inline); + + @NonNull + Builder addDelimiterProcessor(@NonNull DelimiterProcessor delimiterProcessor); + + @NonNull + InlineParserFactory build(); + } + + @NonNull + public static Builder builder() { + return new BuilderImpl(); + } + +// @NonNull +// public static InlineParserFactory factory() { +//// return context -> new InlineParserImpl(context.getCustomDelimiterProcessors()); +// } +// +// public static InlineParserFactory factory(Inline... inlines) { +// return context -> new InlineParserImpl(Arrays.asList(inlines), context.getCustomDelimiterProcessors()); +// } + + private static final String ESCAPED_CHAR = "\\\\" + Escaping.ESCAPABLE; +// private static final String HTMLCOMMENT = "|"; +// private static final String PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; +// private static final String DECLARATION = "]*>"; +// private static final String CDATA = ""; +// private static final String HTMLTAG = "(?:" + Parsing.OPENTAG + "|" + Parsing.CLOSETAG + "|" + HTMLCOMMENT +// + "|" + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; +// private static final String ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; + + private static final String ASCII_PUNCTUATION = "!\"#\\$%&'\\(\\)\\*\\+,\\-\\./:;<=>\\?@\\[\\\\\\]\\^_`\\{\\|\\}~"; + private static final Pattern PUNCTUATION = Pattern + .compile("^[" + ASCII_PUNCTUATION + "\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]"); + +// private static final Pattern HTML_TAG = Pattern.compile('^' + HTMLTAG, Pattern.CASE_INSENSITIVE); + + private static final Pattern LINK_TITLE = Pattern.compile( + "^(?:\"(" + ESCAPED_CHAR + "|[^\"\\x00])*\"" + + '|' + + "'(" + ESCAPED_CHAR + "|[^'\\x00])*'" + + '|' + + "\\((" + ESCAPED_CHAR + "|[^)\\x00])*\\))"); + + private static final Pattern LINK_DESTINATION_BRACES 
= Pattern.compile("^(?:[<](?:[^<> \\t\\n\\\\]|\\\\.)*[>])"); + + private static final Pattern LINK_LABEL = Pattern.compile("^\\[(?:[^\\\\\\[\\]]|\\\\.)*\\]"); + + private static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); + +// private static final Pattern ENTITY_HERE = Pattern.compile('^' + ENTITY, Pattern.CASE_INSENSITIVE); +// +// private static final Pattern TICKS = Pattern.compile("`+"); +// +// private static final Pattern TICKS_HERE = Pattern.compile("^`+"); +// +// private static final Pattern EMAIL_AUTOLINK = Pattern +// .compile("^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>"); +// +// private static final Pattern AUTOLINK = Pattern +// .compile("^<[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*>"); + + private static final Pattern SPNL = Pattern.compile("^ *(?:\n *)?"); + + private static final Pattern UNICODE_WHITESPACE_CHAR = Pattern.compile("^[\\p{Zs}\t\r\n\f]"); + +// private static final Pattern WHITESPACE = Pattern.compile("\\s+"); +// +// private static final Pattern FINAL_SPACE = Pattern.compile(" *$"); + + private static final Pattern LINE_END = Pattern.compile("^ *(?:\n|$)"); + + private final BitSet specialCharacters; + private final BitSet delimiterCharacters; + private final Map delimiterProcessors; + + /** + * Link references by ID, needs to be built up using parseReference before calling parse. + */ +// private Map referenceMap = new HashMap<>(); + + private Node block; + + private String input; + private int index; + + /** + * Top delimiter (emphasis, strong emphasis or custom emphasis). (Brackets are on a separate stack, different + * from the algorithm described in the spec.) + */ +// private Delimiter lastDelimiter; + + /** + * Top opening bracket ([ or ![)). 
+ */ +// private Bracket lastBracket; + + private final Map> inlines; + + private InlineContext inlineContext; + + + public InlineParserImpl(List inlines, List delimiterProcessors) { + this.delimiterProcessors = calculateDelimiterProcessors(delimiterProcessors); + this.delimiterCharacters = calculateDelimiterCharacters(this.delimiterProcessors.keySet()); + this.specialCharacters = calculateSpecialCharacters(delimiterCharacters); + // we must also put into special characters (otherwise won't be triggered) + this.inlines = calculateInlines(specialCharacters, inlines); + } + + @NonNull + private static Map> calculateInlines(@NonNull BitSet specialCharacters, @NonNull List inlines) { + final Map> map = new HashMap<>(inlines.size()); + List list; + for (Inline inline : inlines) { + for (Character character : inline.characters()) { + specialCharacters.set(character); + list = map.get(character); + if (list == null) { + list = new ArrayList<>(1); + map.put(character, list); + } + list.add(inline); + } + } + return map; + } + + public static BitSet calculateDelimiterCharacters(Set characters) { + BitSet bitSet = new BitSet(); + for (Character character : characters) { + bitSet.set(character); + } + return bitSet; + } + + public static BitSet calculateSpecialCharacters(BitSet delimiterCharacters) { + BitSet bitSet = new BitSet(); + bitSet.or(delimiterCharacters); + bitSet.set('\n'); + bitSet.set('`'); + bitSet.set('['); + bitSet.set(']'); + bitSet.set('\\'); + bitSet.set('!'); + bitSet.set('<'); + bitSet.set('&'); + return bitSet; + } + + public static Map calculateDelimiterProcessors(List delimiterProcessors) { + Map map = new HashMap<>(); +// addDelimiterProcessors(Arrays.asList(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map); + addDelimiterProcessors(delimiterProcessors, map); + return map; + } + + private static void addDelimiterProcessors(Iterable delimiterProcessors, Map map) { + for (DelimiterProcessor delimiterProcessor : 
delimiterProcessors) { + char opening = delimiterProcessor.getOpeningCharacter(); + char closing = delimiterProcessor.getClosingCharacter(); + if (opening == closing) { + DelimiterProcessor old = map.get(opening); + if (old != null && old.getOpeningCharacter() == old.getClosingCharacter()) { + StaggeredDelimiterProcessor s; + if (old instanceof StaggeredDelimiterProcessor) { + s = (StaggeredDelimiterProcessor) old; + } else { + s = new StaggeredDelimiterProcessor(opening); + s.add(old); + } + s.add(delimiterProcessor); + map.put(opening, s); + } else { + addDelimiterProcessorForChar(opening, delimiterProcessor, map); + } + } else { + addDelimiterProcessorForChar(opening, delimiterProcessor, map); + addDelimiterProcessorForChar(closing, delimiterProcessor, map); + } + } + } + + private static void addDelimiterProcessorForChar(char delimiterChar, DelimiterProcessor toAdd, Map delimiterProcessors) { + DelimiterProcessor existing = delimiterProcessors.put(delimiterChar, toAdd); + if (existing != null) { + throw new IllegalArgumentException("Delimiter processor conflict with delimiter char '" + delimiterChar + "'"); + } + } + + /** + * Parse content in block into inline children, using reference map to resolve references. 
+ */ + @Override + public void parse(String content, Node block) { + this.block = block; + this.input = content.trim(); + this.index = 0; +// this.lastDelimiter = null; +// this.lastBracket = null; + this.inlineContext = createInlineContext(); + + Debug.i(input); +// Debug.i(inlines); + + boolean moreToParse; + do { + moreToParse = parseInline(); + } while (moreToParse); + + // todo: must be somehow shared + processDelimiters(null); + mergeChildTextNodes(block); + } + + private InlineContext createInlineContext() { + final InlineContext context = new InlineContext(); + context.delimiterProcessors(delimiterProcessors); + context.referenceMap(new HashMap<>()); + return context; + } + + /** + * Attempt to parse a link reference, modifying the internal reference map. + */ + @Override + public int parseReference(String s) { + this.input = s; + this.index = 0; + String dest; + String title; + int matchChars; + int startIndex = index; + + // label: + matchChars = parseLinkLabel(); + if (matchChars == 0) { + return 0; + } + + String rawLabel = input.substring(0, matchChars); + + // colon: + if (peek() != ':') { + return 0; + } + index++; + + // link url + spnl(); + + dest = parseLinkDestination(); + if (dest == null || dest.length() == 0) { + return 0; + } + + int beforeTitle = index; + spnl(); + title = parseLinkTitle(); + if (title == null) { + // rewind before spaces + index = beforeTitle; + } + + boolean atLineEnd = true; + if (index != input.length() && match(LINE_END) == null) { + if (title == null) { + atLineEnd = false; + } else { + // the potential title we found is not at the line end, + // but it could still be a legal link reference if we + // discard the title + title = null; + // rewind before spaces + index = beforeTitle; + // and instead check if the link URL is at the line end + atLineEnd = match(LINE_END) != null; + } + } + + if (!atLineEnd) { + return 0; + } + + String normalizedLabel = Escaping.normalizeReference(rawLabel); + if 
(normalizedLabel.isEmpty()) { + return 0; + } + + final Map referenceMap = inlineContext.referenceMap(); + + if (!referenceMap.containsKey(normalizedLabel)) { + Link link = new Link(dest, title); + referenceMap.put(normalizedLabel, link); + } + return index - startIndex; + } + + private Text appendText(CharSequence text, int beginIndex, int endIndex) { + return appendText(text.subSequence(beginIndex, endIndex)); + } + + private Text appendText(CharSequence text) { + Text node = new Text(text.toString()); + appendNode(node); + return node; + } + + private void appendNode(Node node) { + block.appendChild(node); + } + + /** + * Parse the next inline element in subject, advancing input index. + * On success, add the result to block's children and return true. + * On failure, return false. + */ + private boolean parseInline() { + char c = peek(); + if (c == '\0') { + return false; + } + + boolean res = false; + + final List inlines = this.inlines.get(c); + + Debug.i("char: '%s', inlines: %s", c, inlines); + + if (inlines != null) { + for (Inline inline : inlines) { + res = processInline(inline); + Debug.i("char: '%s', res: %s, inline: %s", c, res, inline); + if (res) { + break; + } + } + } else { + boolean isDelimiter = delimiterCharacters.get(c); + if (isDelimiter) { + DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c); + res = parseDelimiters(delimiterProcessor, c); + } else { + res = parseString(); + } + } + +// switch (c) { +// case '\n': +// res = parseNewline(); +// break; +// case '\\': +// res = parseBackslash(); +// break; +// case '`': +// res = parseBackticks(); +// break; +// case '[': +// res = parseOpenBracket(); +// break; +// case '!': +// res = parseBang(); +// break; +// case ']': +// res = parseCloseBracket(); +// break; +// case '<': +// res = parseAutolink() || parseHtmlInline(); +// break; +// case '&': +// res = parseEntity(); +// break; +// default: +// boolean isDelimiter = delimiterCharacters.get(c); +// if (isDelimiter) { +// 
DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c); +// res = parseDelimiters(delimiterProcessor, c); +// } else { +// res = parseString(); +// } +// break; +// } + if (!res) { + index++; + // When we get here, it's only for a single special character that turned out to not have a special meaning. + // So we shouldn't have a single surrogate here, hence it should be ok to turn it into a String. + String literal = String.valueOf(c); + appendText(literal); + } + + return true; + } + + private boolean processInline(@NonNull Inline inline) { + inline.bind(inlineContext, block, input, index); + final boolean result = inline.parse(); + index = inline.index; + return result; + } + + /** + * If RE matches at current index in the input, advance index and return the match; otherwise return null. + */ + private String match(Pattern re) { + if (index >= input.length()) { + return null; + } + Matcher matcher = re.matcher(input); + matcher.region(index, input.length()); + boolean m = matcher.find(); + if (m) { + index = matcher.end(); + return matcher.group(); + } else { + return null; + } + } + + /** + * Returns the char at the current input index, or {@code '\0'} in case there are no more characters. + */ + private char peek() { + if (index < input.length()) { + return input.charAt(index); + } else { + return '\0'; + } + } + + /** + * Parse zero or more space characters, including at most one newline. + */ + private boolean spnl() { + match(SPNL); + return true; + } + +// /** +// * Parse a newline. If it was preceded by two spaces, return a hard line break; otherwise a soft line break. +// */ +// private boolean parseNewline() { +// index++; // assume we're at a \n +// +// Node lastChild = block.getLastChild(); +// // Check previous text for trailing spaces. +// // The "endsWith" is an optimization to avoid an RE match in the common case. 
+// if (lastChild != null && lastChild instanceof Text && ((Text) lastChild).getLiteral().endsWith(" ")) { +// Text text = (Text) lastChild; +// String literal = text.getLiteral(); +// Matcher matcher = FINAL_SPACE.matcher(literal); +// int spaces = matcher.find() ? matcher.end() - matcher.start() : 0; +// if (spaces > 0) { +// text.setLiteral(literal.substring(0, literal.length() - spaces)); +// } +// appendNode(spaces >= 2 ? new HardLineBreak() : new SoftLineBreak()); +// } else { +// appendNode(new SoftLineBreak()); +// } +// +// // gobble leading spaces in next line +// while (peek() == ' ') { +// index++; +// } +// return true; +// } + +// /** +// * Parse a backslash-escaped special character, adding either the escaped character, a hard line break +// * (if the backslash is followed by a newline), or a literal backslash to the block's children. +// */ +// private boolean parseBackslash() { +// index++; +// if (peek() == '\n') { +// appendNode(new HardLineBreak()); +// index++; +// } else if (index < input.length() && ESCAPABLE.matcher(input.substring(index, index + 1)).matches()) { +// appendText(input, index, index + 1); +// index++; +// } else { +// appendText("\\"); +// } +// return true; +// } + +// /** +// * Attempt to parse backticks, adding either a backtick code span or a literal sequence of backticks. +// */ +// private boolean parseBackticks() { +// String ticks = match(TICKS_HERE); +// if (ticks == null) { +// return false; +// } +// int afterOpenTicks = index; +// String matched; +// while ((matched = match(TICKS)) != null) { +// if (matched.equals(ticks)) { +// Code node = new Code(); +// String content = input.substring(afterOpenTicks, index - ticks.length()); +// String literal = WHITESPACE.matcher(content.trim()).replaceAll(" "); +// node.setLiteral(literal); +// appendNode(node); +// return true; +// } +// } +// // If we got here, we didn't match a closing backtick sequence. 
+// index = afterOpenTicks; +// appendText(ticks); +// return true; +// } + + /** + * Attempt to parse delimiters like emphasis, strong emphasis or custom delimiters. + */ + private boolean parseDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) { + DelimiterData res = scanDelimiters(delimiterProcessor, delimiterChar); + if (res == null) { + return false; + } + int length = res.count; + int startIndex = index; + + index += length; + Text node = appendText(input, startIndex, index); + + // Add entry to stack for this opener + + final Delimiter lastDelimiter = new Delimiter(node, delimiterChar, res.canOpen, res.canClose, inlineContext.lastDelimiter()); + lastDelimiter.length = length; + lastDelimiter.originalLength = length; + if (lastDelimiter.previous != null) { + lastDelimiter.previous.next = lastDelimiter; + } + inlineContext.lastDelimiter(lastDelimiter); + + return true; + } + +// /** +// * Add open bracket to delimiter stack and add a text node to block's children. +// */ +// private boolean parseOpenBracket() { +// int startIndex = index; +// index++; +// +// Text node = appendText("["); +// +// // Add entry to stack for this opener +// addBracket(Bracket.link(node, startIndex, lastBracket, lastDelimiter)); +// +// return true; +// } + +// /** +// * If next character is [, and ! delimiter to delimiter stack and add a text node to block's children. +// * Otherwise just add a text node. +// */ +// private boolean parseBang() { +// int startIndex = index; +// index++; +// if (peek() == '[') { +// index++; +// +// Text node = appendText("!["); +// +// // Add entry to stack for this opener +// addBracket(Bracket.image(node, startIndex + 1, lastBracket, lastDelimiter)); +// } else { +// appendText("!"); +// } +// return true; +// } + +// /** +// * Try to match close bracket against an opening in the delimiter stack. Add either a link or image, or a +// * plain [ character, to block's children. 
If there is a matching delimiter, remove it from the delimiter stack. +// */ +// private boolean parseCloseBracket() { +// index++; +// int startIndex = index; +// +// // Get previous `[` or `![` +// Bracket opener = lastBracket; +// if (opener == null) { +// // No matching opener, just return a literal. +// appendText("]"); +// return true; +// } +// +// if (!opener.allowed) { +// // Matching opener but it's not allowed, just return a literal. +// appendText("]"); +// removeLastBracket(); +// return true; +// } +// +// // Check to see if we have a link/image +// +// String dest = null; +// String title = null; +// boolean isLinkOrImage = false; +// +// // Maybe a inline link like `[foo](/uri "title")` +// if (peek() == '(') { +// index++; +// spnl(); +// if ((dest = parseLinkDestination()) != null) { +// spnl(); +// // title needs a whitespace before +// if (WHITESPACE.matcher(input.substring(index - 1, index)).matches()) { +// title = parseLinkTitle(); +// spnl(); +// } +// if (peek() == ')') { +// index++; +// isLinkOrImage = true; +// } else { +// index = startIndex; +// } +// } +// } +// +// // Maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]` +// if (!isLinkOrImage) { +// +// // See if there's a link label like `[bar]` or `[]` +// int beforeLabel = index; +// int labelLength = parseLinkLabel(); +// String ref = null; +// if (labelLength > 2) { +// ref = input.substring(beforeLabel, beforeLabel + labelLength); +// } else if (!opener.bracketAfter) { +// // If the second label is empty `[foo][]` or missing `[foo]`, then the first label is the reference. +// // But it can only be a reference when there's no (unescaped) bracket in it. +// // If there is, we don't even need to try to look up the reference. This is an optimization. 
+// ref = input.substring(opener.index, startIndex); +// } +// +// if (ref != null) { +// Link link = referenceMap.get(Escaping.normalizeReference(ref)); +// if (link != null) { +// dest = link.getDestination(); +// title = link.getTitle(); +// isLinkOrImage = true; +// } +// } +// } +// +// if (isLinkOrImage) { +// // If we got here, open is a potential opener +// Node linkOrImage = opener.image ? new Image(dest, title) : new Link(dest, title); +// +// Node node = opener.node.getNext(); +// while (node != null) { +// Node next = node.getNext(); +// linkOrImage.appendChild(node); +// node = next; +// } +// appendNode(linkOrImage); +// +// // Process delimiters such as emphasis inside link/image +// processDelimiters(opener.previousDelimiter); +// mergeChildTextNodes(linkOrImage); +// // We don't need the corresponding text node anymore, we turned it into a link/image node +// opener.node.unlink(); +// removeLastBracket(); +// +// // Links within links are not allowed. We found this link, so there can be no other link around it. +// if (!opener.image) { +// Bracket bracket = lastBracket; +// while (bracket != null) { +// if (!bracket.image) { +// // Disallow link opener. It will still get matched, but will not result in a link. +// bracket.allowed = false; +// } +// bracket = bracket.previous; +// } +// } +// +// return true; +// +// } else { // no link or image +// +// appendText("]"); +// removeLastBracket(); +// +// index = startIndex; +// return true; +// } +// } + +// private void addBracket(Bracket bracket) { +// if (lastBracket != null) { +// lastBracket.bracketAfter = true; +// } +// lastBracket = bracket; +// } +// +// private void removeLastBracket() { +// lastBracket = lastBracket.previous; +// } + + /** + * Attempt to parse link destination, returning the string or null if no match. 
+ */ + private String parseLinkDestination() { + String res = match(LINK_DESTINATION_BRACES); + if (res != null) { // chop off surrounding <..>: + if (res.length() == 2) { + return ""; + } else { + return Escaping.unescapeString(res.substring(1, res.length() - 1)); + } + } else { + int startIndex = index; + parseLinkDestinationWithBalancedParens(); + return Escaping.unescapeString(input.substring(startIndex, index)); + } + } + + private void parseLinkDestinationWithBalancedParens() { + int parens = 0; + while (true) { + char c = peek(); + switch (c) { + case '\0': + return; + case '\\': + // check if we have an escapable character + if (index + 1 < input.length() && ESCAPABLE.matcher(input.substring(index + 1, index + 2)).matches()) { + // skip over the escaped character (after switch) + index++; + break; + } + // otherwise, we treat this as a literal backslash + break; + case '(': + parens++; + break; + case ')': + if (parens == 0) { + return; + } else { + parens--; + } + break; + case ' ': + // ASCII space + return; + default: + // or control character + if (Character.isISOControl(c)) { + return; + } + } + index++; + } + } + + /** + * Attempt to parse link title (sans quotes), returning the string or null if no match. + */ + private String parseLinkTitle() { + String title = match(LINK_TITLE); + if (title != null) { + // chop off quotes from title and unescape: + return Escaping.unescapeString(title.substring(1, title.length() - 1)); + } else { + return null; + } + } + + /** + * Attempt to parse a link label, returning number of characters parsed. + */ + private int parseLinkLabel() { + String m = match(LINK_LABEL); + // Spec says "A link label can have at most 999 characters inside the square brackets" + if (m == null || m.length() > 1001) { + return 0; + } else { + return m.length(); + } + } + +// /** +// * Attempt to parse an autolink (URL or email in pointy brackets). 
+// */ +// private boolean parseAutolink() { +// String m; +// if ((m = match(EMAIL_AUTOLINK)) != null) { +// String dest = m.substring(1, m.length() - 1); +// Link node = new Link("mailto:" + dest, null); +// node.appendChild(new Text(dest)); +// appendNode(node); +// return true; +// } else if ((m = match(AUTOLINK)) != null) { +// String dest = m.substring(1, m.length() - 1); +// Link node = new Link(dest, null); +// node.appendChild(new Text(dest)); +// appendNode(node); +// return true; +// } else { +// return false; +// } +// } + +// /** +// * Attempt to parse inline HTML. +// */ +// private boolean parseHtmlInline() { +// String m = match(HTML_TAG); +// if (m != null) { +// HtmlInline node = new HtmlInline(); +// node.setLiteral(m); +// appendNode(node); +// return true; +// } else { +// return false; +// } +// } + +// /** +// * Attempt to parse an entity, return Entity object if successful. +// */ +// private boolean parseEntity() { +// String m; +// if ((m = match(ENTITY_HERE)) != null) { +// appendText(Html5Entities.entityToString(m)); +// return true; +// } else { +// return false; +// } +// } + + /** + * Parse a run of ordinary characters, or a single character with a special meaning in markdown, as a plain string. + */ + private boolean parseString() { + int begin = index; + int length = input.length(); + while (index != length) { + if (specialCharacters.get(input.charAt(index))) { + break; + } + index++; + } + if (begin != index) { + appendText(input, begin, index); + return true; + } else { + return false; + } + } + + /** + * Scan a sequence of characters with code delimiterChar, and return information about the number of delimiters + * and whether they are positioned such that they can open and/or close emphasis or strong emphasis. 
+ * + * @return information about delimiter run, or {@code null} + */ + private DelimiterData scanDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) { + int startIndex = index; + + int delimiterCount = 0; + while (peek() == delimiterChar) { + delimiterCount++; + index++; + } + + if (delimiterCount < delimiterProcessor.getMinLength()) { + index = startIndex; + return null; + } + + String before = startIndex == 0 ? "\n" : + input.substring(startIndex - 1, startIndex); + + char charAfter = peek(); + String after = charAfter == '\0' ? "\n" : + String.valueOf(charAfter); + + // We could be more lazy here, in most cases we don't need to do every match case. + boolean beforeIsPunctuation = PUNCTUATION.matcher(before).matches(); + boolean beforeIsWhitespace = UNICODE_WHITESPACE_CHAR.matcher(before).matches(); + boolean afterIsPunctuation = PUNCTUATION.matcher(after).matches(); + boolean afterIsWhitespace = UNICODE_WHITESPACE_CHAR.matcher(after).matches(); + + boolean leftFlanking = !afterIsWhitespace && + (!afterIsPunctuation || beforeIsWhitespace || beforeIsPunctuation); + boolean rightFlanking = !beforeIsWhitespace && + (!beforeIsPunctuation || afterIsWhitespace || afterIsPunctuation); + boolean canOpen; + boolean canClose; + if (delimiterChar == '_') { + canOpen = leftFlanking && (!rightFlanking || beforeIsPunctuation); + canClose = rightFlanking && (!leftFlanking || afterIsPunctuation); + } else { + canOpen = leftFlanking && delimiterChar == delimiterProcessor.getOpeningCharacter(); + canClose = rightFlanking && delimiterChar == delimiterProcessor.getClosingCharacter(); + } + + index = startIndex; + return new DelimiterData(delimiterCount, canOpen, canClose); + } + + private void processDelimiters(Delimiter stackBottom) { + + Map openersBottom = new HashMap<>(); + + // find first closer above stackBottom: + Delimiter closer = inlineContext.lastDelimiter(); + while (closer != null && closer.previous != stackBottom) { + closer = closer.previous; + } + 
// move forward, looking for closers, and handling each + while (closer != null) { + char delimiterChar = closer.delimiterChar; + + DelimiterProcessor delimiterProcessor = delimiterProcessors.get(delimiterChar); + if (!closer.canClose || delimiterProcessor == null) { + closer = closer.next; + continue; + } + + char openingDelimiterChar = delimiterProcessor.getOpeningCharacter(); + + // Found delimiter closer. Now look back for first matching opener. + int useDelims = 0; + boolean openerFound = false; + boolean potentialOpenerFound = false; + Delimiter opener = closer.previous; + while (opener != null && opener != stackBottom && opener != openersBottom.get(delimiterChar)) { + if (opener.canOpen && opener.delimiterChar == openingDelimiterChar) { + potentialOpenerFound = true; + useDelims = delimiterProcessor.getDelimiterUse(opener, closer); + if (useDelims > 0) { + openerFound = true; + break; + } + } + opener = opener.previous; + } + + if (!openerFound) { + if (!potentialOpenerFound) { + // Set lower bound for future searches for openers. + // Only do this when we didn't even have a potential + // opener (one that matches the character and can open). + // If an opener was rejected because of the number of + // delimiters (e.g. because of the "multiple of 3" rule), + // we want to consider it next time because the number + // of delimiters can change as we continue processing. + openersBottom.put(delimiterChar, closer.previous); + if (!closer.canOpen) { + // We can remove a closer that can't be an opener, + // once we've seen there's no matching opener: + removeDelimiterKeepNode(closer); + } + } + closer = closer.next; + continue; + } + + Text openerNode = opener.node; + Text closerNode = closer.node; + + // Remove number of used delimiters from stack and inline nodes. 
+ opener.length -= useDelims; + closer.length -= useDelims; + openerNode.setLiteral( + openerNode.getLiteral().substring(0, + openerNode.getLiteral().length() - useDelims)); + closerNode.setLiteral( + closerNode.getLiteral().substring(0, + closerNode.getLiteral().length() - useDelims)); + + removeDelimitersBetween(opener, closer); + // The delimiter processor can re-parent the nodes between opener and closer, + // so make sure they're contiguous already. Exclusive because we want to keep opener/closer themselves. + mergeTextNodesBetweenExclusive(openerNode, closerNode); + delimiterProcessor.process(openerNode, closerNode, useDelims); + + // No delimiter characters left to process, so we can remove delimiter and the now empty node. + if (opener.length == 0) { + removeDelimiterAndNode(opener); + } + + if (closer.length == 0) { + Delimiter next = closer.next; + removeDelimiterAndNode(closer); + closer = next; + } + } + + // remove all delimiters + Delimiter lastDelimiter; + while (((lastDelimiter = inlineContext.lastDelimiter())) != null) { + if (lastDelimiter != stackBottom) { + removeDelimiterKeepNode(lastDelimiter); + } else { + break; + } + } +// while (lastDelimiter != null && lastDelimiter != stackBottom) { +// removeDelimiterKeepNode(lastDelimiter); +// } + } + + private void removeDelimitersBetween(Delimiter opener, Delimiter closer) { + Delimiter delimiter = closer.previous; + while (delimiter != null && delimiter != opener) { + Delimiter previousDelimiter = delimiter.previous; + removeDelimiterKeepNode(delimiter); + delimiter = previousDelimiter; + } + } + + /** + * Remove the delimiter and the corresponding text node. For used delimiters, e.g. `*` in `*foo*`. + */ + private void removeDelimiterAndNode(Delimiter delim) { + Text node = delim.node; + node.unlink(); + removeDelimiter(delim); + } + + /** + * Remove the delimiter but keep the corresponding node as text. For unused delimiters such as `_` in `foo_bar`. 
+ */ + private void removeDelimiterKeepNode(Delimiter delim) { + removeDelimiter(delim); + } + + private void removeDelimiter(Delimiter delim) { + if (delim.previous != null) { + delim.previous.next = delim.next; + } + if (delim.next == null) { + // top of stack +// lastDelimiter = delim.previous; + inlineContext.lastDelimiter(delim.previous); + } else { + delim.next.previous = delim.previous; + } + } + + private void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) { + // No nodes between them + if (fromNode == toNode || fromNode.getNext() == toNode) { + return; + } + + mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious()); + } + + private void mergeChildTextNodes(Node node) { + // No children or just one child node, no need for merging + if (node.getFirstChild() == node.getLastChild()) { + return; + } + + mergeTextNodesInclusive(node.getFirstChild(), node.getLastChild()); + } + + private void mergeTextNodesInclusive(Node fromNode, Node toNode) { + Text first = null; + Text last = null; + int length = 0; + + Node node = fromNode; + while (node != null) { + if (node instanceof Text) { + Text text = (Text) node; + if (first == null) { + first = text; + } + length += text.getLiteral().length(); + last = text; + } else { + mergeIfNeeded(first, last, length); + first = null; + last = null; + length = 0; + } + if (node == toNode) { + break; + } + node = node.getNext(); + } + + mergeIfNeeded(first, last, length); + } + + private void mergeIfNeeded(Text first, Text last, int textLength) { + if (first != null && last != null && first != last) { + StringBuilder sb = new StringBuilder(textLength); + sb.append(first.getLiteral()); + Node node = first.getNext(); + Node stop = last.getNext(); + while (node != stop) { + sb.append(((Text) node).getLiteral()); + Node unlink = node; + node = node.getNext(); + unlink.unlink(); + } + String literal = sb.toString(); + first.setLiteral(literal); + } + } + + private static class DelimiterData { + + final int count; 
+ final boolean canClose; + final boolean canOpen; + + DelimiterData(int count, boolean canOpen, boolean canClose) { + this.count = count; + this.canOpen = canOpen; + this.canClose = canClose; + } + } + + private static class BuilderImpl implements Builder { + + private final List inlines = new ArrayList<>(); + private final List delimiterProcessors = new ArrayList<>(); + + @NonNull + @Override + public Builder addInlineProcessor(@NonNull Inline inline) { + inlines.add(inline); + return this; + } + + @NonNull + @Override + public Builder addDelimiterProcessor(@NonNull DelimiterProcessor delimiterProcessor) { + delimiterProcessors.add(delimiterProcessor); + return this; + } + + @NonNull + @Override + public InlineParserFactory build() { + return inlineParserContext -> { + final List processors; + final List custom = inlineParserContext.getCustomDelimiterProcessors(); + if (custom != null && !custom.isEmpty()) { + processors = new ArrayList<>(delimiterProcessors); + processors.addAll(custom); + } else { + processors = delimiterProcessors; + } + return new InlineParserImpl(inlines, processors); + }; + } + } +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserOriginal.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserOriginal.java new file mode 100644 index 00000000..ee45884d --- /dev/null +++ b/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserOriginal.java @@ -0,0 +1,1060 @@ +package io.noties.markwon.sample.editor.inline; + +import androidx.annotation.NonNull; + +import org.commonmark.internal.Bracket; +import org.commonmark.internal.Delimiter; +import org.commonmark.internal.ReferenceParser; +import org.commonmark.internal.inline.AsteriskDelimiterProcessor; +import org.commonmark.internal.inline.UnderscoreDelimiterProcessor; +import org.commonmark.internal.util.Escaping; +import org.commonmark.internal.util.Html5Entities; +import org.commonmark.internal.util.Parsing; +import 
org.commonmark.node.Code; +import org.commonmark.node.HardLineBreak; +import org.commonmark.node.HtmlInline; +import org.commonmark.node.Image; +import org.commonmark.node.Link; +import org.commonmark.node.Node; +import org.commonmark.node.SoftLineBreak; +import org.commonmark.node.Text; +import org.commonmark.parser.InlineParser; +import org.commonmark.parser.InlineParserFactory; +import org.commonmark.parser.delimiter.DelimiterProcessor; + +import java.util.Arrays; +import java.util.BitSet; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class InlineParserOriginal implements InlineParser, ReferenceParser { + + @NonNull + public static InlineParserFactory factory() { + return context -> new InlineParserOriginal(context.getCustomDelimiterProcessors()); + } + + private static final String ESCAPED_CHAR = "\\\\" + Escaping.ESCAPABLE; + private static final String HTMLCOMMENT = "|"; + private static final String PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; + private static final String DECLARATION = "]*>"; + private static final String CDATA = ""; + private static final String HTMLTAG = "(?:" + Parsing.OPENTAG + "|" + Parsing.CLOSETAG + "|" + HTMLCOMMENT + + "|" + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; + private static final String ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; + + private static final String ASCII_PUNCTUATION = "!\"#\\$%&'\\(\\)\\*\\+,\\-\\./:;<=>\\?@\\[\\\\\\]\\^_`\\{\\|\\}~"; + private static final Pattern PUNCTUATION = Pattern + .compile("^[" + ASCII_PUNCTUATION + "\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]"); + + private static final Pattern HTML_TAG = Pattern.compile('^' + HTMLTAG, Pattern.CASE_INSENSITIVE); + + private static final Pattern LINK_TITLE = Pattern.compile( + "^(?:\"(" + ESCAPED_CHAR + "|[^\"\\x00])*\"" + + '|' + + "'(" + ESCAPED_CHAR + "|[^'\\x00])*'" + + '|' + 
+ "\\((" + ESCAPED_CHAR + "|[^)\\x00])*\\))"); + + private static final Pattern LINK_DESTINATION_BRACES = Pattern.compile("^(?:[<](?:[^<> \\t\\n\\\\]|\\\\.)*[>])"); + + private static final Pattern LINK_LABEL = Pattern.compile("^\\[(?:[^\\\\\\[\\]]|\\\\.)*\\]"); + + private static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); + + private static final Pattern ENTITY_HERE = Pattern.compile('^' + ENTITY, Pattern.CASE_INSENSITIVE); + + private static final Pattern TICKS = Pattern.compile("`+"); + + private static final Pattern TICKS_HERE = Pattern.compile("^`+"); + + private static final Pattern EMAIL_AUTOLINK = Pattern + .compile("^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>"); + + private static final Pattern AUTOLINK = Pattern + .compile("^<[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*>"); + + private static final Pattern SPNL = Pattern.compile("^ *(?:\n *)?"); + + private static final Pattern UNICODE_WHITESPACE_CHAR = Pattern.compile("^[\\p{Zs}\t\r\n\f]"); + + private static final Pattern WHITESPACE = Pattern.compile("\\s+"); + + private static final Pattern FINAL_SPACE = Pattern.compile(" *$"); + + private static final Pattern LINE_END = Pattern.compile("^ *(?:\n|$)"); + + private final BitSet specialCharacters; + private final BitSet delimiterCharacters; + private final Map delimiterProcessors; + + /** + * Link references by ID, needs to be built up using parseReference before calling parse. + */ + private Map referenceMap = new HashMap<>(); + + private Node block; + + private String input; + private int index; + + /** + * Top delimiter (emphasis, strong emphasis or custom emphasis). (Brackets are on a separate stack, different + * from the algorithm described in the spec.) + */ + private Delimiter lastDelimiter; + + /** + * Top opening bracket ([ or ![)). 
+ */ + private Bracket lastBracket; + + public InlineParserOriginal(List delimiterProcessors) { + this.delimiterProcessors = calculateDelimiterProcessors(delimiterProcessors); + this.delimiterCharacters = calculateDelimiterCharacters(this.delimiterProcessors.keySet()); + this.specialCharacters = calculateSpecialCharacters(delimiterCharacters); + } + + public static BitSet calculateDelimiterCharacters(Set characters) { + BitSet bitSet = new BitSet(); + for (Character character : characters) { + bitSet.set(character); + } + return bitSet; + } + + public static BitSet calculateSpecialCharacters(BitSet delimiterCharacters) { + BitSet bitSet = new BitSet(); + bitSet.or(delimiterCharacters); + bitSet.set('\n'); + bitSet.set('`'); + bitSet.set('['); + bitSet.set(']'); + bitSet.set('\\'); + bitSet.set('!'); + bitSet.set('<'); + bitSet.set('&'); + return bitSet; + } + + public static Map calculateDelimiterProcessors(List delimiterProcessors) { + Map map = new HashMap<>(); + addDelimiterProcessors(Arrays.asList(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map); + addDelimiterProcessors(delimiterProcessors, map); + return map; + } + + private static void addDelimiterProcessors(Iterable delimiterProcessors, Map map) { + for (DelimiterProcessor delimiterProcessor : delimiterProcessors) { + char opening = delimiterProcessor.getOpeningCharacter(); + char closing = delimiterProcessor.getClosingCharacter(); + if (opening == closing) { + DelimiterProcessor old = map.get(opening); + if (old != null && old.getOpeningCharacter() == old.getClosingCharacter()) { + StaggeredDelimiterProcessor s; + if (old instanceof StaggeredDelimiterProcessor) { + s = (StaggeredDelimiterProcessor) old; + } else { + s = new StaggeredDelimiterProcessor(opening); + s.add(old); + } + s.add(delimiterProcessor); + map.put(opening, s); + } else { + addDelimiterProcessorForChar(opening, delimiterProcessor, map); + } + } else { + addDelimiterProcessorForChar(opening, delimiterProcessor, 
map); + addDelimiterProcessorForChar(closing, delimiterProcessor, map); + } + } + } + + private static void addDelimiterProcessorForChar(char delimiterChar, DelimiterProcessor toAdd, Map delimiterProcessors) { + DelimiterProcessor existing = delimiterProcessors.put(delimiterChar, toAdd); + if (existing != null) { + throw new IllegalArgumentException("Delimiter processor conflict with delimiter char '" + delimiterChar + "'"); + } + } + + /** + * Parse content in block into inline children, using reference map to resolve references. + */ + @Override + public void parse(String content, Node block) { + this.block = block; + this.input = content.trim(); + this.index = 0; + this.lastDelimiter = null; + this.lastBracket = null; + + boolean moreToParse; + do { + moreToParse = parseInline(); + } while (moreToParse); + + processDelimiters(null); + mergeChildTextNodes(block); + } + + /** + * Attempt to parse a link reference, modifying the internal reference map. + */ + @Override + public int parseReference(String s) { + this.input = s; + this.index = 0; + String dest; + String title; + int matchChars; + int startIndex = index; + + // label: + matchChars = parseLinkLabel(); + if (matchChars == 0) { + return 0; + } + + String rawLabel = input.substring(0, matchChars); + + // colon: + if (peek() != ':') { + return 0; + } + index++; + + // link url + spnl(); + + dest = parseLinkDestination(); + if (dest == null || dest.length() == 0) { + return 0; + } + + int beforeTitle = index; + spnl(); + title = parseLinkTitle(); + if (title == null) { + // rewind before spaces + index = beforeTitle; + } + + boolean atLineEnd = true; + if (index != input.length() && match(LINE_END) == null) { + if (title == null) { + atLineEnd = false; + } else { + // the potential title we found is not at the line end, + // but it could still be a legal link reference if we + // discard the title + title = null; + // rewind before spaces + index = beforeTitle; + // and instead check if the link URL is at 
the line end + atLineEnd = match(LINE_END) != null; + } + } + + if (!atLineEnd) { + return 0; + } + + String normalizedLabel = Escaping.normalizeReference(rawLabel); + if (normalizedLabel.isEmpty()) { + return 0; + } + + if (!referenceMap.containsKey(normalizedLabel)) { + Link link = new Link(dest, title); + referenceMap.put(normalizedLabel, link); + } + return index - startIndex; + } + + private Text appendText(CharSequence text, int beginIndex, int endIndex) { + return appendText(text.subSequence(beginIndex, endIndex)); + } + + private Text appendText(CharSequence text) { + Text node = new Text(text.toString()); + appendNode(node); + return node; + } + + private void appendNode(Node node) { + block.appendChild(node); + } + + /** + * Parse the next inline element in subject, advancing input index. + * On success, add the result to block's children and return true. + * On failure, return false. + */ + private boolean parseInline() { + boolean res; + char c = peek(); + if (c == '\0') { + return false; + } + switch (c) { + case '\n': + res = parseNewline(); + break; + case '\\': + res = parseBackslash(); + break; + case '`': + res = parseBackticks(); + break; + case '[': + res = parseOpenBracket(); + break; + case '!': + res = parseBang(); + break; + case ']': + res = parseCloseBracket(); + break; + case '<': + res = parseAutolink() || parseHtmlInline(); + break; + case '&': + res = parseEntity(); + break; + default: + boolean isDelimiter = delimiterCharacters.get(c); + if (isDelimiter) { + DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c); + res = parseDelimiters(delimiterProcessor, c); + } else { + res = parseString(); + } + break; + } + if (!res) { + index++; + // When we get here, it's only for a single special character that turned out to not have a special meaning. + // So we shouldn't have a single surrogate here, hence it should be ok to turn it into a String. 
+ String literal = String.valueOf(c); + appendText(literal); + } + + return true; + } + + /** + * If RE matches at current index in the input, advance index and return the match; otherwise return null. + */ + private String match(Pattern re) { + if (index >= input.length()) { + return null; + } + Matcher matcher = re.matcher(input); + matcher.region(index, input.length()); + boolean m = matcher.find(); + if (m) { + index = matcher.end(); + return matcher.group(); + } else { + return null; + } + } + + /** + * Returns the char at the current input index, or {@code '\0'} in case there are no more characters. + */ + private char peek() { + if (index < input.length()) { + return input.charAt(index); + } else { + return '\0'; + } + } + + /** + * Parse zero or more space characters, including at most one newline. + */ + private boolean spnl() { + match(SPNL); + return true; + } + + /** + * Parse a newline. If it was preceded by two spaces, return a hard line break; otherwise a soft line break. + */ + private boolean parseNewline() { + index++; // assume we're at a \n + + Node lastChild = block.getLastChild(); + // Check previous text for trailing spaces. + // The "endsWith" is an optimization to avoid an RE match in the common case. + if (lastChild != null && lastChild instanceof Text && ((Text) lastChild).getLiteral().endsWith(" ")) { + Text text = (Text) lastChild; + String literal = text.getLiteral(); + Matcher matcher = FINAL_SPACE.matcher(literal); + int spaces = matcher.find() ? matcher.end() - matcher.start() : 0; + if (spaces > 0) { + text.setLiteral(literal.substring(0, literal.length() - spaces)); + } + appendNode(spaces >= 2 ? 
new HardLineBreak() : new SoftLineBreak()); + } else { + appendNode(new SoftLineBreak()); + } + + // gobble leading spaces in next line + while (peek() == ' ') { + index++; + } + return true; + } + + /** + * Parse a backslash-escaped special character, adding either the escaped character, a hard line break + * (if the backslash is followed by a newline), or a literal backslash to the block's children. + */ + private boolean parseBackslash() { + index++; + if (peek() == '\n') { + appendNode(new HardLineBreak()); + index++; + } else if (index < input.length() && ESCAPABLE.matcher(input.substring(index, index + 1)).matches()) { + appendText(input, index, index + 1); + index++; + } else { + appendText("\\"); + } + return true; + } + + /** + * Attempt to parse backticks, adding either a backtick code span or a literal sequence of backticks. + */ + private boolean parseBackticks() { + String ticks = match(TICKS_HERE); + if (ticks == null) { + return false; + } + int afterOpenTicks = index; + String matched; + while ((matched = match(TICKS)) != null) { + if (matched.equals(ticks)) { + Code node = new Code(); + String content = input.substring(afterOpenTicks, index - ticks.length()); + String literal = WHITESPACE.matcher(content.trim()).replaceAll(" "); + node.setLiteral(literal); + appendNode(node); + return true; + } + } + // If we got here, we didn't match a closing backtick sequence. + index = afterOpenTicks; + appendText(ticks); + return true; + } + + /** + * Attempt to parse delimiters like emphasis, strong emphasis or custom delimiters. 
+ */ + private boolean parseDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) { + DelimiterData res = scanDelimiters(delimiterProcessor, delimiterChar); + if (res == null) { + return false; + } + int length = res.count; + int startIndex = index; + + index += length; + Text node = appendText(input, startIndex, index); + + // Add entry to stack for this opener + lastDelimiter = new Delimiter(node, delimiterChar, res.canOpen, res.canClose, lastDelimiter); + lastDelimiter.length = length; + lastDelimiter.originalLength = length; + if (lastDelimiter.previous != null) { + lastDelimiter.previous.next = lastDelimiter; + } + + return true; + } + + /** + * Add open bracket to delimiter stack and add a text node to block's children. + */ + private boolean parseOpenBracket() { + int startIndex = index; + index++; + + Text node = appendText("["); + + // Add entry to stack for this opener + addBracket(Bracket.link(node, startIndex, lastBracket, lastDelimiter)); + + return true; + } + + /** + * If next character is [, add ! delimiter to delimiter stack and add a text node to block's children. + * Otherwise just add a text node. + */ + private boolean parseBang() { + int startIndex = index; + index++; + if (peek() == '[') { + index++; + + Text node = appendText("!["); + + // Add entry to stack for this opener + addBracket(Bracket.image(node, startIndex + 1, lastBracket, lastDelimiter)); + } else { + appendText("!"); + } + return true; + } + + /** + * Try to match close bracket against an opening in the delimiter stack. Add either a link or image, or a + * plain ] character, to block's children. If there is a matching delimiter, remove it from the delimiter stack. + */ + private boolean parseCloseBracket() { + index++; + int startIndex = index; + + // Get previous `[` or `![` + Bracket opener = lastBracket; + if (opener == null) { + // No matching opener, just return a literal. 
+ appendText("]"); + return true; + } + + if (!opener.allowed) { + // Matching opener but it's not allowed, just return a literal. + appendText("]"); + removeLastBracket(); + return true; + } + + // Check to see if we have a link/image + + String dest = null; + String title = null; + boolean isLinkOrImage = false; + + // Maybe a inline link like `[foo](/uri "title")` + if (peek() == '(') { + index++; + spnl(); + if ((dest = parseLinkDestination()) != null) { + spnl(); + // title needs a whitespace before + if (WHITESPACE.matcher(input.substring(index - 1, index)).matches()) { + title = parseLinkTitle(); + spnl(); + } + if (peek() == ')') { + index++; + isLinkOrImage = true; + } else { + index = startIndex; + } + } + } + + // Maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]` + if (!isLinkOrImage) { + + // See if there's a link label like `[bar]` or `[]` + int beforeLabel = index; + int labelLength = parseLinkLabel(); + String ref = null; + if (labelLength > 2) { + ref = input.substring(beforeLabel, beforeLabel + labelLength); + } else if (!opener.bracketAfter) { + // If the second label is empty `[foo][]` or missing `[foo]`, then the first label is the reference. + // But it can only be a reference when there's no (unescaped) bracket in it. + // If there is, we don't even need to try to look up the reference. This is an optimization. + ref = input.substring(opener.index, startIndex); + } + + if (ref != null) { + Link link = referenceMap.get(Escaping.normalizeReference(ref)); + if (link != null) { + dest = link.getDestination(); + title = link.getTitle(); + isLinkOrImage = true; + } + } + } + + if (isLinkOrImage) { + // If we got here, open is a potential opener + Node linkOrImage = opener.image ? 
new Image(dest, title) : new Link(dest, title); + + Node node = opener.node.getNext(); + while (node != null) { + Node next = node.getNext(); + linkOrImage.appendChild(node); + node = next; + } + appendNode(linkOrImage); + + // Process delimiters such as emphasis inside link/image + processDelimiters(opener.previousDelimiter); + mergeChildTextNodes(linkOrImage); + // We don't need the corresponding text node anymore, we turned it into a link/image node + opener.node.unlink(); + removeLastBracket(); + + // Links within links are not allowed. We found this link, so there can be no other link around it. + if (!opener.image) { + Bracket bracket = lastBracket; + while (bracket != null) { + if (!bracket.image) { + // Disallow link opener. It will still get matched, but will not result in a link. + bracket.allowed = false; + } + bracket = bracket.previous; + } + } + + return true; + + } else { // no link or image + + appendText("]"); + removeLastBracket(); + + index = startIndex; + return true; + } + } + + private void addBracket(Bracket bracket) { + if (lastBracket != null) { + lastBracket.bracketAfter = true; + } + lastBracket = bracket; + } + + private void removeLastBracket() { + lastBracket = lastBracket.previous; + } + + /** + * Attempt to parse link destination, returning the string or null if no match. 
+ */ + private String parseLinkDestination() { + String res = match(LINK_DESTINATION_BRACES); + if (res != null) { // chop off surrounding <..>: + if (res.length() == 2) { + return ""; + } else { + return Escaping.unescapeString(res.substring(1, res.length() - 1)); + } + } else { + int startIndex = index; + parseLinkDestinationWithBalancedParens(); + return Escaping.unescapeString(input.substring(startIndex, index)); + } + } + + private void parseLinkDestinationWithBalancedParens() { + int parens = 0; + while (true) { + char c = peek(); + switch (c) { + case '\0': + return; + case '\\': + // check if we have an escapable character + if (index + 1 < input.length() && ESCAPABLE.matcher(input.substring(index + 1, index + 2)).matches()) { + // skip over the escaped character (after switch) + index++; + break; + } + // otherwise, we treat this as a literal backslash + break; + case '(': + parens++; + break; + case ')': + if (parens == 0) { + return; + } else { + parens--; + } + break; + case ' ': + // ASCII space ends the destination + return; + default: + // a control character ends the destination as well + if (Character.isISOControl(c)) { + return; + } + } + index++; + } + } + + /** + * Attempt to parse link title (sans quotes), returning the string or null if no match. + */ + private String parseLinkTitle() { + String title = match(LINK_TITLE); + if (title != null) { + // chop off the surrounding quotes/parentheses from title and unescape: + return Escaping.unescapeString(title.substring(1, title.length() - 1)); + } else { + return null; + } + } + + /** + * Attempt to parse a link label, returning the number of characters parsed, or 0 if there is no label at the current index or it is too long. + */ + private int parseLinkLabel() { + String m = match(LINK_LABEL); + // Spec says "A link label can have at most 999 characters inside the square brackets" (999 + the two brackets = 1001). NOTE(review): match() has already advanced index when an over-long label is rejected here - confirm callers rewind. + if (m == null || m.length() > 1001) { + return 0; + } else { + return m.length(); + } + } + + /** + * Attempt to parse an autolink (URL or email in pointy brackets). 
+ */ + private boolean parseAutolink() { + String m; + if ((m = match(EMAIL_AUTOLINK)) != null) { + String dest = m.substring(1, m.length() - 1); + Link node = new Link("mailto:" + dest, null); + node.appendChild(new Text(dest)); + appendNode(node); + return true; + } else if ((m = match(AUTOLINK)) != null) { + String dest = m.substring(1, m.length() - 1); + Link node = new Link(dest, null); + node.appendChild(new Text(dest)); + appendNode(node); + return true; + } else { + return false; + } + } + + /** + * Attempt to parse inline HTML, appending an HtmlInline node on success. + */ + private boolean parseHtmlInline() { + String m = match(HTML_TAG); + if (m != null) { + HtmlInline node = new HtmlInline(); + node.setLiteral(m); + appendNode(node); + return true; + } else { + return false; + } + } + + /** + * Attempt to parse an entity; on success append the text returned by Html5Entities.entityToString and return true. + */ + private boolean parseEntity() { + String m; + if ((m = match(ENTITY_HERE)) != null) { + appendText(Html5Entities.entityToString(m)); + return true; + } else { + return false; + } + } + + /** + * Parse a run of ordinary (non-special) characters as a plain text node; returns false without consuming input if there is no such run at the current index. + */ + private boolean parseString() { + int begin = index; + int length = input.length(); + while (index != length) { + if (specialCharacters.get(input.charAt(index))) { + break; + } + index++; + } + if (begin != index) { + appendText(input, begin, index); + return true; + } else { + return false; + } + } + + /** + * Scan a sequence of characters with code delimiterChar, and return information about the number of delimiters + * and whether they are positioned such that they can open and/or close emphasis or strong emphasis. 
+ * + * @return information about delimiter run, or {@code null} + */ + private DelimiterData scanDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) { + int startIndex = index; + + int delimiterCount = 0; + while (peek() == delimiterChar) { + delimiterCount++; + index++; + } + + if (delimiterCount < delimiterProcessor.getMinLength()) { + index = startIndex; + return null; + } + + String before = startIndex == 0 ? "\n" : + input.substring(startIndex - 1, startIndex); + + char charAfter = peek(); + String after = charAfter == '\0' ? "\n" : + String.valueOf(charAfter); + + // We could be more lazy here, in most cases we don't need to do every match case. + boolean beforeIsPunctuation = PUNCTUATION.matcher(before).matches(); + boolean beforeIsWhitespace = UNICODE_WHITESPACE_CHAR.matcher(before).matches(); + boolean afterIsPunctuation = PUNCTUATION.matcher(after).matches(); + boolean afterIsWhitespace = UNICODE_WHITESPACE_CHAR.matcher(after).matches(); + + boolean leftFlanking = !afterIsWhitespace && + (!afterIsPunctuation || beforeIsWhitespace || beforeIsPunctuation); + boolean rightFlanking = !beforeIsWhitespace && + (!beforeIsPunctuation || afterIsWhitespace || afterIsPunctuation); + boolean canOpen; + boolean canClose; + if (delimiterChar == '_') { + canOpen = leftFlanking && (!rightFlanking || beforeIsPunctuation); + canClose = rightFlanking && (!leftFlanking || afterIsPunctuation); + } else { + canOpen = leftFlanking && delimiterChar == delimiterProcessor.getOpeningCharacter(); + canClose = rightFlanking && delimiterChar == delimiterProcessor.getClosingCharacter(); + } + + index = startIndex; + return new DelimiterData(delimiterCount, canOpen, canClose); + } + + private void processDelimiters(Delimiter stackBottom) { + + Map openersBottom = new HashMap<>(); + + // find first closer above stackBottom: + Delimiter closer = lastDelimiter; + while (closer != null && closer.previous != stackBottom) { + closer = closer.previous; + } + // move forward, 
looking for closers, and handling each + while (closer != null) { + char delimiterChar = closer.delimiterChar; + + DelimiterProcessor delimiterProcessor = delimiterProcessors.get(delimiterChar); + if (!closer.canClose || delimiterProcessor == null) { + closer = closer.next; + continue; + } + + char openingDelimiterChar = delimiterProcessor.getOpeningCharacter(); + + // Found delimiter closer. Now look back for first matching opener. + int useDelims = 0; + boolean openerFound = false; + boolean potentialOpenerFound = false; + Delimiter opener = closer.previous; + while (opener != null && opener != stackBottom && opener != openersBottom.get(delimiterChar)) { + if (opener.canOpen && opener.delimiterChar == openingDelimiterChar) { + potentialOpenerFound = true; + useDelims = delimiterProcessor.getDelimiterUse(opener, closer); + if (useDelims > 0) { + openerFound = true; + break; + } + } + opener = opener.previous; + } + + if (!openerFound) { + if (!potentialOpenerFound) { + // Set lower bound for future searches for openers. + // Only do this when we didn't even have a potential + // opener (one that matches the character and can open). + // If an opener was rejected because of the number of + // delimiters (e.g. because of the "multiple of 3" rule), + // we want to consider it next time because the number + // of delimiters can change as we continue processing. + openersBottom.put(delimiterChar, closer.previous); + if (!closer.canOpen) { + // We can remove a closer that can't be an opener, + // once we've seen there's no matching opener: + removeDelimiterKeepNode(closer); + } + } + closer = closer.next; + continue; + } + + Text openerNode = opener.node; + Text closerNode = closer.node; + + // Remove number of used delimiters from stack and inline nodes. 
+ opener.length -= useDelims; + closer.length -= useDelims; + openerNode.setLiteral( + openerNode.getLiteral().substring(0, + openerNode.getLiteral().length() - useDelims)); + closerNode.setLiteral( + closerNode.getLiteral().substring(0, + closerNode.getLiteral().length() - useDelims)); + + removeDelimitersBetween(opener, closer); + // The delimiter processor can re-parent the nodes between opener and closer, + // so make sure they're contiguous already. Exclusive because we want to keep opener/closer themselves. + mergeTextNodesBetweenExclusive(openerNode, closerNode); + delimiterProcessor.process(openerNode, closerNode, useDelims); + + // No delimiter characters left to process, so we can remove delimiter and the now empty node. + if (opener.length == 0) { + removeDelimiterAndNode(opener); + } + + if (closer.length == 0) { + Delimiter next = closer.next; + removeDelimiterAndNode(closer); + closer = next; + } + } + + // remove all delimiters + while (lastDelimiter != null && lastDelimiter != stackBottom) { + removeDelimiterKeepNode(lastDelimiter); + } + } + + private void removeDelimitersBetween(Delimiter opener, Delimiter closer) { + Delimiter delimiter = closer.previous; + while (delimiter != null && delimiter != opener) { + Delimiter previousDelimiter = delimiter.previous; + removeDelimiterKeepNode(delimiter); + delimiter = previousDelimiter; + } + } + + /** + * Remove the delimiter and the corresponding text node. For used delimiters, e.g. `*` in `*foo*`. + */ + private void removeDelimiterAndNode(Delimiter delim) { + Text node = delim.node; + node.unlink(); + removeDelimiter(delim); + } + + /** + * Remove the delimiter but keep the corresponding node as text. For unused delimiters such as `_` in `foo_bar`. 
+ */ + private void removeDelimiterKeepNode(Delimiter delim) { + removeDelimiter(delim); + } + + private void removeDelimiter(Delimiter delim) { + if (delim.previous != null) { + delim.previous.next = delim.next; + } + if (delim.next == null) { + // top of stack + lastDelimiter = delim.previous; + } else { + delim.next.previous = delim.previous; + } + } + + private void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) { + // No nodes between them + if (fromNode == toNode || fromNode.getNext() == toNode) { + return; + } + + mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious()); + } + + private void mergeChildTextNodes(Node node) { + // No children or just one child node, no need for merging + if (node.getFirstChild() == node.getLastChild()) { + return; + } + + mergeTextNodesInclusive(node.getFirstChild(), node.getLastChild()); + } + + private void mergeTextNodesInclusive(Node fromNode, Node toNode) { + Text first = null; + Text last = null; + int length = 0; + + Node node = fromNode; + while (node != null) { + if (node instanceof Text) { + Text text = (Text) node; + if (first == null) { + first = text; + } + length += text.getLiteral().length(); + last = text; + } else { + mergeIfNeeded(first, last, length); + first = null; + last = null; + length = 0; + } + if (node == toNode) { + break; + } + node = node.getNext(); + } + + mergeIfNeeded(first, last, length); + } + + private void mergeIfNeeded(Text first, Text last, int textLength) { + if (first != null && last != null && first != last) { + StringBuilder sb = new StringBuilder(textLength); + sb.append(first.getLiteral()); + Node node = first.getNext(); + Node stop = last.getNext(); + while (node != stop) { + sb.append(((Text) node).getLiteral()); + Node unlink = node; + node = node.getNext(); + unlink.unlink(); + } + String literal = sb.toString(); + first.setLiteral(literal); + } + } + + private static class DelimiterData { + + final int count; + final boolean canClose; + final boolean 
canOpen; + + DelimiterData(int count, boolean canOpen, boolean canClose) { + this.count = count; + this.canOpen = canOpen; + this.canClose = canClose; + } + } +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/NewLineInline.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/NewLineInline.java new file mode 100644 index 00000000..6c18ab64 --- /dev/null +++ b/sample/src/main/java/io/noties/markwon/sample/editor/inline/NewLineInline.java @@ -0,0 +1,51 @@ +package io.noties.markwon.sample.editor.inline; + +import androidx.annotation.NonNull; + +import org.commonmark.node.HardLineBreak; +import org.commonmark.node.Node; +import org.commonmark.node.SoftLineBreak; +import org.commonmark.node.Text; + +import java.util.Collection; +import java.util.Collections; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class NewLineInline extends Inline { + + private static final Pattern FINAL_SPACE = Pattern.compile(" *$"); + + @NonNull + @Override + public Collection characters() { + return Collections.singleton('\n'); + } + + @Override + public boolean parse() { + index++; // assume we're at a \n + + Node lastChild = block.getLastChild(); + // Check previous text for trailing spaces. + // The "endsWith" is an optimization to avoid an RE match in the common case. + if (lastChild != null && lastChild instanceof Text && ((Text) lastChild).getLiteral().endsWith(" ")) { + Text text = (Text) lastChild; + String literal = text.getLiteral(); + Matcher matcher = FINAL_SPACE.matcher(literal); + int spaces = matcher.find() ? matcher.end() - matcher.start() : 0; + if (spaces > 0) { + text.setLiteral(literal.substring(0, literal.length() - spaces)); + } + appendNode(spaces >= 2 ? 
new HardLineBreak() : new SoftLineBreak()); + } else { + appendNode(new SoftLineBreak()); + } + + // gobble leading spaces in next line + while (peek() == ' ') { + index++; + } + return true; + } +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/OpenBracketInline.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/OpenBracketInline.java new file mode 100644 index 00000000..c4fe20d9 --- /dev/null +++ b/sample/src/main/java/io/noties/markwon/sample/editor/inline/OpenBracketInline.java @@ -0,0 +1,31 @@ +package io.noties.markwon.sample.editor.inline; + +import androidx.annotation.NonNull; + +import org.commonmark.internal.Bracket; +import org.commonmark.node.Text; + +import java.util.Collection; +import java.util.Collections; + +public class OpenBracketInline extends Inline { + @NonNull + @Override + public Collection characters() { + return Collections.singleton('['); + } + + @Override + public boolean parse() { + + int startIndex = index; + index++; + + Text node = appendText("["); + + // Add entry to stack for this opener + addBracket(Bracket.link(node, startIndex, lastBracket(), lastDelimiter())); + + return true; + } +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/StaggeredDelimiterProcessor.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/StaggeredDelimiterProcessor.java new file mode 100644 index 00000000..7765ca54 --- /dev/null +++ b/sample/src/main/java/io/noties/markwon/sample/editor/inline/StaggeredDelimiterProcessor.java @@ -0,0 +1,76 @@ +package io.noties.markwon.sample.editor.inline; + +import org.commonmark.node.Text; +import org.commonmark.parser.delimiter.DelimiterProcessor; +import org.commonmark.parser.delimiter.DelimiterRun; + +import java.util.LinkedList; +import java.util.ListIterator; + +class StaggeredDelimiterProcessor implements DelimiterProcessor { + + private final char delim; + private int minLength = 0; + private LinkedList processors = new 
LinkedList<>(); // in reverse getMinLength order + + StaggeredDelimiterProcessor(char delim) { + this.delim = delim; + } + + + @Override + public char getOpeningCharacter() { + return delim; + } + + @Override + public char getClosingCharacter() { + return delim; + } + + @Override + public int getMinLength() { + return minLength; + } + + void add(DelimiterProcessor dp) { + final int len = dp.getMinLength(); + ListIterator it = processors.listIterator(); + boolean added = false; + while (it.hasNext()) { + DelimiterProcessor p = it.next(); + int pLen = p.getMinLength(); + if (len > pLen) { + it.previous(); + it.add(dp); + added = true; + break; + } else if (len == pLen) { + throw new IllegalArgumentException("Cannot add two delimiter processors for char '" + delim + "' and minimum length " + len); + } + } + if (!added) { + processors.add(dp); + this.minLength = len; + } + } + + private DelimiterProcessor findProcessor(int len) { + for (DelimiterProcessor p : processors) { + if (p.getMinLength() <= len) { + return p; + } + } + return processors.getFirst(); + } + + @Override + public int getDelimiterUse(DelimiterRun opener, DelimiterRun closer) { + return findProcessor(opener.length()).getDelimiterUse(opener, closer); + } + + @Override + public void process(Text opener, Text closer, int delimiterUse) { + findProcessor(delimiterUse).process(opener, closer, delimiterUse); + } +} From 93a14b47313ec6c0f20d8c36f52938d658da0ed7 Mon Sep 17 00:00:00 2001 From: Dimitry Ivanov Date: Wed, 13 Nov 2019 13:38:59 +0300 Subject: [PATCH 2/4] Created inline-parser module --- markwon-inline-parser/README.md | 20 + markwon-inline-parser/build.gradle | 21 + markwon-inline-parser/gradle.properties | 4 + .../src/main/AndroidManifest.xml | 1 + .../AsteriskDelimiterProcessor.java | 7 + .../inlineparser/AutolinkInlineProcessor.java | 20 +- .../BackslashInlineProcessor.java | 22 +- .../BackticksInlineProcessor.java | 22 +- .../inlineparser/BangInlineProcessor.java | 21 +- 
.../CloseBracketInlineProcessor.java | 27 +- .../inlineparser/EntityInlineProcessor.java | 20 +- .../inlineparser/HtmlInlineProcessor.java | 23 +- .../inlineparser/InlineParserUtils.java | 77 ++ .../markwon/inlineparser/InlineProcessor.java | 148 ++ .../inlineparser/MarkwonInlineParser.java | 785 +++++------ .../MarkwonInlineParserContext.java | 65 + .../inlineparser/NewLineInlineProcessor.java | 18 +- .../OpenBracketInlineProcessor.java | 22 +- .../StaggeredDelimiterProcessor.java | 3 +- .../UnderscoreDelimiterProcessor.java | 7 + sample/build.gradle | 1 + .../markwon/sample/editor/EditorActivity.java | 94 +- .../markwon/sample/editor/inline/Inline.java | 429 ------ .../sample/editor/inline/InlineContext.java | 62 - .../editor/inline/InlineParserImpl.java | 1190 ----------------- settings.gradle | 1 + 26 files changed, 790 insertions(+), 2320 deletions(-) create mode 100644 markwon-inline-parser/README.md create mode 100644 markwon-inline-parser/build.gradle create mode 100644 markwon-inline-parser/gradle.properties create mode 100644 markwon-inline-parser/src/main/AndroidManifest.xml create mode 100644 markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AsteriskDelimiterProcessor.java rename sample/src/main/java/io/noties/markwon/sample/editor/inline/AutolinkInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AutolinkInlineProcessor.java (77%) rename sample/src/main/java/io/noties/markwon/sample/editor/inline/BackslashInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackslashInlineProcessor.java (57%) rename sample/src/main/java/io/noties/markwon/sample/editor/inline/BackticksInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackticksInlineProcessor.java (72%) rename sample/src/main/java/io/noties/markwon/sample/editor/inline/BangInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BangInlineProcessor.java (60%) rename 
sample/src/main/java/io/noties/markwon/sample/editor/inline/CloseBracketInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/CloseBracketInlineProcessor.java (88%) rename sample/src/main/java/io/noties/markwon/sample/editor/inline/EntityInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/EntityInlineProcessor.java (62%) rename sample/src/main/java/io/noties/markwon/sample/editor/inline/HtmlInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/HtmlInlineProcessor.java (69%) create mode 100644 markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineParserUtils.java create mode 100644 markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineProcessor.java rename sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserOriginal.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParser.java (56%) create mode 100644 markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParserContext.java rename sample/src/main/java/io/noties/markwon/sample/editor/inline/NewLineInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/NewLineInlineProcessor.java (80%) rename sample/src/main/java/io/noties/markwon/sample/editor/inline/OpenBracketInline.java => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/OpenBracketInlineProcessor.java (52%) rename {sample/src/main/java/io/noties/markwon/sample/editor/inline => markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser}/StaggeredDelimiterProcessor.java (97%) create mode 100644 markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/UnderscoreDelimiterProcessor.java delete mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/Inline.java delete mode 100644 sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineContext.java delete mode 100644 
sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserImpl.java diff --git a/markwon-inline-parser/README.md b/markwon-inline-parser/README.md new file mode 100644 index 00000000..5b0e1335 --- /dev/null +++ b/markwon-inline-parser/README.md @@ -0,0 +1,20 @@ +# Inline parser + +**Experimental** due to usage of internal (but still visible) classes of commonmark-java: + +```java +import org.commonmark.internal.Bracket; +import org.commonmark.internal.util.Escaping; +import org.commonmark.internal.util.Html5Entities; +import org.commonmark.internal.util.Parsing; +import org.commonmark.internal.Bracket; +import org.commonmark.internal.Delimiter; +import org.commonmark.internal.ReferenceParser; +import org.commonmark.internal.inline.AsteriskDelimiterProcessor; +import org.commonmark.internal.inline.UnderscoreDelimiterProcessor; +import org.commonmark.internal.util.Escaping; +import org.commonmark.internal.Bracket; +import org.commonmark.internal.Delimiter; +``` + +`StaggeredDelimiterProcessor` class source is copied (required for InlineParser) \ No newline at end of file diff --git a/markwon-inline-parser/build.gradle b/markwon-inline-parser/build.gradle new file mode 100644 index 00000000..d7a2ed99 --- /dev/null +++ b/markwon-inline-parser/build.gradle @@ -0,0 +1,21 @@ +apply plugin: 'com.android.library' + +android { + + compileSdkVersion config['compile-sdk'] + buildToolsVersion config['build-tools'] + + defaultConfig { + minSdkVersion config['min-sdk'] + targetSdkVersion config['target-sdk'] + versionCode 1 + versionName version + } +} + +dependencies { + api deps['x-annotations'] + api deps['commonmark'] +} + +registerArtifact(this) \ No newline at end of file diff --git a/markwon-inline-parser/gradle.properties b/markwon-inline-parser/gradle.properties new file mode 100644 index 00000000..d386a6a9 --- /dev/null +++ b/markwon-inline-parser/gradle.properties @@ -0,0 +1,4 @@ +POM_NAME=Inline Parser +POM_ARTIFACT_ID=inline-parser 
+POM_DESCRIPTION=Markwon customizable commonmark-java InlineParse +POM_PACKAGING=aar \ No newline at end of file diff --git a/markwon-inline-parser/src/main/AndroidManifest.xml b/markwon-inline-parser/src/main/AndroidManifest.xml new file mode 100644 index 00000000..1a8bcbb5 --- /dev/null +++ b/markwon-inline-parser/src/main/AndroidManifest.xml @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AsteriskDelimiterProcessor.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AsteriskDelimiterProcessor.java new file mode 100644 index 00000000..3a8d570e --- /dev/null +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AsteriskDelimiterProcessor.java @@ -0,0 +1,7 @@ +package io.noties.markwon.inlineparser; + +/** + * @since 4.2.0-SNAPSHOT + */ +public class AsteriskDelimiterProcessor extends org.commonmark.internal.inline.AsteriskDelimiterProcessor { +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/AutolinkInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AutolinkInlineProcessor.java similarity index 77% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/AutolinkInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AutolinkInlineProcessor.java index beaa72c4..6351fe64 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/AutolinkInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AutolinkInlineProcessor.java @@ -1,15 +1,16 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.node.Link; import org.commonmark.node.Text; -import java.util.Collection; -import java.util.Collections; import java.util.regex.Pattern; -public class AutolinkInline extends Inline { +/** + * Parses autolinks, for example 
{@code } + * + * @since 4.2.0-SNAPSHOT + */ +public class AutolinkInlineProcessor extends InlineProcessor { private static final Pattern EMAIL_AUTOLINK = Pattern .compile("^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>"); @@ -17,14 +18,13 @@ public class AutolinkInline extends Inline { private static final Pattern AUTOLINK = Pattern .compile("^<[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*>"); - @NonNull @Override - public Collection characters() { - return Collections.singleton('<'); + public char specialCharacter() { + return '<'; } @Override - public boolean parse() { + protected boolean parse() { String m; if ((m = match(EMAIL_AUTOLINK)) != null) { String dest = m.substring(1, m.length() - 1); diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackslashInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackslashInlineProcessor.java similarity index 57% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/BackslashInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackslashInlineProcessor.java index 72b21060..e8f433ca 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackslashInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackslashInlineProcessor.java @@ -1,21 +1,23 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.node.HardLineBreak; -import java.util.Collection; -import java.util.Collections; +import java.util.regex.Pattern; + +/** + * @since 4.2.0-SNAPSHOT + */ +public class BackslashInlineProcessor extends InlineProcessor { + + private static final Pattern ESCAPABLE = MarkwonInlineParser.ESCAPABLE; -public class BackslashInline extends Inline { - @NonNull @Override - public Collection characters() 
{ - return Collections.singleton('\\'); + public char specialCharacter() { + return '\\'; } @Override - public boolean parse() { + protected boolean parse() { index++; if (peek() == '\n') { appendNode(new HardLineBreak()); diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackticksInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackticksInlineProcessor.java similarity index 72% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/BackticksInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackticksInlineProcessor.java index 4ead3d4d..f0c8da9c 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BackticksInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BackticksInlineProcessor.java @@ -1,27 +1,29 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.node.Code; -import java.util.Collection; -import java.util.Collections; import java.util.regex.Pattern; -public class BackticksInline extends Inline { +/** + * Parses inline code surrounded with {@code `} chars {@code `code`} + * + * @since 4.2.0-SNAPSHOT + */ +public class BackticksInlineProcessor extends InlineProcessor { private static final Pattern TICKS = Pattern.compile("`+"); private static final Pattern TICKS_HERE = Pattern.compile("^`+"); - @NonNull + private static final Pattern WHITESPACE = MarkwonInlineParser.WHITESPACE; + @Override - public Collection characters() { - return Collections.singleton('`'); + public char specialCharacter() { + return '`'; } @Override - public boolean parse() { + protected boolean parse() { String ticks = match(TICKS_HERE); if (ticks == null) { return false; diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BangInline.java 
b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BangInlineProcessor.java similarity index 60% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/BangInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BangInlineProcessor.java index 0416f40c..7b9995ac 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/BangInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/BangInlineProcessor.java @@ -1,22 +1,21 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.internal.Bracket; import org.commonmark.node.Text; -import java.util.Collection; -import java.util.Collections; - -public class BangInline extends Inline { - @NonNull +/** + * Parses markdown images {@code ![alt](#href)} + * + * @since 4.2.0-SNAPSHOT + */ +public class BangInlineProcessor extends InlineProcessor { @Override - public Collection characters() { - return Collections.singleton('!'); + public char specialCharacter() { + return '!'; } @Override - public boolean parse() { + protected boolean parse() { int startIndex = index; index++; if (peek() == '[') { diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/CloseBracketInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/CloseBracketInlineProcessor.java similarity index 88% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/CloseBracketInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/CloseBracketInlineProcessor.java index 78366685..d48f0da2 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/CloseBracketInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/CloseBracketInlineProcessor.java @@ -1,6 +1,4 @@ -package io.noties.markwon.sample.editor.inline; - -import 
androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.internal.Bracket; import org.commonmark.internal.util.Escaping; @@ -8,18 +6,27 @@ import org.commonmark.node.Image; import org.commonmark.node.Link; import org.commonmark.node.Node; -import java.util.Collection; -import java.util.Collections; +import java.util.regex.Pattern; + +import static io.noties.markwon.inlineparser.InlineParserUtils.mergeChildTextNodes; + +/** + * Parses markdown link or image, relies on {@link OpenBracketInlineProcessor} + * to handle start of these elements + * + * @since 4.2.0-SNAPSHOT + */ +public class CloseBracketInlineProcessor extends InlineProcessor { + + private static final Pattern WHITESPACE = MarkwonInlineParser.WHITESPACE; -public class CloseBracketInline extends Inline { - @NonNull @Override - public Collection characters() { - return Collections.singleton(']'); + public char specialCharacter() { + return ']'; } @Override - public boolean parse() { + protected boolean parse() { index++; int startIndex = index; diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/EntityInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/EntityInlineProcessor.java similarity index 62% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/EntityInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/EntityInlineProcessor.java index f7592aed..c1229bd8 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/EntityInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/EntityInlineProcessor.java @@ -1,26 +1,26 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.internal.util.Html5Entities; -import java.util.Collection; -import java.util.Collections; import java.util.regex.Pattern; -public class EntityInline extends 
Inline { +/** + * Parses HTML entities {@code &} + * + * @since 4.2.0-SNAPSHOT + */ +public class EntityInlineProcessor extends InlineProcessor { private static final String ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; private static final Pattern ENTITY_HERE = Pattern.compile('^' + ENTITY, Pattern.CASE_INSENSITIVE); - @NonNull @Override - public Collection characters() { - return Collections.singleton('&'); + public char specialCharacter() { + return '&'; } @Override - public boolean parse() { + protected boolean parse() { String m; if ((m = match(ENTITY_HERE)) != null) { appendText(Html5Entities.entityToString(m)); diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/HtmlInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/HtmlInlineProcessor.java similarity index 69% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/HtmlInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/HtmlInlineProcessor.java index 34686ec2..2872491c 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/HtmlInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/HtmlInlineProcessor.java @@ -1,14 +1,16 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.internal.util.Parsing; +import org.commonmark.node.HtmlInline; -import java.util.Collection; -import java.util.Collections; import java.util.regex.Pattern; -public class HtmlInline extends Inline { +/** + * Parses inline HTML tags + * + * @since 4.2.0-SNAPSHOT + */ +public class HtmlInlineProcessor extends InlineProcessor { private static final String HTMLCOMMENT = "|"; private static final String PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; @@ -18,17 +20,16 @@ public class HtmlInline extends Inline { + "|" + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; 
private static final Pattern HTML_TAG = Pattern.compile('^' + HTMLTAG, Pattern.CASE_INSENSITIVE); - @NonNull @Override - public Collection characters() { - return Collections.singleton('<'); + public char specialCharacter() { + return '<'; } @Override - public boolean parse() { + protected boolean parse() { String m = match(HTML_TAG); if (m != null) { - org.commonmark.node.HtmlInline node = new org.commonmark.node.HtmlInline(); + HtmlInline node = new HtmlInline(); node.setLiteral(m); appendNode(node); return true; diff --git a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineParserUtils.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineParserUtils.java new file mode 100644 index 00000000..544576ee --- /dev/null +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineParserUtils.java @@ -0,0 +1,77 @@ +package io.noties.markwon.inlineparser; + +import org.commonmark.node.Node; +import org.commonmark.node.Text; + +/** + * @since 4.2.0-SNAPSHOT + */ +public abstract class InlineParserUtils { + + public static void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) { + // No nodes between them + if (fromNode == toNode || fromNode.getNext() == toNode) { + return; + } + + mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious()); + } + + public static void mergeChildTextNodes(Node node) { + // No children or just one child node, no need for merging + if (node.getFirstChild() == node.getLastChild()) { + return; + } + + mergeTextNodesInclusive(node.getFirstChild(), node.getLastChild()); + } + + public static void mergeTextNodesInclusive(Node fromNode, Node toNode) { + Text first = null; + Text last = null; + int length = 0; + + Node node = fromNode; + while (node != null) { + if (node instanceof Text) { + Text text = (Text) node; + if (first == null) { + first = text; + } + length += text.getLiteral().length(); + last = text; + } else { + mergeIfNeeded(first, last, length); + first 
= null; + last = null; + length = 0; + } + if (node == toNode) { + break; + } + node = node.getNext(); + } + + mergeIfNeeded(first, last, length); + } + + public static void mergeIfNeeded(Text first, Text last, int textLength) { + if (first != null && last != null && first != last) { + StringBuilder sb = new StringBuilder(textLength); + sb.append(first.getLiteral()); + Node node = first.getNext(); + Node stop = last.getNext(); + while (node != stop) { + sb.append(((Text) node).getLiteral()); + Node unlink = node; + node = node.getNext(); + unlink.unlink(); + } + String literal = sb.toString(); + first.setLiteral(literal); + } + } + + private InlineParserUtils() { + } +} diff --git a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineProcessor.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineProcessor.java new file mode 100644 index 00000000..2462e324 --- /dev/null +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/InlineProcessor.java @@ -0,0 +1,148 @@ +package io.noties.markwon.inlineparser; + +import androidx.annotation.NonNull; +import androidx.annotation.Nullable; + +import org.commonmark.internal.Bracket; +import org.commonmark.internal.Delimiter; +import org.commonmark.node.Link; +import org.commonmark.node.Node; +import org.commonmark.node.Text; + +import java.util.Map; +import java.util.regex.Pattern; + +/** + * @see AutolinkInlineProcessor + * @see BackslashInlineProcessor + * @see BackticksInlineProcessor + * @see BangInlineProcessor + * @see CloseBracketInlineProcessor + * @see EntityInlineProcessor + * @see HtmlInlineProcessor + * @see NewLineInlineProcessor + * @see OpenBracketInlineProcessor + * @see MarkwonInlineParser.FactoryBuilder#addInlineProcessor(InlineProcessor) + * @see MarkwonInlineParser.FactoryBuilder#excludeInlineProcessor(Class) + * @since 4.2.0-SNAPSHOT + */ +public abstract class InlineProcessor { + + /** + * Special character that triggers parsing attempt + */ 
+ public abstract char specialCharacter(); + + /** + * @return boolean indicating if parsing succeeded + */ + protected abstract boolean parse(); + + + protected MarkwonInlineParserContext context; + protected Node block; + protected String input; + protected int index; + + public boolean parse(@NonNull MarkwonInlineParserContext context) { + this.context = context; + this.block = context.block(); + this.input = context.input(); + this.index = context.index(); + + final boolean result = parse(); + + // synchronize index + context.setIndex(index); + + return result; + } + + protected Bracket lastBracket() { + return context.lastBracket(); + } + + protected Delimiter lastDelimiter() { + return context.lastDelimiter(); + } + + @NonNull + protected Map referenceMap() { + return context.referenceMap(); + } + + protected void addBracket(Bracket bracket) { + context.addBracket(bracket); + } + + protected void removeLastBracket() { + context.removeLastBracket(); + } + + protected void spnl() { + context.setIndex(index); + context.spnl(); + index = context.index(); + } + + @Nullable + protected String match(@NonNull Pattern re) { + // before trying to match, we must notify context about our index (which we store additionally here) + context.setIndex(index); + + final String result = context.match(re); + + // after match we must reflect index change here + this.index = context.index(); + + return result; + } + + @Nullable + protected String parseLinkDestination() { + context.setIndex(index); + final String result = context.parseLinkDestination(); + this.index = context.index(); + return result; + } + + @Nullable + protected String parseLinkTitle() { + context.setIndex(index); + final String result = context.parseLinkTitle(); + this.index = context.index(); + return result; + } + + protected int parseLinkLabel() { + context.setIndex(index); + final int result = context.parseLinkLabel(); + this.index = context.index(); + return result; + } + + protected void 
processDelimiters(Delimiter stackBottom) { + context.setIndex(index); + context.processDelimiters(stackBottom); + this.index = context.index(); + } + + protected void appendNode(@NonNull Node node) { + context.appendNode(node); + } + + @NonNull + protected Text appendText(@NonNull CharSequence text, int beginIndex, int endIndex) { + return context.appendText(text, beginIndex, endIndex); + } + + @NonNull + protected Text appendText(@NonNull CharSequence text) { + return context.appendText(text); + } + + protected char peek() { + context.setIndex(index); + return context.peek(); + } +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserOriginal.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParser.java similarity index 56% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserOriginal.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParser.java index ee45884d..5bdda362 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserOriginal.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParser.java @@ -1,27 +1,21 @@ -package io.noties.markwon.sample.editor.inline; +package io.noties.markwon.inlineparser; import androidx.annotation.NonNull; +import androidx.annotation.Nullable; import org.commonmark.internal.Bracket; import org.commonmark.internal.Delimiter; import org.commonmark.internal.ReferenceParser; -import org.commonmark.internal.inline.AsteriskDelimiterProcessor; -import org.commonmark.internal.inline.UnderscoreDelimiterProcessor; import org.commonmark.internal.util.Escaping; -import org.commonmark.internal.util.Html5Entities; -import org.commonmark.internal.util.Parsing; -import org.commonmark.node.Code; -import org.commonmark.node.HardLineBreak; -import org.commonmark.node.HtmlInline; -import org.commonmark.node.Image; import 
org.commonmark.node.Link; import org.commonmark.node.Node; -import org.commonmark.node.SoftLineBreak; import org.commonmark.node.Text; import org.commonmark.parser.InlineParser; +import org.commonmark.parser.InlineParserContext; import org.commonmark.parser.InlineParserFactory; import org.commonmark.parser.delimiter.DelimiterProcessor; +import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; import java.util.HashMap; @@ -31,28 +25,66 @@ import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; -public class InlineParserOriginal implements InlineParser, ReferenceParser { +import static io.noties.markwon.inlineparser.InlineParserUtils.mergeChildTextNodes; +import static io.noties.markwon.inlineparser.InlineParserUtils.mergeTextNodesBetweenExclusive; + +/** + * @see #factoryBuilder() + * @see FactoryBuilder + * @since 4.2.0-SNAPSHOT + */ +public class MarkwonInlineParser implements InlineParser, ReferenceParser, MarkwonInlineParserContext { + + public interface FactoryBuilder { + + /** + * @see InlineProcessor + */ + @NonNull + FactoryBuilder addInlineProcessor(@NonNull InlineProcessor processor); + + /** + * @see AsteriskDelimiterProcessor + * @see UnderscoreDelimiterProcessor + */ + @NonNull + FactoryBuilder addDelimiterProcessor(@NonNull DelimiterProcessor processor); + + /** + * Indicate if markdown references are enabled. {@code referencesEnabled=true} if {@link #includeDefaults()} + * was called + */ + @NonNull + FactoryBuilder referencesEnabled(boolean referencesEnabled); + + /** + * Includes all default delimiter and inline processors, and sets {@code referencesEnabled=true}. 
+ * Useful with subsequent calls to {@link #excludeInlineProcessor(Class)} or {@link #excludeDelimiterProcessor(Class)} + */ + @NonNull + FactoryBuilder includeDefaults(); + + @NonNull + FactoryBuilder excludeInlineProcessor(@NonNull Class processor); + + @NonNull + FactoryBuilder excludeDelimiterProcessor(@NonNull Class processor); + + @NonNull + InlineParserFactory build(); + } @NonNull - public static InlineParserFactory factory() { - return context -> new InlineParserOriginal(context.getCustomDelimiterProcessors()); + public static FactoryBuilder factoryBuilder() { + return new FactoryBuilderImpl(); } private static final String ESCAPED_CHAR = "\\\\" + Escaping.ESCAPABLE; - private static final String HTMLCOMMENT = "|"; - private static final String PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; - private static final String DECLARATION = "]*>"; - private static final String CDATA = ""; - private static final String HTMLTAG = "(?:" + Parsing.OPENTAG + "|" + Parsing.CLOSETAG + "|" + HTMLCOMMENT - + "|" + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; - private static final String ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; private static final String ASCII_PUNCTUATION = "!\"#\\$%&'\\(\\)\\*\\+,\\-\\./:;<=>\\?@\\[\\\\\\]\\^_`\\{\\|\\}~"; private static final Pattern PUNCTUATION = Pattern .compile("^[" + ASCII_PUNCTUATION + "\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]"); - private static final Pattern HTML_TAG = Pattern.compile('^' + HTMLTAG, Pattern.CASE_INSENSITIVE); - private static final Pattern LINK_TITLE = Pattern.compile( "^(?:\"(" + ESCAPED_CHAR + "|[^\"\\x00])*\"" + '|' + @@ -64,43 +96,29 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { private static final Pattern LINK_LABEL = Pattern.compile("^\\[(?:[^\\\\\\[\\]]|\\\\.)*\\]"); - private static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); - - private static final Pattern ENTITY_HERE = Pattern.compile('^' + 
ENTITY, Pattern.CASE_INSENSITIVE); - - private static final Pattern TICKS = Pattern.compile("`+"); - - private static final Pattern TICKS_HERE = Pattern.compile("^`+"); - - private static final Pattern EMAIL_AUTOLINK = Pattern - .compile("^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>"); - - private static final Pattern AUTOLINK = Pattern - .compile("^<[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*>"); - private static final Pattern SPNL = Pattern.compile("^ *(?:\n *)?"); private static final Pattern UNICODE_WHITESPACE_CHAR = Pattern.compile("^[\\p{Zs}\t\r\n\f]"); - private static final Pattern WHITESPACE = Pattern.compile("\\s+"); - - private static final Pattern FINAL_SPACE = Pattern.compile(" *$"); - private static final Pattern LINE_END = Pattern.compile("^ *(?:\n|$)"); + static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); + static final Pattern WHITESPACE = Pattern.compile("\\s+"); + + private final boolean referencesEnabled; + private final BitSet specialCharacters; - private final BitSet delimiterCharacters; + private final Map> inlineProcessors; private final Map delimiterProcessors; + private Node block; + private String input; + private int index; + /** * Link references by ID, needs to be built up using parseReference before calling parse. */ - private Map referenceMap = new HashMap<>(); - - private Node block; - - private String input; - private int index; + private Map referenceMap = new HashMap<>(1); /** * Top delimiter (emphasis, strong emphasis or custom emphasis). (Brackets are on a separate stack, different @@ -113,37 +131,49 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { */ private Bracket lastBracket; - public InlineParserOriginal(List delimiterProcessors) { + // might we construct these in factory? 
+ public MarkwonInlineParser( + boolean referencesEnabled, + @NonNull List inlineProcessors, + @NonNull List delimiterProcessors) { + this.referencesEnabled = referencesEnabled; + this.inlineProcessors = calculateInlines(inlineProcessors); this.delimiterProcessors = calculateDelimiterProcessors(delimiterProcessors); - this.delimiterCharacters = calculateDelimiterCharacters(this.delimiterProcessors.keySet()); - this.specialCharacters = calculateSpecialCharacters(delimiterCharacters); + this.specialCharacters = calculateSpecialCharacters( + this.inlineProcessors.keySet(), + this.delimiterProcessors.keySet()); } - public static BitSet calculateDelimiterCharacters(Set characters) { - BitSet bitSet = new BitSet(); - for (Character character : characters) { - bitSet.set(character); + @NonNull + private static Map> calculateInlines(@NonNull List inlines) { + final Map> map = new HashMap<>(inlines.size()); + List list; + for (InlineProcessor inlineProcessor : inlines) { + final char character = inlineProcessor.specialCharacter(); + list = map.get(character); + if (list == null) { + list = new ArrayList<>(1); + map.put(character, list); + } + list.add(inlineProcessor); + } + return map; + } + + @NonNull + private static BitSet calculateSpecialCharacters(Set inlineCharacters, Set delimiterCharacters) { + final BitSet bitSet = new BitSet(); + for (Character c : inlineCharacters) { + bitSet.set(c); + } + for (Character c : delimiterCharacters) { + bitSet.set(c); } return bitSet; } - public static BitSet calculateSpecialCharacters(BitSet delimiterCharacters) { - BitSet bitSet = new BitSet(); - bitSet.or(delimiterCharacters); - bitSet.set('\n'); - bitSet.set('`'); - bitSet.set('['); - bitSet.set(']'); - bitSet.set('\\'); - bitSet.set('!'); - bitSet.set('<'); - bitSet.set('&'); - return bitSet; - } - - public static Map calculateDelimiterProcessors(List delimiterProcessors) { + private static Map calculateDelimiterProcessors(List delimiterProcessors) { Map map = new HashMap<>(); 
- addDelimiterProcessors(Arrays.asList(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map); addDelimiterProcessors(delimiterProcessors, map); return map; } @@ -206,6 +236,11 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { */ @Override public int parseReference(String s) { + + if (!referencesEnabled) { + return 0; + } + this.input = s; this.index = 0; String dest; @@ -275,17 +310,22 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { return index - startIndex; } - private Text appendText(CharSequence text, int beginIndex, int endIndex) { + @Override + @NonNull + public Text appendText(@NonNull CharSequence text, int beginIndex, int endIndex) { return appendText(text.subSequence(beginIndex, endIndex)); } - private Text appendText(CharSequence text) { + @Override + @NonNull + public Text appendText(@NonNull CharSequence text) { Text node = new Text(text.toString()); appendNode(node); return node; } - private void appendNode(Node node) { + @Override + public void appendNode(@NonNull Node node) { block.appendChild(node); } @@ -295,46 +335,33 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { * On failure, return false. 
*/ private boolean parseInline() { - boolean res; - char c = peek(); + + final char c = peek(); + if (c == '\0') { return false; } - switch (c) { - case '\n': - res = parseNewline(); - break; - case '\\': - res = parseBackslash(); - break; - case '`': - res = parseBackticks(); - break; - case '[': - res = parseOpenBracket(); - break; - case '!': - res = parseBang(); - break; - case ']': - res = parseCloseBracket(); - break; - case '<': - res = parseAutolink() || parseHtmlInline(); - break; - case '&': - res = parseEntity(); - break; - default: - boolean isDelimiter = delimiterCharacters.get(c); - if (isDelimiter) { - DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c); - res = parseDelimiters(delimiterProcessor, c); - } else { - res = parseString(); + + boolean res = false; + + final List inlines = this.inlineProcessors.get(c); + + if (inlines != null) { + for (InlineProcessor inline : inlines) { + res = inline.parse(this); + if (res) { + break; } - break; + } + } else { + final DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c); + if (delimiterProcessor != null) { + res = parseDelimiters(delimiterProcessor, c); + } else { + res = parseString(); + } } + if (!res) { index++; // When we get here, it's only for a single special character that turned out to not have a special meaning. @@ -349,7 +376,9 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { /** * If RE matches at current index in the input, advance index and return the match; otherwise return null. */ - private String match(Pattern re) { + @Override + @Nullable + public String match(@NonNull Pattern re) { if (index >= input.length()) { return null; } @@ -367,7 +396,8 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { /** * Returns the char at the current input index, or {@code '\0'} in case there are no more characters. 
*/ - private char peek() { + @Override + public char peek() { if (index < input.length()) { return input.charAt(index); } else { @@ -375,87 +405,66 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { } } + @NonNull + @Override + public Node block() { + return block; + } + + @NonNull + @Override + public String input() { + return input; + } + + @Override + public int index() { + return index; + } + + @Override + public void setIndex(int index) { + this.index = index; + } + + @Override + public Bracket lastBracket() { + return lastBracket; + } + + @Override + public Delimiter lastDelimiter() { + return lastDelimiter; + } + + @NonNull + @Override + public Map referenceMap() { + return referenceMap; + } + + @Override + public void addBracket(Bracket bracket) { + if (lastBracket != null) { + lastBracket.bracketAfter = true; + } + lastBracket = bracket; + } + + @Override + public void removeLastBracket() { + lastBracket = lastBracket.previous; + } + /** * Parse zero or more space characters, including at most one newline. */ - private boolean spnl() { + @Override + public boolean spnl() { match(SPNL); return true; } - /** - * Parse a newline. If it was preceded by two spaces, return a hard line break; otherwise a soft line break. - */ - private boolean parseNewline() { - index++; // assume we're at a \n - - Node lastChild = block.getLastChild(); - // Check previous text for trailing spaces. - // The "endsWith" is an optimization to avoid an RE match in the common case. - if (lastChild != null && lastChild instanceof Text && ((Text) lastChild).getLiteral().endsWith(" ")) { - Text text = (Text) lastChild; - String literal = text.getLiteral(); - Matcher matcher = FINAL_SPACE.matcher(literal); - int spaces = matcher.find() ? matcher.end() - matcher.start() : 0; - if (spaces > 0) { - text.setLiteral(literal.substring(0, literal.length() - spaces)); - } - appendNode(spaces >= 2 ? 
new HardLineBreak() : new SoftLineBreak()); - } else { - appendNode(new SoftLineBreak()); - } - - // gobble leading spaces in next line - while (peek() == ' ') { - index++; - } - return true; - } - - /** - * Parse a backslash-escaped special character, adding either the escaped character, a hard line break - * (if the backslash is followed by a newline), or a literal backslash to the block's children. - */ - private boolean parseBackslash() { - index++; - if (peek() == '\n') { - appendNode(new HardLineBreak()); - index++; - } else if (index < input.length() && ESCAPABLE.matcher(input.substring(index, index + 1)).matches()) { - appendText(input, index, index + 1); - index++; - } else { - appendText("\\"); - } - return true; - } - - /** - * Attempt to parse backticks, adding either a backtick code span or a literal sequence of backticks. - */ - private boolean parseBackticks() { - String ticks = match(TICKS_HERE); - if (ticks == null) { - return false; - } - int afterOpenTicks = index; - String matched; - while ((matched = match(TICKS)) != null) { - if (matched.equals(ticks)) { - Code node = new Code(); - String content = input.substring(afterOpenTicks, index - ticks.length()); - String literal = WHITESPACE.matcher(content.trim()).replaceAll(" "); - node.setLiteral(literal); - appendNode(node); - return true; - } - } - // If we got here, we didn't match a closing backtick sequence. - index = afterOpenTicks; - appendText(ticks); - return true; - } - /** * Attempt to parse delimiters like emphasis, strong emphasis or custom delimiters. */ @@ -481,174 +490,12 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { return true; } - /** - * Add open bracket to delimiter stack and add a text node to block's children. 
- */ - private boolean parseOpenBracket() { - int startIndex = index; - index++; - - Text node = appendText("["); - - // Add entry to stack for this opener - addBracket(Bracket.link(node, startIndex, lastBracket, lastDelimiter)); - - return true; - } - - /** - * If next character is [, and ! delimiter to delimiter stack and add a text node to block's children. - * Otherwise just add a text node. - */ - private boolean parseBang() { - int startIndex = index; - index++; - if (peek() == '[') { - index++; - - Text node = appendText("!["); - - // Add entry to stack for this opener - addBracket(Bracket.image(node, startIndex + 1, lastBracket, lastDelimiter)); - } else { - appendText("!"); - } - return true; - } - - /** - * Try to match close bracket against an opening in the delimiter stack. Add either a link or image, or a - * plain [ character, to block's children. If there is a matching delimiter, remove it from the delimiter stack. - */ - private boolean parseCloseBracket() { - index++; - int startIndex = index; - - // Get previous `[` or `![` - Bracket opener = lastBracket; - if (opener == null) { - // No matching opener, just return a literal. - appendText("]"); - return true; - } - - if (!opener.allowed) { - // Matching opener but it's not allowed, just return a literal. 
- appendText("]"); - removeLastBracket(); - return true; - } - - // Check to see if we have a link/image - - String dest = null; - String title = null; - boolean isLinkOrImage = false; - - // Maybe a inline link like `[foo](/uri "title")` - if (peek() == '(') { - index++; - spnl(); - if ((dest = parseLinkDestination()) != null) { - spnl(); - // title needs a whitespace before - if (WHITESPACE.matcher(input.substring(index - 1, index)).matches()) { - title = parseLinkTitle(); - spnl(); - } - if (peek() == ')') { - index++; - isLinkOrImage = true; - } else { - index = startIndex; - } - } - } - - // Maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]` - if (!isLinkOrImage) { - - // See if there's a link label like `[bar]` or `[]` - int beforeLabel = index; - int labelLength = parseLinkLabel(); - String ref = null; - if (labelLength > 2) { - ref = input.substring(beforeLabel, beforeLabel + labelLength); - } else if (!opener.bracketAfter) { - // If the second label is empty `[foo][]` or missing `[foo]`, then the first label is the reference. - // But it can only be a reference when there's no (unescaped) bracket in it. - // If there is, we don't even need to try to look up the reference. This is an optimization. - ref = input.substring(opener.index, startIndex); - } - - if (ref != null) { - Link link = referenceMap.get(Escaping.normalizeReference(ref)); - if (link != null) { - dest = link.getDestination(); - title = link.getTitle(); - isLinkOrImage = true; - } - } - } - - if (isLinkOrImage) { - // If we got here, open is a potential opener - Node linkOrImage = opener.image ? 
new Image(dest, title) : new Link(dest, title); - - Node node = opener.node.getNext(); - while (node != null) { - Node next = node.getNext(); - linkOrImage.appendChild(node); - node = next; - } - appendNode(linkOrImage); - - // Process delimiters such as emphasis inside link/image - processDelimiters(opener.previousDelimiter); - mergeChildTextNodes(linkOrImage); - // We don't need the corresponding text node anymore, we turned it into a link/image node - opener.node.unlink(); - removeLastBracket(); - - // Links within links are not allowed. We found this link, so there can be no other link around it. - if (!opener.image) { - Bracket bracket = lastBracket; - while (bracket != null) { - if (!bracket.image) { - // Disallow link opener. It will still get matched, but will not result in a link. - bracket.allowed = false; - } - bracket = bracket.previous; - } - } - - return true; - - } else { // no link or image - - appendText("]"); - removeLastBracket(); - - index = startIndex; - return true; - } - } - - private void addBracket(Bracket bracket) { - if (lastBracket != null) { - lastBracket.bracketAfter = true; - } - lastBracket = bracket; - } - - private void removeLastBracket() { - lastBracket = lastBracket.previous; - } - /** * Attempt to parse link destination, returning the string or null if no match. */ - private String parseLinkDestination() { + @Override + @Nullable + public String parseLinkDestination() { String res = match(LINK_DESTINATION_BRACES); if (res != null) { // chop off surrounding <..>: if (res.length() == 2) { @@ -705,7 +552,9 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { /** * Attempt to parse link title (sans quotes), returning the string or null if no match. 
*/ - private String parseLinkTitle() { + @Override + @Nullable + public String parseLinkTitle() { String title = match(LINK_TITLE); if (title != null) { // chop off quotes from title and unescape: @@ -718,7 +567,8 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { /** * Attempt to parse a link label, returning number of characters parsed. */ - private int parseLinkLabel() { + @Override + public int parseLinkLabel() { String m = match(LINK_LABEL); // Spec says "A link label can have at most 999 characters inside the square brackets" if (m == null || m.length() > 1001) { @@ -728,56 +578,6 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { } } - /** - * Attempt to parse an autolink (URL or email in pointy brackets). - */ - private boolean parseAutolink() { - String m; - if ((m = match(EMAIL_AUTOLINK)) != null) { - String dest = m.substring(1, m.length() - 1); - Link node = new Link("mailto:" + dest, null); - node.appendChild(new Text(dest)); - appendNode(node); - return true; - } else if ((m = match(AUTOLINK)) != null) { - String dest = m.substring(1, m.length() - 1); - Link node = new Link(dest, null); - node.appendChild(new Text(dest)); - appendNode(node); - return true; - } else { - return false; - } - } - - /** - * Attempt to parse inline HTML. - */ - private boolean parseHtmlInline() { - String m = match(HTML_TAG); - if (m != null) { - HtmlInline node = new HtmlInline(); - node.setLiteral(m); - appendNode(node); - return true; - } else { - return false; - } - } - - /** - * Attempt to parse an entity, return Entity object if successful. - */ - private boolean parseEntity() { - String m; - if ((m = match(ENTITY_HERE)) != null) { - appendText(Html5Entities.entityToString(m)); - return true; - } else { - return false; - } - } - /** * Parse a run of ordinary characters, or a single character with a special meaning in markdown, as a plain string. 
*/ @@ -849,7 +649,8 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { return new DelimiterData(delimiterCount, canOpen, canClose); } - private void processDelimiters(Delimiter stackBottom) { + @Override + public void processDelimiters(Delimiter stackBottom) { Map openersBottom = new HashMap<>(); @@ -981,70 +782,6 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { } } - private void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) { - // No nodes between them - if (fromNode == toNode || fromNode.getNext() == toNode) { - return; - } - - mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious()); - } - - private void mergeChildTextNodes(Node node) { - // No children or just one child node, no need for merging - if (node.getFirstChild() == node.getLastChild()) { - return; - } - - mergeTextNodesInclusive(node.getFirstChild(), node.getLastChild()); - } - - private void mergeTextNodesInclusive(Node fromNode, Node toNode) { - Text first = null; - Text last = null; - int length = 0; - - Node node = fromNode; - while (node != null) { - if (node instanceof Text) { - Text text = (Text) node; - if (first == null) { - first = text; - } - length += text.getLiteral().length(); - last = text; - } else { - mergeIfNeeded(first, last, length); - first = null; - last = null; - length = 0; - } - if (node == toNode) { - break; - } - node = node.getNext(); - } - - mergeIfNeeded(first, last, length); - } - - private void mergeIfNeeded(Text first, Text last, int textLength) { - if (first != null && last != null && first != last) { - StringBuilder sb = new StringBuilder(textLength); - sb.append(first.getLiteral()); - Node node = first.getNext(); - Node stop = last.getNext(); - while (node != stop) { - sb.append(((Text) node).getLiteral()); - Node unlink = node; - node = node.getNext(); - unlink.unlink(); - } - String literal = sb.toString(); - first.setLiteral(literal); - } - } - private static class DelimiterData { 
final int count; @@ -1057,4 +794,120 @@ public class InlineParserOriginal implements InlineParser, ReferenceParser { this.canClose = canClose; } } + + static class FactoryBuilderImpl implements FactoryBuilder { + + private final List inlineProcessors = new ArrayList<>(3); + private final List delimiterProcessors = new ArrayList<>(3); + private boolean referencesEnabled; + + @NonNull + @Override + public FactoryBuilder addInlineProcessor(@NonNull InlineProcessor processor) { + this.inlineProcessors.add(processor); + return this; + } + + @NonNull + @Override + public FactoryBuilder addDelimiterProcessor(@NonNull DelimiterProcessor processor) { + this.delimiterProcessors.add(processor); + return this; + } + + @NonNull + @Override + public FactoryBuilder referencesEnabled(boolean referencesEnabled) { + this.referencesEnabled = referencesEnabled; + return this; + } + + @NonNull + @Override + public FactoryBuilder includeDefaults() { + + // by default enabled + this.referencesEnabled = true; + + this.inlineProcessors.addAll(Arrays.asList( + new AutolinkInlineProcessor(), + new BackslashInlineProcessor(), + new BackticksInlineProcessor(), + new BangInlineProcessor(), + new CloseBracketInlineProcessor(), + new EntityInlineProcessor(), + new HtmlInlineProcessor(), + new NewLineInlineProcessor(), + new OpenBracketInlineProcessor())); + + this.delimiterProcessors.addAll(Arrays.asList( + new AsteriskDelimiterProcessor(), + new UnderscoreDelimiterProcessor())); + + return this; + } + + @NonNull + @Override + public FactoryBuilder excludeInlineProcessor(@NonNull Class type) { + for (int i = 0, size = inlineProcessors.size(); i < size; i++) { + if (type.equals(inlineProcessors.get(i).getClass())) { + inlineProcessors.remove(i); + break; + } + } + return this; + } + + @NonNull + @Override + public FactoryBuilder excludeDelimiterProcessor(@NonNull Class type) { + for (int i = 0, size = delimiterProcessors.size(); i < size; i++) { + if 
(type.equals(delimiterProcessors.get(i).getClass())) { + delimiterProcessors.remove(i); + break; + } + } + return this; + } + + @NonNull + @Override + public InlineParserFactory build() { + return new InlineParserFactoryImpl(referencesEnabled, inlineProcessors, delimiterProcessors); + } + } + + static class InlineParserFactoryImpl implements InlineParserFactory { + + private final boolean referencesEnabled; + private final List inlineProcessors; + private final List delimiterProcessors; + + InlineParserFactoryImpl( + boolean referencesEnabled, + @NonNull List inlineProcessors, + @NonNull List delimiterProcessors) { + this.referencesEnabled = referencesEnabled; + this.inlineProcessors = inlineProcessors; + this.delimiterProcessors = delimiterProcessors; + } + + @Override + public InlineParser create(InlineParserContext inlineParserContext) { + final List delimiterProcessors; + final List customDelimiterProcessors = inlineParserContext.getCustomDelimiterProcessors(); + final int size = customDelimiterProcessors != null + ? 
customDelimiterProcessors.size() + : 0; + if (size > 0) { + delimiterProcessors = new ArrayList<>(size + this.delimiterProcessors.size()); + delimiterProcessors.addAll(this.delimiterProcessors); + delimiterProcessors.addAll(customDelimiterProcessors); + } else { + delimiterProcessors = this.delimiterProcessors; + } + return new MarkwonInlineParser(referencesEnabled, inlineProcessors, delimiterProcessors); + } + } } diff --git a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParserContext.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParserContext.java new file mode 100644 index 00000000..726ff4eb --- /dev/null +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParserContext.java @@ -0,0 +1,65 @@ +package io.noties.markwon.inlineparser; + +import androidx.annotation.NonNull; +import androidx.annotation.Nullable; + +import org.commonmark.internal.Bracket; +import org.commonmark.internal.Delimiter; +import org.commonmark.node.Link; +import org.commonmark.node.Node; +import org.commonmark.node.Text; + +import java.util.Map; +import java.util.regex.Pattern; + +public interface MarkwonInlineParserContext { + + @NonNull + Node block(); + + @NonNull + String input(); + + int index(); + + void setIndex(int index); + + Bracket lastBracket(); + + Delimiter lastDelimiter(); + + @NonNull + Map referenceMap(); + + void addBracket(Bracket bracket); + + void removeLastBracket(); + + boolean spnl(); + + /** + * Returns the char at the current input index, or {@code '\0'} in case there are no more characters. 
+ */ + char peek(); + + @Nullable + String match(@NonNull Pattern re); + + void appendNode(@NonNull Node node); + + @NonNull + Text appendText(@NonNull CharSequence text, int beginIndex, int endIndex); + + @NonNull + Text appendText(@NonNull CharSequence text); + + @Nullable + String parseLinkDestination(); + + @Nullable + String parseLinkTitle(); + + int parseLinkLabel(); + + void processDelimiters(Delimiter stackBottom); +} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/NewLineInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/NewLineInlineProcessor.java similarity index 80% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/NewLineInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/NewLineInlineProcessor.java index 6c18ab64..4f08a74f 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/NewLineInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/NewLineInlineProcessor.java @@ -1,29 +1,27 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.node.HardLineBreak; import org.commonmark.node.Node; import org.commonmark.node.SoftLineBreak; import org.commonmark.node.Text; -import java.util.Collection; -import java.util.Collections; import java.util.regex.Matcher; import java.util.regex.Pattern; -public class NewLineInline extends Inline { +/** + * @since 4.2.0-SNAPSHOT + */ +public class NewLineInlineProcessor extends InlineProcessor { private static final Pattern FINAL_SPACE = Pattern.compile(" *$"); - @NonNull @Override - public Collection characters() { - return Collections.singleton('\n'); + public char specialCharacter() { + return '\n'; } @Override - public boolean parse() { + protected boolean parse() { index++; // assume we're at a \n Node lastChild = block.getLastChild(); diff --git 
a/sample/src/main/java/io/noties/markwon/sample/editor/inline/OpenBracketInline.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/OpenBracketInlineProcessor.java similarity index 52% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/OpenBracketInline.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/OpenBracketInlineProcessor.java index c4fe20d9..02edf9bb 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/OpenBracketInline.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/OpenBracketInlineProcessor.java @@ -1,23 +1,21 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; +package io.noties.markwon.inlineparser; import org.commonmark.internal.Bracket; import org.commonmark.node.Text; -import java.util.Collection; -import java.util.Collections; - -public class OpenBracketInline extends Inline { - @NonNull +/** + * Parses markdown links {@code [link](#href)} + * + * @since 4.2.0-SNAPSHOT + */ +public class OpenBracketInlineProcessor extends InlineProcessor { @Override - public Collection characters() { - return Collections.singleton('['); + public char specialCharacter() { + return '['; } @Override - public boolean parse() { - + protected boolean parse() { int startIndex = index; index++; diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/StaggeredDelimiterProcessor.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/StaggeredDelimiterProcessor.java similarity index 97% rename from sample/src/main/java/io/noties/markwon/sample/editor/inline/StaggeredDelimiterProcessor.java rename to markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/StaggeredDelimiterProcessor.java index 7765ca54..c2a92c3d 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/StaggeredDelimiterProcessor.java +++ 
b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/StaggeredDelimiterProcessor.java @@ -1,4 +1,4 @@ -package io.noties.markwon.sample.editor.inline; +package io.noties.markwon.inlineparser; import org.commonmark.node.Text; import org.commonmark.parser.delimiter.DelimiterProcessor; @@ -17,7 +17,6 @@ class StaggeredDelimiterProcessor implements DelimiterProcessor { this.delim = delim; } - @Override public char getOpeningCharacter() { return delim; diff --git a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/UnderscoreDelimiterProcessor.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/UnderscoreDelimiterProcessor.java new file mode 100644 index 00000000..83f7771f --- /dev/null +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/UnderscoreDelimiterProcessor.java @@ -0,0 +1,7 @@ +package io.noties.markwon.inlineparser; + +/** + * @since 4.2.0-SNAPSHOT + */ +public class UnderscoreDelimiterProcessor extends org.commonmark.internal.inline.UnderscoreDelimiterProcessor { +} diff --git a/sample/build.gradle b/sample/build.gradle index a7dec247..d2a9e27f 100644 --- a/sample/build.gradle +++ b/sample/build.gradle @@ -41,6 +41,7 @@ dependencies { implementation project(':markwon-ext-tasklist') implementation project(':markwon-html') implementation project(':markwon-image') + implementation project(':markwon-inline-parser') implementation project(':markwon-linkify') implementation project(':markwon-recycler') implementation project(':markwon-recycler-table') diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/EditorActivity.java b/sample/src/main/java/io/noties/markwon/sample/editor/EditorActivity.java index c16053fd..17387418 100644 --- a/sample/src/main/java/io/noties/markwon/sample/editor/EditorActivity.java +++ b/sample/src/main/java/io/noties/markwon/sample/editor/EditorActivity.java @@ -18,18 +18,12 @@ import android.widget.TextView; import androidx.annotation.NonNull; import 
androidx.annotation.Nullable; -import org.commonmark.internal.inline.AsteriskDelimiterProcessor; -import org.commonmark.internal.inline.UnderscoreDelimiterProcessor; -import org.commonmark.node.Link; -import org.commonmark.node.Text; +import org.commonmark.parser.InlineParserFactory; import org.commonmark.parser.Parser; import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; import java.util.List; import java.util.concurrent.Executors; -import java.util.regex.Pattern; import io.noties.markwon.AbstractMarkwonPlugin; import io.noties.markwon.Markwon; @@ -43,17 +37,12 @@ import io.noties.markwon.editor.PersistedSpans; import io.noties.markwon.editor.handler.EmphasisEditHandler; import io.noties.markwon.editor.handler.StrongEmphasisEditHandler; import io.noties.markwon.ext.strikethrough.StrikethroughPlugin; +import io.noties.markwon.inlineparser.BangInlineProcessor; +import io.noties.markwon.inlineparser.EntityInlineProcessor; +import io.noties.markwon.inlineparser.HtmlInlineProcessor; +import io.noties.markwon.inlineparser.MarkwonInlineParser; import io.noties.markwon.linkify.LinkifyPlugin; import io.noties.markwon.sample.R; -import io.noties.markwon.sample.editor.inline.AutolinkInline; -import io.noties.markwon.sample.editor.inline.BackslashInline; -import io.noties.markwon.sample.editor.inline.BackticksInline; -import io.noties.markwon.sample.editor.inline.CloseBracketInline; -import io.noties.markwon.sample.editor.inline.EntityInline; -import io.noties.markwon.sample.editor.inline.HtmlInline; -import io.noties.markwon.sample.editor.inline.Inline; -import io.noties.markwon.sample.editor.inline.InlineParserImpl; -import io.noties.markwon.sample.editor.inline.NewLineInline; public class EditorActivity extends Activity { @@ -187,66 +176,15 @@ public class EditorActivity extends Activity { // for links to be clickable editText.setMovementMethod(LinkMovementMethod.getInstance()); - // provider? 
- final InlineParserImpl.Builder inlineParserFactoryBuilder = InlineParserImpl.builder() - .addDelimiterProcessor(new AsteriskDelimiterProcessor()) - .addDelimiterProcessor(new UnderscoreDelimiterProcessor()) - .addInlineProcessor(new AutolinkInline()) - .addInlineProcessor(new BackslashInline()) - .addInlineProcessor(new BackticksInline()) -// .addInlineProcessor(new BangInline()) // no images then - .addInlineProcessor(new CloseBracketInline()) - .addInlineProcessor(new EntityInline()) - .addInlineProcessor(new HtmlInline()) - .addInlineProcessor(new NewLineInline()) - .addInlineProcessor(new Inline() { - - private final Pattern RE = Pattern.compile("\\d+"); - - @NonNull - @Override - public Collection characters() { - return Collections.singleton('#'); - } - - @Override - public boolean parse() { - final String id = match(RE); - if (id != null) { - final Link link = new Link("https://github.com/noties/Markwon/issues/" + id, null); - final Text text = new Text("#" + id); - link.appendChild(text); - appendNode(link); - return true; - } - return false; - } - }) - .addInlineProcessor(new Inline() { - - private final Pattern RE = Pattern.compile("\\w+"); - - @NonNull - @Override - public Collection characters() { - return Collections.singleton('#'); - } - - @Override - public boolean parse() { - final String s = match(RE); - if (s != null) { - final Link link = new Link("https://noties.io", null); - final Text text = new Text("#" + s); - link.appendChild(text); - appendNode(link); - return true; - } - return false; - } - }) -// .addInlineProcessor(new OpenBracketInline()) - ; + final InlineParserFactory inlineParserFactory = MarkwonInlineParser.factoryBuilder() + .includeDefaults() + // no inline images will be parsed + .excludeInlineProcessor(BangInlineProcessor.class) + // no html tags will be parsed + .excludeInlineProcessor(HtmlInlineProcessor.class) + // no entities will be parsed (aka `&` etc) + .excludeInlineProcessor(EntityInlineProcessor.class) + .build(); 
final Markwon markwon = Markwon.builder(this) .usePlugin(StrikethroughPlugin.create()) @@ -254,9 +192,11 @@ public class EditorActivity extends Activity { .usePlugin(new AbstractMarkwonPlugin() { @Override public void configureParser(@NonNull Parser.Builder builder) { + // disable all commonmark-java blocks, only inlines will be parsed // builder.enabledBlockTypes(Collections.emptySet()); - builder.inlineParserFactory(inlineParserFactoryBuilder.build()); + + builder.inlineParserFactory(inlineParserFactory); } }) .build(); diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/Inline.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/Inline.java deleted file mode 100644 index de9326a6..00000000 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/Inline.java +++ /dev/null @@ -1,429 +0,0 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; - -import org.commonmark.internal.Bracket; -import org.commonmark.internal.Delimiter; -import org.commonmark.internal.util.Escaping; -import org.commonmark.node.Link; -import org.commonmark.node.Node; -import org.commonmark.node.Text; -import org.commonmark.parser.delimiter.DelimiterProcessor; - -import java.util.Collection; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public abstract class Inline { - - private static final String ESCAPED_CHAR = "\\\\" + Escaping.ESCAPABLE; - - protected static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); - - protected static final Pattern WHITESPACE = Pattern.compile("\\s+"); - - protected static final Pattern SPNL = Pattern.compile("^ *(?:\n *)?"); - - protected static final Pattern LINK_TITLE = Pattern.compile( - "^(?:\"(" + ESCAPED_CHAR + "|[^\"\\x00])*\"" + - '|' + - "'(" + ESCAPED_CHAR + "|[^'\\x00])*'" + - '|' + - "\\((" + ESCAPED_CHAR + "|[^)\\x00])*\\))"); - - protected static final Pattern 
LINK_DESTINATION_BRACES = Pattern.compile("^(?:[<](?:[^<> \\t\\n\\\\]|\\\\.)*[>])"); - - protected static final Pattern LINK_LABEL = Pattern.compile("^\\[(?:[^\\\\\\[\\]]|\\\\.)*\\]"); - - - protected InlineContext context; - protected Node block; - protected int index; - protected String input; - - protected void bind( - @NonNull InlineContext context, - @NonNull Node block, - @NonNull String input, - int index) { - this.context = context; - this.block = block; - this.input = input; - this.index = index; - } - - @NonNull - public abstract Collection characters(); - - public abstract boolean parse(); - - /** - * If RE matches at current index in the input, advance index and return the match; otherwise return null. - */ - protected String match(Pattern re) { - if (index >= input.length()) { - return null; - } - Matcher matcher = re.matcher(input); - matcher.region(index, input.length()); - boolean m = matcher.find(); - if (m) { - index = matcher.end(); - return matcher.group(); - } else { - return null; - } - } - - protected void appendNode(Node node) { - block.appendChild(node); - } - - protected Text appendText(CharSequence text, int beginIndex, int endIndex) { - return appendText(text.subSequence(beginIndex, endIndex)); - } - - protected Text appendText(CharSequence text) { - Text node = new Text(text.toString()); - appendNode(node); - return node; - } - - /** - * Returns the char at the current input index, or {@code '\0'} in case there are no more characters. 
- */ - protected char peek() { - if (index < input.length()) { - return input.charAt(index); - } else { - return '\0'; - } - } - - protected void addBracket(Bracket bracket) { - final Bracket lastBracket = context.lastBracket(); - if (lastBracket != null) { - lastBracket.bracketAfter = true; - } - context.lastBracket(bracket); - } - - protected void removeLastBracket() { - final InlineContext context = this.context; - context.lastBracket(context.lastBracket().previous); - } - - protected Bracket lastBracket() { - return context.lastBracket(); - } - - protected Delimiter lastDelimiter() { - return context.lastDelimiter(); - } - - protected Map referenceMap() { - return context.referenceMap(); - } - - protected Map delimiterProcessors() { - return context.delimiterProcessors(); - } - - /** - * Parse zero or more space characters, including at most one newline. - */ - protected boolean spnl() { - match(SPNL); - return true; - } - - /** - * Attempt to parse link destination, returning the string or null if no match. 
- */ - protected String parseLinkDestination() { - String res = match(LINK_DESTINATION_BRACES); - if (res != null) { // chop off surrounding <..>: - if (res.length() == 2) { - return ""; - } else { - return Escaping.unescapeString(res.substring(1, res.length() - 1)); - } - } else { - int startIndex = index; - parseLinkDestinationWithBalancedParens(); - return Escaping.unescapeString(input.substring(startIndex, index)); - } - } - - protected void parseLinkDestinationWithBalancedParens() { - int parens = 0; - while (true) { - char c = peek(); - switch (c) { - case '\0': - return; - case '\\': - // check if we have an escapable character - if (index + 1 < input.length() && ESCAPABLE.matcher(input.substring(index + 1, index + 2)).matches()) { - // skip over the escaped character (after switch) - index++; - break; - } - // otherwise, we treat this as a literal backslash - break; - case '(': - parens++; - break; - case ')': - if (parens == 0) { - return; - } else { - parens--; - } - break; - case ' ': - // ASCII space - return; - default: - // or control character - if (Character.isISOControl(c)) { - return; - } - } - index++; - } - } - - /** - * Attempt to parse link title (sans quotes), returning the string or null if no match. - */ - protected String parseLinkTitle() { - String title = match(LINK_TITLE); - if (title != null) { - // chop off quotes from title and unescape: - return Escaping.unescapeString(title.substring(1, title.length() - 1)); - } else { - return null; - } - } - - /** - * Attempt to parse a link label, returning number of characters parsed. 
- */ - protected int parseLinkLabel() { - String m = match(LINK_LABEL); - // Spec says "A link label can have at most 999 characters inside the square brackets" - if (m == null || m.length() > 1001) { - return 0; - } else { - return m.length(); - } - } - - protected void processDelimiters(Delimiter stackBottom) { - - Map openersBottom = new HashMap<>(); - - // find first closer above stackBottom: - Delimiter closer = lastDelimiter(); - while (closer != null && closer.previous != stackBottom) { - closer = closer.previous; - } - // move forward, looking for closers, and handling each - while (closer != null) { - char delimiterChar = closer.delimiterChar; - - DelimiterProcessor delimiterProcessor = delimiterProcessors().get(delimiterChar); - if (!closer.canClose || delimiterProcessor == null) { - closer = closer.next; - continue; - } - - char openingDelimiterChar = delimiterProcessor.getOpeningCharacter(); - - // Found delimiter closer. Now look back for first matching opener. - int useDelims = 0; - boolean openerFound = false; - boolean potentialOpenerFound = false; - Delimiter opener = closer.previous; - while (opener != null && opener != stackBottom && opener != openersBottom.get(delimiterChar)) { - if (opener.canOpen && opener.delimiterChar == openingDelimiterChar) { - potentialOpenerFound = true; - useDelims = delimiterProcessor.getDelimiterUse(opener, closer); - if (useDelims > 0) { - openerFound = true; - break; - } - } - opener = opener.previous; - } - - if (!openerFound) { - if (!potentialOpenerFound) { - // Set lower bound for future searches for openers. - // Only do this when we didn't even have a potential - // opener (one that matches the character and can open). - // If an opener was rejected because of the number of - // delimiters (e.g. because of the "multiple of 3" rule), - // we want to consider it next time because the number - // of delimiters can change as we continue processing. 
- openersBottom.put(delimiterChar, closer.previous); - if (!closer.canOpen) { - // We can remove a closer that can't be an opener, - // once we've seen there's no matching opener: - removeDelimiterKeepNode(closer); - } - } - closer = closer.next; - continue; - } - - Text openerNode = opener.node; - Text closerNode = closer.node; - - // Remove number of used delimiters from stack and inline nodes. - opener.length -= useDelims; - closer.length -= useDelims; - openerNode.setLiteral( - openerNode.getLiteral().substring(0, - openerNode.getLiteral().length() - useDelims)); - closerNode.setLiteral( - closerNode.getLiteral().substring(0, - closerNode.getLiteral().length() - useDelims)); - - removeDelimitersBetween(opener, closer); - // The delimiter processor can re-parent the nodes between opener and closer, - // so make sure they're contiguous already. Exclusive because we want to keep opener/closer themselves. - mergeTextNodesBetweenExclusive(openerNode, closerNode); - delimiterProcessor.process(openerNode, closerNode, useDelims); - - // No delimiter characters left to process, so we can remove delimiter and the now empty node. 
- if (opener.length == 0) { - removeDelimiterAndNode(opener); - } - - if (closer.length == 0) { - Delimiter next = closer.next; - removeDelimiterAndNode(closer); - closer = next; - } - } - - // remove all delimiters - Delimiter lastDelimiter; - while ((lastDelimiter = lastDelimiter()) != null) { - if (lastDelimiter != stackBottom) { - removeDelimiterKeepNode(lastDelimiter); - } else { - break; - } - } -// while (lastDelimiter != null && lastDelimiter != stackBottom) { -// removeDelimiterKeepNode(lastDelimiter); -// } - } - - private void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) { - // No nodes between them - if (fromNode == toNode || fromNode.getNext() == toNode) { - return; - } - - mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious()); - } - - protected void mergeChildTextNodes(Node node) { - // No children or just one child node, no need for merging - if (node.getFirstChild() == node.getLastChild()) { - return; - } - - mergeTextNodesInclusive(node.getFirstChild(), node.getLastChild()); - } - - protected void mergeTextNodesInclusive(Node fromNode, Node toNode) { - Text first = null; - Text last = null; - int length = 0; - - Node node = fromNode; - while (node != null) { - if (node instanceof Text) { - Text text = (Text) node; - if (first == null) { - first = text; - } - length += text.getLiteral().length(); - last = text; - } else { - mergeIfNeeded(first, last, length); - first = null; - last = null; - length = 0; - } - if (node == toNode) { - break; - } - node = node.getNext(); - } - - mergeIfNeeded(first, last, length); - } - - protected void mergeIfNeeded(Text first, Text last, int textLength) { - if (first != null && last != null && first != last) { - StringBuilder sb = new StringBuilder(textLength); - sb.append(first.getLiteral()); - Node node = first.getNext(); - Node stop = last.getNext(); - while (node != stop) { - sb.append(((Text) node).getLiteral()); - Node unlink = node; - node = node.getNext(); - unlink.unlink(); - } - 
String literal = sb.toString(); - first.setLiteral(literal); - } - } - - protected void removeDelimitersBetween(Delimiter opener, Delimiter closer) { - Delimiter delimiter = closer.previous; - while (delimiter != null && delimiter != opener) { - Delimiter previousDelimiter = delimiter.previous; - removeDelimiterKeepNode(delimiter); - delimiter = previousDelimiter; - } - } - - /** - * Remove the delimiter and the corresponding text node. For used delimiters, e.g. `*` in `*foo*`. - */ - protected void removeDelimiterAndNode(Delimiter delim) { - Text node = delim.node; - node.unlink(); - removeDelimiter(delim); - } - - /** - * Remove the delimiter but keep the corresponding node as text. For unused delimiters such as `_` in `foo_bar`. - */ - protected void removeDelimiterKeepNode(Delimiter delim) { - removeDelimiter(delim); - } - - protected void removeDelimiter(Delimiter delim) { - if (delim.previous != null) { - delim.previous.next = delim.next; - } - if (delim.next == null) { - // top of stack -// lastDelimiter = delim.previous; - context.lastDelimiter(delim.previous); - } else { - delim.next.previous = delim.previous; - } - } -} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineContext.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineContext.java deleted file mode 100644 index 0c3b88b7..00000000 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineContext.java +++ /dev/null @@ -1,62 +0,0 @@ -package io.noties.markwon.sample.editor.inline; - -import org.commonmark.internal.Bracket; -import org.commonmark.internal.Delimiter; -import org.commonmark.node.Link; -import org.commonmark.parser.delimiter.DelimiterProcessor; - -import java.util.Map; - -public class InlineContext { - - /** - * Top delimiter (emphasis, strong emphasis or custom emphasis). (Brackets are on a separate stack, different - * from the algorithm described in the spec.) 
- */ - private Delimiter lastDelimiter; - - /** - * Top opening bracket ([ or ![)). - */ - private Bracket lastBracket; - - /** - * Link references by ID, needs to be built up using parseReference before calling parse. - */ - private Map referenceMap; - - private Map delimiterProcessors; - - - public Delimiter lastDelimiter() { - return lastDelimiter; - } - - public void lastDelimiter(Delimiter lastDelimiter) { - this.lastDelimiter = lastDelimiter; - } - - public Bracket lastBracket() { - return lastBracket; - } - - public void lastBracket(Bracket lastBracket) { - this.lastBracket = lastBracket; - } - - public Map referenceMap() { - return referenceMap; - } - - public void referenceMap(Map referenceMap) { - this.referenceMap = referenceMap; - } - - public Map delimiterProcessors() { - return delimiterProcessors; - } - - public void delimiterProcessors(Map delimiterProcessors) { - this.delimiterProcessors = delimiterProcessors; - } -} diff --git a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserImpl.java b/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserImpl.java deleted file mode 100644 index 12704f7e..00000000 --- a/sample/src/main/java/io/noties/markwon/sample/editor/inline/InlineParserImpl.java +++ /dev/null @@ -1,1190 +0,0 @@ -package io.noties.markwon.sample.editor.inline; - -import androidx.annotation.NonNull; - -import org.commonmark.internal.Delimiter; -import org.commonmark.internal.ReferenceParser; -import org.commonmark.internal.util.Escaping; -import org.commonmark.node.Link; -import org.commonmark.node.Node; -import org.commonmark.node.Text; -import org.commonmark.parser.InlineParser; -import org.commonmark.parser.InlineParserFactory; -import org.commonmark.parser.delimiter.DelimiterProcessor; - -import java.util.ArrayList; -import java.util.BitSet; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.regex.Matcher; -import 
java.util.regex.Pattern; - -import io.noties.debug.Debug; - -public class InlineParserImpl implements InlineParser, ReferenceParser { - - public interface Builder { - - @NonNull - Builder addInlineProcessor(@NonNull Inline inline); - - @NonNull - Builder addDelimiterProcessor(@NonNull DelimiterProcessor delimiterProcessor); - - @NonNull - InlineParserFactory build(); - } - - @NonNull - public static Builder builder() { - return new BuilderImpl(); - } - -// @NonNull -// public static InlineParserFactory factory() { -//// return context -> new InlineParserImpl(context.getCustomDelimiterProcessors()); -// } -// -// public static InlineParserFactory factory(Inline... inlines) { -// return context -> new InlineParserImpl(Arrays.asList(inlines), context.getCustomDelimiterProcessors()); -// } - - private static final String ESCAPED_CHAR = "\\\\" + Escaping.ESCAPABLE; -// private static final String HTMLCOMMENT = "|"; -// private static final String PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; -// private static final String DECLARATION = "]*>"; -// private static final String CDATA = ""; -// private static final String HTMLTAG = "(?:" + Parsing.OPENTAG + "|" + Parsing.CLOSETAG + "|" + HTMLCOMMENT -// + "|" + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; -// private static final String ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; - - private static final String ASCII_PUNCTUATION = "!\"#\\$%&'\\(\\)\\*\\+,\\-\\./:;<=>\\?@\\[\\\\\\]\\^_`\\{\\|\\}~"; - private static final Pattern PUNCTUATION = Pattern - .compile("^[" + ASCII_PUNCTUATION + "\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]"); - -// private static final Pattern HTML_TAG = Pattern.compile('^' + HTMLTAG, Pattern.CASE_INSENSITIVE); - - private static final Pattern LINK_TITLE = Pattern.compile( - "^(?:\"(" + ESCAPED_CHAR + "|[^\"\\x00])*\"" + - '|' + - "'(" + ESCAPED_CHAR + "|[^'\\x00])*'" + - '|' + - "\\((" + ESCAPED_CHAR + "|[^)\\x00])*\\))"); - - private static final Pattern 
LINK_DESTINATION_BRACES = Pattern.compile("^(?:[<](?:[^<> \\t\\n\\\\]|\\\\.)*[>])"); - - private static final Pattern LINK_LABEL = Pattern.compile("^\\[(?:[^\\\\\\[\\]]|\\\\.)*\\]"); - - private static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE); - -// private static final Pattern ENTITY_HERE = Pattern.compile('^' + ENTITY, Pattern.CASE_INSENSITIVE); -// -// private static final Pattern TICKS = Pattern.compile("`+"); -// -// private static final Pattern TICKS_HERE = Pattern.compile("^`+"); -// -// private static final Pattern EMAIL_AUTOLINK = Pattern -// .compile("^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>"); -// -// private static final Pattern AUTOLINK = Pattern -// .compile("^<[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*>"); - - private static final Pattern SPNL = Pattern.compile("^ *(?:\n *)?"); - - private static final Pattern UNICODE_WHITESPACE_CHAR = Pattern.compile("^[\\p{Zs}\t\r\n\f]"); - -// private static final Pattern WHITESPACE = Pattern.compile("\\s+"); -// -// private static final Pattern FINAL_SPACE = Pattern.compile(" *$"); - - private static final Pattern LINE_END = Pattern.compile("^ *(?:\n|$)"); - - private final BitSet specialCharacters; - private final BitSet delimiterCharacters; - private final Map delimiterProcessors; - - /** - * Link references by ID, needs to be built up using parseReference before calling parse. - */ -// private Map referenceMap = new HashMap<>(); - - private Node block; - - private String input; - private int index; - - /** - * Top delimiter (emphasis, strong emphasis or custom emphasis). (Brackets are on a separate stack, different - * from the algorithm described in the spec.) - */ -// private Delimiter lastDelimiter; - - /** - * Top opening bracket ([ or ![)). 
- */ -// private Bracket lastBracket; - - private final Map> inlines; - - private InlineContext inlineContext; - - - public InlineParserImpl(List inlines, List delimiterProcessors) { - this.delimiterProcessors = calculateDelimiterProcessors(delimiterProcessors); - this.delimiterCharacters = calculateDelimiterCharacters(this.delimiterProcessors.keySet()); - this.specialCharacters = calculateSpecialCharacters(delimiterCharacters); - // we must also put into special characters (otherwise won't be triggered) - this.inlines = calculateInlines(specialCharacters, inlines); - } - - @NonNull - private static Map> calculateInlines(@NonNull BitSet specialCharacters, @NonNull List inlines) { - final Map> map = new HashMap<>(inlines.size()); - List list; - for (Inline inline : inlines) { - for (Character character : inline.characters()) { - specialCharacters.set(character); - list = map.get(character); - if (list == null) { - list = new ArrayList<>(1); - map.put(character, list); - } - list.add(inline); - } - } - return map; - } - - public static BitSet calculateDelimiterCharacters(Set characters) { - BitSet bitSet = new BitSet(); - for (Character character : characters) { - bitSet.set(character); - } - return bitSet; - } - - public static BitSet calculateSpecialCharacters(BitSet delimiterCharacters) { - BitSet bitSet = new BitSet(); - bitSet.or(delimiterCharacters); - bitSet.set('\n'); - bitSet.set('`'); - bitSet.set('['); - bitSet.set(']'); - bitSet.set('\\'); - bitSet.set('!'); - bitSet.set('<'); - bitSet.set('&'); - return bitSet; - } - - public static Map calculateDelimiterProcessors(List delimiterProcessors) { - Map map = new HashMap<>(); -// addDelimiterProcessors(Arrays.asList(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map); - addDelimiterProcessors(delimiterProcessors, map); - return map; - } - - private static void addDelimiterProcessors(Iterable delimiterProcessors, Map map) { - for (DelimiterProcessor delimiterProcessor : 
delimiterProcessors) { - char opening = delimiterProcessor.getOpeningCharacter(); - char closing = delimiterProcessor.getClosingCharacter(); - if (opening == closing) { - DelimiterProcessor old = map.get(opening); - if (old != null && old.getOpeningCharacter() == old.getClosingCharacter()) { - StaggeredDelimiterProcessor s; - if (old instanceof StaggeredDelimiterProcessor) { - s = (StaggeredDelimiterProcessor) old; - } else { - s = new StaggeredDelimiterProcessor(opening); - s.add(old); - } - s.add(delimiterProcessor); - map.put(opening, s); - } else { - addDelimiterProcessorForChar(opening, delimiterProcessor, map); - } - } else { - addDelimiterProcessorForChar(opening, delimiterProcessor, map); - addDelimiterProcessorForChar(closing, delimiterProcessor, map); - } - } - } - - private static void addDelimiterProcessorForChar(char delimiterChar, DelimiterProcessor toAdd, Map delimiterProcessors) { - DelimiterProcessor existing = delimiterProcessors.put(delimiterChar, toAdd); - if (existing != null) { - throw new IllegalArgumentException("Delimiter processor conflict with delimiter char '" + delimiterChar + "'"); - } - } - - /** - * Parse content in block into inline children, using reference map to resolve references. 
- */ - @Override - public void parse(String content, Node block) { - this.block = block; - this.input = content.trim(); - this.index = 0; -// this.lastDelimiter = null; -// this.lastBracket = null; - this.inlineContext = createInlineContext(); - - Debug.i(input); -// Debug.i(inlines); - - boolean moreToParse; - do { - moreToParse = parseInline(); - } while (moreToParse); - - // todo: must be somehow shared - processDelimiters(null); - mergeChildTextNodes(block); - } - - private InlineContext createInlineContext() { - final InlineContext context = new InlineContext(); - context.delimiterProcessors(delimiterProcessors); - context.referenceMap(new HashMap<>()); - return context; - } - - /** - * Attempt to parse a link reference, modifying the internal reference map. - */ - @Override - public int parseReference(String s) { - this.input = s; - this.index = 0; - String dest; - String title; - int matchChars; - int startIndex = index; - - // label: - matchChars = parseLinkLabel(); - if (matchChars == 0) { - return 0; - } - - String rawLabel = input.substring(0, matchChars); - - // colon: - if (peek() != ':') { - return 0; - } - index++; - - // link url - spnl(); - - dest = parseLinkDestination(); - if (dest == null || dest.length() == 0) { - return 0; - } - - int beforeTitle = index; - spnl(); - title = parseLinkTitle(); - if (title == null) { - // rewind before spaces - index = beforeTitle; - } - - boolean atLineEnd = true; - if (index != input.length() && match(LINE_END) == null) { - if (title == null) { - atLineEnd = false; - } else { - // the potential title we found is not at the line end, - // but it could still be a legal link reference if we - // discard the title - title = null; - // rewind before spaces - index = beforeTitle; - // and instead check if the link URL is at the line end - atLineEnd = match(LINE_END) != null; - } - } - - if (!atLineEnd) { - return 0; - } - - String normalizedLabel = Escaping.normalizeReference(rawLabel); - if 
(normalizedLabel.isEmpty()) { - return 0; - } - - final Map referenceMap = inlineContext.referenceMap(); - - if (!referenceMap.containsKey(normalizedLabel)) { - Link link = new Link(dest, title); - referenceMap.put(normalizedLabel, link); - } - return index - startIndex; - } - - private Text appendText(CharSequence text, int beginIndex, int endIndex) { - return appendText(text.subSequence(beginIndex, endIndex)); - } - - private Text appendText(CharSequence text) { - Text node = new Text(text.toString()); - appendNode(node); - return node; - } - - private void appendNode(Node node) { - block.appendChild(node); - } - - /** - * Parse the next inline element in subject, advancing input index. - * On success, add the result to block's children and return true. - * On failure, return false. - */ - private boolean parseInline() { - char c = peek(); - if (c == '\0') { - return false; - } - - boolean res = false; - - final List inlines = this.inlines.get(c); - - Debug.i("char: '%s', inlines: %s", c, inlines); - - if (inlines != null) { - for (Inline inline : inlines) { - res = processInline(inline); - Debug.i("char: '%s', res: %s, inline: %s", c, res, inline); - if (res) { - break; - } - } - } else { - boolean isDelimiter = delimiterCharacters.get(c); - if (isDelimiter) { - DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c); - res = parseDelimiters(delimiterProcessor, c); - } else { - res = parseString(); - } - } - -// switch (c) { -// case '\n': -// res = parseNewline(); -// break; -// case '\\': -// res = parseBackslash(); -// break; -// case '`': -// res = parseBackticks(); -// break; -// case '[': -// res = parseOpenBracket(); -// break; -// case '!': -// res = parseBang(); -// break; -// case ']': -// res = parseCloseBracket(); -// break; -// case '<': -// res = parseAutolink() || parseHtmlInline(); -// break; -// case '&': -// res = parseEntity(); -// break; -// default: -// boolean isDelimiter = delimiterCharacters.get(c); -// if (isDelimiter) { -// 
DelimiterProcessor delimiterProcessor = delimiterProcessors.get(c); -// res = parseDelimiters(delimiterProcessor, c); -// } else { -// res = parseString(); -// } -// break; -// } - if (!res) { - index++; - // When we get here, it's only for a single special character that turned out to not have a special meaning. - // So we shouldn't have a single surrogate here, hence it should be ok to turn it into a String. - String literal = String.valueOf(c); - appendText(literal); - } - - return true; - } - - private boolean processInline(@NonNull Inline inline) { - inline.bind(inlineContext, block, input, index); - final boolean result = inline.parse(); - index = inline.index; - return result; - } - - /** - * If RE matches at current index in the input, advance index and return the match; otherwise return null. - */ - private String match(Pattern re) { - if (index >= input.length()) { - return null; - } - Matcher matcher = re.matcher(input); - matcher.region(index, input.length()); - boolean m = matcher.find(); - if (m) { - index = matcher.end(); - return matcher.group(); - } else { - return null; - } - } - - /** - * Returns the char at the current input index, or {@code '\0'} in case there are no more characters. - */ - private char peek() { - if (index < input.length()) { - return input.charAt(index); - } else { - return '\0'; - } - } - - /** - * Parse zero or more space characters, including at most one newline. - */ - private boolean spnl() { - match(SPNL); - return true; - } - -// /** -// * Parse a newline. If it was preceded by two spaces, return a hard line break; otherwise a soft line break. -// */ -// private boolean parseNewline() { -// index++; // assume we're at a \n -// -// Node lastChild = block.getLastChild(); -// // Check previous text for trailing spaces. -// // The "endsWith" is an optimization to avoid an RE match in the common case. 
-// if (lastChild != null && lastChild instanceof Text && ((Text) lastChild).getLiteral().endsWith(" ")) { -// Text text = (Text) lastChild; -// String literal = text.getLiteral(); -// Matcher matcher = FINAL_SPACE.matcher(literal); -// int spaces = matcher.find() ? matcher.end() - matcher.start() : 0; -// if (spaces > 0) { -// text.setLiteral(literal.substring(0, literal.length() - spaces)); -// } -// appendNode(spaces >= 2 ? new HardLineBreak() : new SoftLineBreak()); -// } else { -// appendNode(new SoftLineBreak()); -// } -// -// // gobble leading spaces in next line -// while (peek() == ' ') { -// index++; -// } -// return true; -// } - -// /** -// * Parse a backslash-escaped special character, adding either the escaped character, a hard line break -// * (if the backslash is followed by a newline), or a literal backslash to the block's children. -// */ -// private boolean parseBackslash() { -// index++; -// if (peek() == '\n') { -// appendNode(new HardLineBreak()); -// index++; -// } else if (index < input.length() && ESCAPABLE.matcher(input.substring(index, index + 1)).matches()) { -// appendText(input, index, index + 1); -// index++; -// } else { -// appendText("\\"); -// } -// return true; -// } - -// /** -// * Attempt to parse backticks, adding either a backtick code span or a literal sequence of backticks. -// */ -// private boolean parseBackticks() { -// String ticks = match(TICKS_HERE); -// if (ticks == null) { -// return false; -// } -// int afterOpenTicks = index; -// String matched; -// while ((matched = match(TICKS)) != null) { -// if (matched.equals(ticks)) { -// Code node = new Code(); -// String content = input.substring(afterOpenTicks, index - ticks.length()); -// String literal = WHITESPACE.matcher(content.trim()).replaceAll(" "); -// node.setLiteral(literal); -// appendNode(node); -// return true; -// } -// } -// // If we got here, we didn't match a closing backtick sequence. 
-// index = afterOpenTicks; -// appendText(ticks); -// return true; -// } - - /** - * Attempt to parse delimiters like emphasis, strong emphasis or custom delimiters. - */ - private boolean parseDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) { - DelimiterData res = scanDelimiters(delimiterProcessor, delimiterChar); - if (res == null) { - return false; - } - int length = res.count; - int startIndex = index; - - index += length; - Text node = appendText(input, startIndex, index); - - // Add entry to stack for this opener - - final Delimiter lastDelimiter = new Delimiter(node, delimiterChar, res.canOpen, res.canClose, inlineContext.lastDelimiter()); - lastDelimiter.length = length; - lastDelimiter.originalLength = length; - if (lastDelimiter.previous != null) { - lastDelimiter.previous.next = lastDelimiter; - } - inlineContext.lastDelimiter(lastDelimiter); - - return true; - } - -// /** -// * Add open bracket to delimiter stack and add a text node to block's children. -// */ -// private boolean parseOpenBracket() { -// int startIndex = index; -// index++; -// -// Text node = appendText("["); -// -// // Add entry to stack for this opener -// addBracket(Bracket.link(node, startIndex, lastBracket, lastDelimiter)); -// -// return true; -// } - -// /** -// * If next character is [, and ! delimiter to delimiter stack and add a text node to block's children. -// * Otherwise just add a text node. -// */ -// private boolean parseBang() { -// int startIndex = index; -// index++; -// if (peek() == '[') { -// index++; -// -// Text node = appendText("!["); -// -// // Add entry to stack for this opener -// addBracket(Bracket.image(node, startIndex + 1, lastBracket, lastDelimiter)); -// } else { -// appendText("!"); -// } -// return true; -// } - -// /** -// * Try to match close bracket against an opening in the delimiter stack. Add either a link or image, or a -// * plain [ character, to block's children. 
If there is a matching delimiter, remove it from the delimiter stack. -// */ -// private boolean parseCloseBracket() { -// index++; -// int startIndex = index; -// -// // Get previous `[` or `![` -// Bracket opener = lastBracket; -// if (opener == null) { -// // No matching opener, just return a literal. -// appendText("]"); -// return true; -// } -// -// if (!opener.allowed) { -// // Matching opener but it's not allowed, just return a literal. -// appendText("]"); -// removeLastBracket(); -// return true; -// } -// -// // Check to see if we have a link/image -// -// String dest = null; -// String title = null; -// boolean isLinkOrImage = false; -// -// // Maybe a inline link like `[foo](/uri "title")` -// if (peek() == '(') { -// index++; -// spnl(); -// if ((dest = parseLinkDestination()) != null) { -// spnl(); -// // title needs a whitespace before -// if (WHITESPACE.matcher(input.substring(index - 1, index)).matches()) { -// title = parseLinkTitle(); -// spnl(); -// } -// if (peek() == ')') { -// index++; -// isLinkOrImage = true; -// } else { -// index = startIndex; -// } -// } -// } -// -// // Maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]` -// if (!isLinkOrImage) { -// -// // See if there's a link label like `[bar]` or `[]` -// int beforeLabel = index; -// int labelLength = parseLinkLabel(); -// String ref = null; -// if (labelLength > 2) { -// ref = input.substring(beforeLabel, beforeLabel + labelLength); -// } else if (!opener.bracketAfter) { -// // If the second label is empty `[foo][]` or missing `[foo]`, then the first label is the reference. -// // But it can only be a reference when there's no (unescaped) bracket in it. -// // If there is, we don't even need to try to look up the reference. This is an optimization. 
-// ref = input.substring(opener.index, startIndex); -// } -// -// if (ref != null) { -// Link link = referenceMap.get(Escaping.normalizeReference(ref)); -// if (link != null) { -// dest = link.getDestination(); -// title = link.getTitle(); -// isLinkOrImage = true; -// } -// } -// } -// -// if (isLinkOrImage) { -// // If we got here, open is a potential opener -// Node linkOrImage = opener.image ? new Image(dest, title) : new Link(dest, title); -// -// Node node = opener.node.getNext(); -// while (node != null) { -// Node next = node.getNext(); -// linkOrImage.appendChild(node); -// node = next; -// } -// appendNode(linkOrImage); -// -// // Process delimiters such as emphasis inside link/image -// processDelimiters(opener.previousDelimiter); -// mergeChildTextNodes(linkOrImage); -// // We don't need the corresponding text node anymore, we turned it into a link/image node -// opener.node.unlink(); -// removeLastBracket(); -// -// // Links within links are not allowed. We found this link, so there can be no other link around it. -// if (!opener.image) { -// Bracket bracket = lastBracket; -// while (bracket != null) { -// if (!bracket.image) { -// // Disallow link opener. It will still get matched, but will not result in a link. -// bracket.allowed = false; -// } -// bracket = bracket.previous; -// } -// } -// -// return true; -// -// } else { // no link or image -// -// appendText("]"); -// removeLastBracket(); -// -// index = startIndex; -// return true; -// } -// } - -// private void addBracket(Bracket bracket) { -// if (lastBracket != null) { -// lastBracket.bracketAfter = true; -// } -// lastBracket = bracket; -// } -// -// private void removeLastBracket() { -// lastBracket = lastBracket.previous; -// } - - /** - * Attempt to parse link destination, returning the string or null if no match. 
- */ - private String parseLinkDestination() { - String res = match(LINK_DESTINATION_BRACES); - if (res != null) { // chop off surrounding <..>: - if (res.length() == 2) { - return ""; - } else { - return Escaping.unescapeString(res.substring(1, res.length() - 1)); - } - } else { - int startIndex = index; - parseLinkDestinationWithBalancedParens(); - return Escaping.unescapeString(input.substring(startIndex, index)); - } - } - - private void parseLinkDestinationWithBalancedParens() { - int parens = 0; - while (true) { - char c = peek(); - switch (c) { - case '\0': - return; - case '\\': - // check if we have an escapable character - if (index + 1 < input.length() && ESCAPABLE.matcher(input.substring(index + 1, index + 2)).matches()) { - // skip over the escaped character (after switch) - index++; - break; - } - // otherwise, we treat this as a literal backslash - break; - case '(': - parens++; - break; - case ')': - if (parens == 0) { - return; - } else { - parens--; - } - break; - case ' ': - // ASCII space - return; - default: - // or control character - if (Character.isISOControl(c)) { - return; - } - } - index++; - } - } - - /** - * Attempt to parse link title (sans quotes), returning the string or null if no match. - */ - private String parseLinkTitle() { - String title = match(LINK_TITLE); - if (title != null) { - // chop off quotes from title and unescape: - return Escaping.unescapeString(title.substring(1, title.length() - 1)); - } else { - return null; - } - } - - /** - * Attempt to parse a link label, returning number of characters parsed. - */ - private int parseLinkLabel() { - String m = match(LINK_LABEL); - // Spec says "A link label can have at most 999 characters inside the square brackets" - if (m == null || m.length() > 1001) { - return 0; - } else { - return m.length(); - } - } - -// /** -// * Attempt to parse an autolink (URL or email in pointy brackets). 
-// */ -// private boolean parseAutolink() { -// String m; -// if ((m = match(EMAIL_AUTOLINK)) != null) { -// String dest = m.substring(1, m.length() - 1); -// Link node = new Link("mailto:" + dest, null); -// node.appendChild(new Text(dest)); -// appendNode(node); -// return true; -// } else if ((m = match(AUTOLINK)) != null) { -// String dest = m.substring(1, m.length() - 1); -// Link node = new Link(dest, null); -// node.appendChild(new Text(dest)); -// appendNode(node); -// return true; -// } else { -// return false; -// } -// } - -// /** -// * Attempt to parse inline HTML. -// */ -// private boolean parseHtmlInline() { -// String m = match(HTML_TAG); -// if (m != null) { -// HtmlInline node = new HtmlInline(); -// node.setLiteral(m); -// appendNode(node); -// return true; -// } else { -// return false; -// } -// } - -// /** -// * Attempt to parse an entity, return Entity object if successful. -// */ -// private boolean parseEntity() { -// String m; -// if ((m = match(ENTITY_HERE)) != null) { -// appendText(Html5Entities.entityToString(m)); -// return true; -// } else { -// return false; -// } -// } - - /** - * Parse a run of ordinary characters, or a single character with a special meaning in markdown, as a plain string. - */ - private boolean parseString() { - int begin = index; - int length = input.length(); - while (index != length) { - if (specialCharacters.get(input.charAt(index))) { - break; - } - index++; - } - if (begin != index) { - appendText(input, begin, index); - return true; - } else { - return false; - } - } - - /** - * Scan a sequence of characters with code delimiterChar, and return information about the number of delimiters - * and whether they are positioned such that they can open and/or close emphasis or strong emphasis. 
- * - * @return information about delimiter run, or {@code null} - */ - private DelimiterData scanDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) { - int startIndex = index; - - int delimiterCount = 0; - while (peek() == delimiterChar) { - delimiterCount++; - index++; - } - - if (delimiterCount < delimiterProcessor.getMinLength()) { - index = startIndex; - return null; - } - - String before = startIndex == 0 ? "\n" : - input.substring(startIndex - 1, startIndex); - - char charAfter = peek(); - String after = charAfter == '\0' ? "\n" : - String.valueOf(charAfter); - - // We could be more lazy here, in most cases we don't need to do every match case. - boolean beforeIsPunctuation = PUNCTUATION.matcher(before).matches(); - boolean beforeIsWhitespace = UNICODE_WHITESPACE_CHAR.matcher(before).matches(); - boolean afterIsPunctuation = PUNCTUATION.matcher(after).matches(); - boolean afterIsWhitespace = UNICODE_WHITESPACE_CHAR.matcher(after).matches(); - - boolean leftFlanking = !afterIsWhitespace && - (!afterIsPunctuation || beforeIsWhitespace || beforeIsPunctuation); - boolean rightFlanking = !beforeIsWhitespace && - (!beforeIsPunctuation || afterIsWhitespace || afterIsPunctuation); - boolean canOpen; - boolean canClose; - if (delimiterChar == '_') { - canOpen = leftFlanking && (!rightFlanking || beforeIsPunctuation); - canClose = rightFlanking && (!leftFlanking || afterIsPunctuation); - } else { - canOpen = leftFlanking && delimiterChar == delimiterProcessor.getOpeningCharacter(); - canClose = rightFlanking && delimiterChar == delimiterProcessor.getClosingCharacter(); - } - - index = startIndex; - return new DelimiterData(delimiterCount, canOpen, canClose); - } - - private void processDelimiters(Delimiter stackBottom) { - - Map openersBottom = new HashMap<>(); - - // find first closer above stackBottom: - Delimiter closer = inlineContext.lastDelimiter(); - while (closer != null && closer.previous != stackBottom) { - closer = closer.previous; - } - 
// move forward, looking for closers, and handling each - while (closer != null) { - char delimiterChar = closer.delimiterChar; - - DelimiterProcessor delimiterProcessor = delimiterProcessors.get(delimiterChar); - if (!closer.canClose || delimiterProcessor == null) { - closer = closer.next; - continue; - } - - char openingDelimiterChar = delimiterProcessor.getOpeningCharacter(); - - // Found delimiter closer. Now look back for first matching opener. - int useDelims = 0; - boolean openerFound = false; - boolean potentialOpenerFound = false; - Delimiter opener = closer.previous; - while (opener != null && opener != stackBottom && opener != openersBottom.get(delimiterChar)) { - if (opener.canOpen && opener.delimiterChar == openingDelimiterChar) { - potentialOpenerFound = true; - useDelims = delimiterProcessor.getDelimiterUse(opener, closer); - if (useDelims > 0) { - openerFound = true; - break; - } - } - opener = opener.previous; - } - - if (!openerFound) { - if (!potentialOpenerFound) { - // Set lower bound for future searches for openers. - // Only do this when we didn't even have a potential - // opener (one that matches the character and can open). - // If an opener was rejected because of the number of - // delimiters (e.g. because of the "multiple of 3" rule), - // we want to consider it next time because the number - // of delimiters can change as we continue processing. - openersBottom.put(delimiterChar, closer.previous); - if (!closer.canOpen) { - // We can remove a closer that can't be an opener, - // once we've seen there's no matching opener: - removeDelimiterKeepNode(closer); - } - } - closer = closer.next; - continue; - } - - Text openerNode = opener.node; - Text closerNode = closer.node; - - // Remove number of used delimiters from stack and inline nodes. 
- opener.length -= useDelims; - closer.length -= useDelims; - openerNode.setLiteral( - openerNode.getLiteral().substring(0, - openerNode.getLiteral().length() - useDelims)); - closerNode.setLiteral( - closerNode.getLiteral().substring(0, - closerNode.getLiteral().length() - useDelims)); - - removeDelimitersBetween(opener, closer); - // The delimiter processor can re-parent the nodes between opener and closer, - // so make sure they're contiguous already. Exclusive because we want to keep opener/closer themselves. - mergeTextNodesBetweenExclusive(openerNode, closerNode); - delimiterProcessor.process(openerNode, closerNode, useDelims); - - // No delimiter characters left to process, so we can remove delimiter and the now empty node. - if (opener.length == 0) { - removeDelimiterAndNode(opener); - } - - if (closer.length == 0) { - Delimiter next = closer.next; - removeDelimiterAndNode(closer); - closer = next; - } - } - - // remove all delimiters - Delimiter lastDelimiter; - while (((lastDelimiter = inlineContext.lastDelimiter())) != null) { - if (lastDelimiter != stackBottom) { - removeDelimiterKeepNode(lastDelimiter); - } else { - break; - } - } -// while (lastDelimiter != null && lastDelimiter != stackBottom) { -// removeDelimiterKeepNode(lastDelimiter); -// } - } - - private void removeDelimitersBetween(Delimiter opener, Delimiter closer) { - Delimiter delimiter = closer.previous; - while (delimiter != null && delimiter != opener) { - Delimiter previousDelimiter = delimiter.previous; - removeDelimiterKeepNode(delimiter); - delimiter = previousDelimiter; - } - } - - /** - * Remove the delimiter and the corresponding text node. For used delimiters, e.g. `*` in `*foo*`. - */ - private void removeDelimiterAndNode(Delimiter delim) { - Text node = delim.node; - node.unlink(); - removeDelimiter(delim); - } - - /** - * Remove the delimiter but keep the corresponding node as text. For unused delimiters such as `_` in `foo_bar`. 
- */ - private void removeDelimiterKeepNode(Delimiter delim) { - removeDelimiter(delim); - } - - private void removeDelimiter(Delimiter delim) { - if (delim.previous != null) { - delim.previous.next = delim.next; - } - if (delim.next == null) { - // top of stack -// lastDelimiter = delim.previous; - inlineContext.lastDelimiter(delim.previous); - } else { - delim.next.previous = delim.previous; - } - } - - private void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) { - // No nodes between them - if (fromNode == toNode || fromNode.getNext() == toNode) { - return; - } - - mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious()); - } - - private void mergeChildTextNodes(Node node) { - // No children or just one child node, no need for merging - if (node.getFirstChild() == node.getLastChild()) { - return; - } - - mergeTextNodesInclusive(node.getFirstChild(), node.getLastChild()); - } - - private void mergeTextNodesInclusive(Node fromNode, Node toNode) { - Text first = null; - Text last = null; - int length = 0; - - Node node = fromNode; - while (node != null) { - if (node instanceof Text) { - Text text = (Text) node; - if (first == null) { - first = text; - } - length += text.getLiteral().length(); - last = text; - } else { - mergeIfNeeded(first, last, length); - first = null; - last = null; - length = 0; - } - if (node == toNode) { - break; - } - node = node.getNext(); - } - - mergeIfNeeded(first, last, length); - } - - private void mergeIfNeeded(Text first, Text last, int textLength) { - if (first != null && last != null && first != last) { - StringBuilder sb = new StringBuilder(textLength); - sb.append(first.getLiteral()); - Node node = first.getNext(); - Node stop = last.getNext(); - while (node != stop) { - sb.append(((Text) node).getLiteral()); - Node unlink = node; - node = node.getNext(); - unlink.unlink(); - } - String literal = sb.toString(); - first.setLiteral(literal); - } - } - - private static class DelimiterData { - - final int count; 
- final boolean canClose; - final boolean canOpen; - - DelimiterData(int count, boolean canOpen, boolean canClose) { - this.count = count; - this.canOpen = canOpen; - this.canClose = canClose; - } - } - - private static class BuilderImpl implements Builder { - - private final List inlines = new ArrayList<>(); - private final List delimiterProcessors = new ArrayList<>(); - - @NonNull - @Override - public Builder addInlineProcessor(@NonNull Inline inline) { - inlines.add(inline); - return this; - } - - @NonNull - @Override - public Builder addDelimiterProcessor(@NonNull DelimiterProcessor delimiterProcessor) { - delimiterProcessors.add(delimiterProcessor); - return this; - } - - @NonNull - @Override - public InlineParserFactory build() { - return inlineParserContext -> { - final List processors; - final List custom = inlineParserContext.getCustomDelimiterProcessors(); - if (custom != null && !custom.isEmpty()) { - processors = new ArrayList<>(delimiterProcessors); - processors.addAll(custom); - } else { - processors = delimiterProcessors; - } - return new InlineParserImpl(inlines, processors); - }; - } - } -} diff --git a/settings.gradle b/settings.gradle index f684fbb7..7ca10d2a 100644 --- a/settings.gradle +++ b/settings.gradle @@ -10,6 +10,7 @@ include ':app', ':sample', ':markwon-image', ':markwon-image-glide', ':markwon-image-picasso', + ':markwon-inline-parser', ':markwon-linkify', ':markwon-recycler', ':markwon-recycler-table', From f2f50266945e9e3b32f0c246b84a4051768f4cde Mon Sep 17 00:00:00 2001 From: Dimitry Ivanov Date: Wed, 13 Nov 2019 14:38:49 +0300 Subject: [PATCH 3/4] Add sample and documentation for inline-parser --- CHANGELOG.md | 1 + docs/.vuepress/config.js | 1 + docs/docs/v4/inline-parser/README.md | 78 ++++++++++++ markwon-inline-parser/README.md | 8 +- .../AsteriskDelimiterProcessor.java | 7 -- .../inlineparser/MarkwonInlineParser.java | 2 + .../UnderscoreDelimiterProcessor.java | 7 -- sample/src/main/AndroidManifest.xml | 2 + 
.../noties/markwon/sample/MainActivity.java | 5 + .../java/io/noties/markwon/sample/Sample.java | 4 +- .../inlineparser/InlineParserActivity.java | 119 ++++++++++++++++++ .../src/main/res/values/strings-samples.xml | 2 + 12 files changed, 215 insertions(+), 21 deletions(-) create mode 100644 docs/docs/v4/inline-parser/README.md delete mode 100644 markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AsteriskDelimiterProcessor.java delete mode 100644 markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/UnderscoreDelimiterProcessor.java create mode 100644 sample/src/main/java/io/noties/markwon/sample/inlineparser/InlineParserActivity.java diff --git a/CHANGELOG.md b/CHANGELOG.md index ee18c59a..655ae84d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ # 4.2.0-SNAPSHOT * `MarkwonEditor` to highlight markdown input whilst editing (new module: `markwon-editor`) +* `MarkwonInlineParser` to customize inline parsing (new module: `markwon-inline-parser`) * `Markwon#configuration` method to expose `MarkwonConfiguration` via public API * `HeadingSpan#getLevel` getter * Add `SvgPictureMediaDecoder` in `image` module to deal with SVG without dimensions ([#165]) diff --git a/docs/.vuepress/config.js b/docs/.vuepress/config.js index f8db7ba5..bd44aa90 100644 --- a/docs/.vuepress/config.js +++ b/docs/.vuepress/config.js @@ -104,6 +104,7 @@ module.exports = { '/docs/v4/image/', '/docs/v4/image-glide/', '/docs/v4/image-picasso/', + '/docs/v4/inline-parser/', '/docs/v4/linkify/', '/docs/v4/recycler/', '/docs/v4/recycler-table/', diff --git a/docs/docs/v4/inline-parser/README.md b/docs/docs/v4/inline-parser/README.md new file mode 100644 index 00000000..4c5471a1 --- /dev/null +++ b/docs/docs/v4/inline-parser/README.md @@ -0,0 +1,78 @@ +# Inline Parser + +**Experimental** commonmark-java inline parser that allows customizing +core features and/or extending it with your own.
+ +Usage of _internal_ classes: +```java +import org.commonmark.internal.Bracket; +import org.commonmark.internal.Delimiter; +import org.commonmark.internal.ReferenceParser; +import org.commonmark.internal.util.Escaping; +import org.commonmark.internal.util.Html5Entities; +import org.commonmark.internal.util.Parsing; +import org.commonmark.internal.inline.AsteriskDelimiterProcessor; +import org.commonmark.internal.inline.UnderscoreDelimiterProcessor; +``` + +--- + +```java +// all default (like current commonmark-java InlineParserImpl) +final InlineParserFactory factory = MarkwonInlineParser.factoryBuilder() + .includeDefaults() + .build(); +``` + +```java +// disable images (current markdown images will be considered as links): +final InlineParserFactory factory = MarkwonInlineParser.factoryBuilder() + .includeDefaults() + .excludeInlineProcessor(BangInlineProcessor.class) + .build(); +``` + +```java +// disable core delimiter processors for `*`|`_` and `**`|`__` +final InlineParserFactory factory = MarkwonInlineParser.factoryBuilder() + .includeDefaults() + .excludeDelimiterProcessor(AsteriskDelimiterProcessor.class) + .excludeDelimiterProcessor(UnderscoreDelimiterProcessor.class) + .build(); +``` + +```java +// disable _all_ markdown inlines except for links (open and close bracket handling `[` & `]`) +final InlineParserFactory inlineParserFactory = MarkwonInlineParser.factoryBuilder() + // note that there is no `includeDefaults` method call + .referencesEnabled(true) + .addInlineProcessor(new OpenBracketInlineProcessor()) + .addInlineProcessor(new CloseBracketInlineProcessor()) + .build(); +``` + +To use custom InlineParser: +```java +final Markwon markwon = Markwon.builder(this) + .usePlugin(new AbstractMarkwonPlugin() { + @Override + public void configureParser(@NonNull Parser.Builder builder) { + builder.inlineParserFactory(inlineParserFactory); + } + }) + .build(); +``` + +--- + +The list of available inline processors: + +* `AutolinkInlineProcessor` (`<`
=> `<me@mydoma.in>`) +* `BackslashInlineProcessor` (`\\`) +* `BackticksInlineProcessor` (<code>&#96;</code> => <code>&#96;code&#96;</code>) +* `BangInlineProcessor` (`!` => `![alt](#src)`) +* `CloseBracketInlineProcessor` (`]` => `[link](#href)`, `![alt](#src)`) +* `EntityInlineProcessor` (`&` => `&amp;`) +* `HtmlInlineProcessor` (`<` => `<html></html>`) +* `NewLineInlineProcessor` (`\n`) +* `OpenBracketInlineProcessor` (`[` => `[link](#href)`) \ No newline at end of file diff --git a/markwon-inline-parser/README.md b/markwon-inline-parser/README.md index 5b0e1335..bcfa3802 100644 --- a/markwon-inline-parser/README.md +++ b/markwon-inline-parser/README.md @@ -4,17 +4,13 @@ ```java import org.commonmark.internal.Bracket; +import org.commonmark.internal.Delimiter; +import org.commonmark.internal.ReferenceParser; import org.commonmark.internal.util.Escaping; import org.commonmark.internal.util.Html5Entities; import org.commonmark.internal.util.Parsing; -import org.commonmark.internal.Bracket; -import org.commonmark.internal.Delimiter; -import org.commonmark.internal.ReferenceParser; import org.commonmark.internal.inline.AsteriskDelimiterProcessor; import org.commonmark.internal.inline.UnderscoreDelimiterProcessor; -import org.commonmark.internal.util.Escaping; -import org.commonmark.internal.Bracket; -import org.commonmark.internal.Delimiter; ``` `StaggeredDelimiterProcessor` class source is copied (required for InlineParser) \ No newline at end of file diff --git a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AsteriskDelimiterProcessor.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AsteriskDelimiterProcessor.java deleted file mode 100644 index 3a8d570e..00000000 --- a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/AsteriskDelimiterProcessor.java +++ /dev/null @@ -1,7 +0,0 @@ -package io.noties.markwon.inlineparser; - -/** - * @since 4.2.0-SNAPSHOT - */ -public class AsteriskDelimiterProcessor extends org.commonmark.internal.inline.AsteriskDelimiterProcessor { -} diff
--git a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParser.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParser.java index 5bdda362..89bb18c5 100644 --- a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParser.java +++ b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/MarkwonInlineParser.java @@ -6,6 +6,8 @@ import androidx.annotation.Nullable; import org.commonmark.internal.Bracket; import org.commonmark.internal.Delimiter; import org.commonmark.internal.ReferenceParser; +import org.commonmark.internal.inline.AsteriskDelimiterProcessor; +import org.commonmark.internal.inline.UnderscoreDelimiterProcessor; import org.commonmark.internal.util.Escaping; import org.commonmark.node.Link; import org.commonmark.node.Node; diff --git a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/UnderscoreDelimiterProcessor.java b/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/UnderscoreDelimiterProcessor.java deleted file mode 100644 index 83f7771f..00000000 --- a/markwon-inline-parser/src/main/java/io/noties/markwon/inlineparser/UnderscoreDelimiterProcessor.java +++ /dev/null @@ -1,7 +0,0 @@ -package io.noties.markwon.inlineparser; - -/** - * @since 4.2.0-SNAPSHOT - */ -public class UnderscoreDelimiterProcessor extends org.commonmark.internal.inline.UnderscoreDelimiterProcessor { -} diff --git a/sample/src/main/AndroidManifest.xml b/sample/src/main/AndroidManifest.xml index 6bc6ef02..ee887f8c 100644 --- a/sample/src/main/AndroidManifest.xml +++ b/sample/src/main/AndroidManifest.xml @@ -33,6 +33,8 @@ android:name=".editor.EditorActivity" android:windowSoftInputMode="adjustResize" /> + + \ No newline at end of file diff --git a/sample/src/main/java/io/noties/markwon/sample/MainActivity.java b/sample/src/main/java/io/noties/markwon/sample/MainActivity.java index 59c6049f..db937d19 100644 --- 
a/sample/src/main/java/io/noties/markwon/sample/MainActivity.java +++ b/sample/src/main/java/io/noties/markwon/sample/MainActivity.java @@ -24,6 +24,7 @@ import io.noties.markwon.sample.customextension.CustomExtensionActivity; import io.noties.markwon.sample.customextension2.CustomExtensionActivity2; import io.noties.markwon.sample.editor.EditorActivity; import io.noties.markwon.sample.html.HtmlActivity; +import io.noties.markwon.sample.inlineparser.InlineParserActivity; import io.noties.markwon.sample.latex.LatexActivity; import io.noties.markwon.sample.precomputed.PrecomputedActivity; import io.noties.markwon.sample.recycler.RecyclerActivity; @@ -122,6 +123,10 @@ public class MainActivity extends Activity { activity = EditorActivity.class; break; + case INLINE_PARSER: + activity = InlineParserActivity.class; + break; + default: throw new IllegalStateException("No Activity is associated with sample-item: " + item); } diff --git a/sample/src/main/java/io/noties/markwon/sample/Sample.java b/sample/src/main/java/io/noties/markwon/sample/Sample.java index c2a2add3..221ee0bc 100644 --- a/sample/src/main/java/io/noties/markwon/sample/Sample.java +++ b/sample/src/main/java/io/noties/markwon/sample/Sample.java @@ -23,7 +23,9 @@ public enum Sample { PRECOMPUTED_TEXT(R.string.sample_precomputed_text), - EDITOR(R.string.sample_editor); + EDITOR(R.string.sample_editor), + + INLINE_PARSER(R.string.sample_inline_parser); private final int textResId; diff --git a/sample/src/main/java/io/noties/markwon/sample/inlineparser/InlineParserActivity.java b/sample/src/main/java/io/noties/markwon/sample/inlineparser/InlineParserActivity.java new file mode 100644 index 00000000..955d6db2 --- /dev/null +++ b/sample/src/main/java/io/noties/markwon/sample/inlineparser/InlineParserActivity.java @@ -0,0 +1,119 @@ +package io.noties.markwon.sample.inlineparser; + +import android.app.Activity; +import android.os.Bundle; +import android.widget.TextView; + +import androidx.annotation.NonNull; 
+import androidx.annotation.Nullable; + +import org.commonmark.node.Block; +import org.commonmark.node.BlockQuote; +import org.commonmark.node.Heading; +import org.commonmark.node.HtmlBlock; +import org.commonmark.node.ListBlock; +import org.commonmark.node.ThematicBreak; +import org.commonmark.parser.InlineParserFactory; +import org.commonmark.parser.Parser; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +import io.noties.markwon.AbstractMarkwonPlugin; +import io.noties.markwon.Markwon; +import io.noties.markwon.inlineparser.BackticksInlineProcessor; +import io.noties.markwon.inlineparser.CloseBracketInlineProcessor; +import io.noties.markwon.inlineparser.MarkwonInlineParser; +import io.noties.markwon.inlineparser.OpenBracketInlineProcessor; +import io.noties.markwon.sample.R; + +public class InlineParserActivity extends Activity { + + private TextView textView; + + @Override + protected void onCreate(@Nullable Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_text_view); + + this.textView = findViewById(R.id.text_view); + +// links_only(); + + disable_code(); + } + + private void links_only() { + + // create an inline-parser-factory that will _ONLY_ parse links + // this would mean: + // * no emphasises (strong and regular aka bold and italics), + // * no images, + // * no code, + // * no HTML entities (&) + // * no HTML tags + // markdown blocks are still parsed + final InlineParserFactory inlineParserFactory = MarkwonInlineParser.factoryBuilder() + .referencesEnabled(true) + .addInlineProcessor(new OpenBracketInlineProcessor()) + .addInlineProcessor(new CloseBracketInlineProcessor()) + .build(); + + final Markwon markwon = Markwon.builder(this) + .usePlugin(new AbstractMarkwonPlugin() { + @Override + public void configureParser(@NonNull Parser.Builder builder) { + builder.inlineParserFactory(inlineParserFactory); + } + }) + .build(); + + // note that image is considered a link 
now + final String md = "**bold_bold-italic_** html-u, [link](#) ![alt](#image) `code`"; + markwon.setMarkdown(textView, md); + } + + private void disable_code() { + // parses all as usual, but ignores code (inline and block) + + final InlineParserFactory inlineParserFactory = MarkwonInlineParser.factoryBuilder() + .includeDefaults() + .excludeInlineProcessor(BackticksInlineProcessor.class) + .build(); + + // unfortunately there is no _exclude_ method for parser-builder + final Set> enabledBlocks = new HashSet>() {{ + // IndentedCodeBlock.class and FencedCodeBlock.class are missing + // this is full list (including above) that can be passed to `enabledBlockTypes` method + addAll(Arrays.asList( + BlockQuote.class, + Heading.class, + HtmlBlock.class, + ThematicBreak.class, + ListBlock.class)); + }}; + + final Markwon markwon = Markwon.builder(this) + .usePlugin(new AbstractMarkwonPlugin() { + @Override + public void configureParser(@NonNull Parser.Builder builder) { + builder + .inlineParserFactory(inlineParserFactory) + .enabledBlockTypes(enabledBlocks); + } + }) + .build(); + + final String md = "# Head!\n\n" + + "* one\n" + + "+ two\n\n" + + "and **bold** to `you`!\n\n" + + "> a quote _em_\n\n" + + "```java\n" + + "final int i = 0;\n" + + "```\n\n" + + "**Good day!**"; + markwon.setMarkdown(textView, md); + } +} diff --git a/sample/src/main/res/values/strings-samples.xml b/sample/src/main/res/values/strings-samples.xml index 7cf55ed2..a26f62c5 100644 --- a/sample/src/main/res/values/strings-samples.xml +++ b/sample/src/main/res/values/strings-samples.xml @@ -27,4 +27,6 @@ # \# Editor\n\n`MarkwonEditor` sample usage to highlight user input in EditText + # \# Inline Parser\n\nUsage of custom inline parser + \ No newline at end of file From 136c6bd51b970838a4f119417a15faedea2e10fd Mon Sep 17 00:00:00 2001 From: Dimitry Ivanov Date: Wed, 13 Nov 2019 14:49:59 +0300 Subject: [PATCH 4/4] Add spec test to inline-parser module --- build.gradle | 11 ++++---- 
docs/.vuepress/.artifacts.js | 2 +- markwon-inline-parser/build.gradle | 5 ++++ markwon-inline-parser/gradle.properties | 2 +- .../inlineparser/InlineParserSpecTest.java | 25 +++++++++++++++++++++++++ 5 files changed, 38 insertions(+), 7 deletions(-) create mode 100644 markwon-inline-parser/src/test/java/io/noties/markwon/inlineparser/InlineParserSpecTest.java diff --git a/build.gradle b/build.gradle index 9f065c7a..b176cb6c 100644 --- a/build.gradle +++ b/build.gradle @@ -81,11 +81,12 @@ ext { ] deps['test'] = [ - 'junit' : 'junit:junit:4.12', - 'robolectric': 'org.robolectric:robolectric:3.8', - 'ix-java' : 'com.github.akarnokd:ixjava:1.0.0', - 'commons-io' : 'commons-io:commons-io:2.6', - 'mockito' : 'org.mockito:mockito-core:2.21.0' + 'junit' : 'junit:junit:4.12', + 'robolectric' : 'org.robolectric:robolectric:3.8', + 'ix-java' : 'com.github.akarnokd:ixjava:1.0.0', + 'commons-io' : 'commons-io:commons-io:2.6', + 'mockito' : 'org.mockito:mockito-core:2.21.0', + 'commonmark-test-util': "com.atlassian.commonmark:commonmark-test-util:$commonMarkVersion", ] registerArtifact = this.&registerArtifact diff --git a/docs/.vuepress/.artifacts.js b/docs/.vuepress/.artifacts.js index 50625f79..bad70455 100644 --- a/docs/.vuepress/.artifacts.js +++ b/docs/.vuepress/.artifacts.js @@ -1,4 +1,4 @@ // this is a generated file, do not modify.
To update it run 'collectArtifacts.js' script -const artifacts = [{"id":"core","name":"Core","group":"io.noties.markwon","description":"Core Markwon artifact that includes basic markdown parsing and rendering"},{"id":"editor","name":"Editor","group":"io.noties.markwon","description":"Markdown editor based on Markwon"},{"id":"ext-latex","name":"LaTeX","group":"io.noties.markwon","description":"Extension to add LaTeX formulas to Markwon markdown"},{"id":"ext-strikethrough","name":"Strikethrough","group":"io.noties.markwon","description":"Extension to add strikethrough markup to Markwon markdown"},{"id":"ext-tables","name":"Tables","group":"io.noties.markwon","description":"Extension to add tables markup (GFM) to Markwon markdown"},{"id":"ext-tasklist","name":"Task List","group":"io.noties.markwon","description":"Extension to add task lists (GFM) to Markwon markdown"},{"id":"html","name":"HTML","group":"io.noties.markwon","description":"Provides HTML parsing functionality"},{"id":"image","name":"Image","group":"io.noties.markwon","description":"Markwon image loading module (with optional GIF and SVG support)"},{"id":"image-glide","name":"Image Glide","group":"io.noties.markwon","description":"Markwon image loading module (based on Glide library)"},{"id":"image-picasso","name":"Image Picasso","group":"io.noties.markwon","description":"Markwon image loading module (based on Picasso library)"},{"id":"linkify","name":"Linkify","group":"io.noties.markwon","description":"Markwon plugin to linkify text (based on Android Linkify)"},{"id":"recycler","name":"Recycler","group":"io.noties.markwon","description":"Provides RecyclerView.Adapter to display Markwon markdown"},{"id":"recycler-table","name":"Recycler Table","group":"io.noties.markwon","description":"Provides MarkwonAdapter.Entry to render TableBlocks inside Android-native TableLayout widget"},{"id":"simple-ext","name":"Simple Extension","group":"io.noties.markwon","description":"Custom extension based on simple 
delimiter usage"},{"id":"syntax-highlight","name":"Syntax Highlight","group":"io.noties.markwon","description":"Add syntax highlight to Markwon markdown via Prism4j library"}]; +const artifacts = [{"id":"core","name":"Core","group":"io.noties.markwon","description":"Core Markwon artifact that includes basic markdown parsing and rendering"},{"id":"editor","name":"Editor","group":"io.noties.markwon","description":"Markdown editor based on Markwon"},{"id":"ext-latex","name":"LaTeX","group":"io.noties.markwon","description":"Extension to add LaTeX formulas to Markwon markdown"},{"id":"ext-strikethrough","name":"Strikethrough","group":"io.noties.markwon","description":"Extension to add strikethrough markup to Markwon markdown"},{"id":"ext-tables","name":"Tables","group":"io.noties.markwon","description":"Extension to add tables markup (GFM) to Markwon markdown"},{"id":"ext-tasklist","name":"Task List","group":"io.noties.markwon","description":"Extension to add task lists (GFM) to Markwon markdown"},{"id":"html","name":"HTML","group":"io.noties.markwon","description":"Provides HTML parsing functionality"},{"id":"image","name":"Image","group":"io.noties.markwon","description":"Markwon image loading module (with optional GIF and SVG support)"},{"id":"image-glide","name":"Image Glide","group":"io.noties.markwon","description":"Markwon image loading module (based on Glide library)"},{"id":"image-picasso","name":"Image Picasso","group":"io.noties.markwon","description":"Markwon image loading module (based on Picasso library)"},{"id":"inline-parser","name":"Inline Parser","group":"io.noties.markwon","description":"Markwon customizable commonmark-java InlineParser"},{"id":"linkify","name":"Linkify","group":"io.noties.markwon","description":"Markwon plugin to linkify text (based on Android Linkify)"},{"id":"recycler","name":"Recycler","group":"io.noties.markwon","description":"Provides RecyclerView.Adapter to display Markwon markdown"},{"id":"recycler-table","name":"Recycler 
Table","group":"io.noties.markwon","description":"Provides MarkwonAdapter.Entry to render TableBlocks inside Android-native TableLayout widget"},{"id":"simple-ext","name":"Simple Extension","group":"io.noties.markwon","description":"Custom extension based on simple delimiter usage"},{"id":"syntax-highlight","name":"Syntax Highlight","group":"io.noties.markwon","description":"Add syntax highlight to Markwon markdown via Prism4j library"}]; export { artifacts }; diff --git a/markwon-inline-parser/build.gradle b/markwon-inline-parser/build.gradle index d7a2ed99..703a18ff 100644 --- a/markwon-inline-parser/build.gradle +++ b/markwon-inline-parser/build.gradle @@ -16,6 +16,11 @@ android { dependencies { api deps['x-annotations'] api deps['commonmark'] + + deps['test'].with { + testImplementation it['junit'] + testImplementation it['commonmark-test-util'] + } } registerArtifact(this) \ No newline at end of file diff --git a/markwon-inline-parser/gradle.properties b/markwon-inline-parser/gradle.properties index d386a6a9..264a18ee 100644 --- a/markwon-inline-parser/gradle.properties +++ b/markwon-inline-parser/gradle.properties @@ -1,4 +1,4 @@ POM_NAME=Inline Parser POM_ARTIFACT_ID=inline-parser -POM_DESCRIPTION=Markwon customizable commonmark-java InlineParse +POM_DESCRIPTION=Markwon customizable commonmark-java InlineParser POM_PACKAGING=aar \ No newline at end of file diff --git a/markwon-inline-parser/src/test/java/io/noties/markwon/inlineparser/InlineParserSpecTest.java b/markwon-inline-parser/src/test/java/io/noties/markwon/inlineparser/InlineParserSpecTest.java new file mode 100644 index 00000000..5f05bb02 --- /dev/null +++ b/markwon-inline-parser/src/test/java/io/noties/markwon/inlineparser/InlineParserSpecTest.java @@ -0,0 +1,25 @@ +package io.noties.markwon.inlineparser; + +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.testutil.SpecTestCase; +import org.commonmark.testutil.example.Example; + +public 
class InlineParserSpecTest extends SpecTestCase {
+    // Parser under test: commonmark-java Parser whose inline parsing is delegated to MarkwonInlineParser (defaults included)
+    private static final Parser PARSER = Parser.builder()
+            .inlineParserFactory(MarkwonInlineParser.factoryBuilder().includeDefaults().build())
+            .build();
+
+    // The spec says URL-escaping is optional, but the examples assume that it's enabled.
+    private static final HtmlRenderer RENDERER = HtmlRenderer.builder().percentEncodeUrls(true).build();
+
+    public InlineParserSpecTest(Example example) {
+        super(example);
+    }
+
+    // Converts markdown source to HTML: parsed with PARSER, rendered with RENDERER (presumably compared by SpecTestCase against the spec example - TODO confirm)
+    @Override
+    protected String render(String source) {
+        return RENDERER.render(PARSER.parse(source));
+    }
+}