Commonmark-java inline parser implementation

This commit is contained in:
Dimitry Ivanov 2019-11-14 15:30:35 +03:00
commit 3c23140ac0
32 changed files with 2085 additions and 51 deletions

View File

@ -4,6 +4,7 @@
* `MarkwonEditor` to highlight markdown input whilst editing (new module: `markwon-editor`) * `MarkwonEditor` to highlight markdown input whilst editing (new module: `markwon-editor`)
* `CoilImagesPlugin` image loader based on [Coil] library (new module: `markwon-image-coil`) ([#166], [#174]) * `CoilImagesPlugin` image loader based on [Coil] library (new module: `markwon-image-coil`) ([#166], [#174])
<br>Thanks to [@tylerbwong] <br>Thanks to [@tylerbwong]
* `MarkwonInlineParser` to customize inline parsing (new module: `markwon-inline-parser`)
* `Markwon#configuration` method to expose `MarkwonConfiguration` via public API * `Markwon#configuration` method to expose `MarkwonConfiguration` via public API
* `HeadingSpan#getLevel` getter * `HeadingSpan#getLevel` getter
* Add `SvgPictureMediaDecoder` in `image` module to deal with SVG without dimensions ([#165]) * Add `SvgPictureMediaDecoder` in `image` module to deal with SVG without dimensions ([#165])

View File

@ -83,10 +83,11 @@ ext {
deps['test'] = [ deps['test'] = [
'junit' : 'junit:junit:4.12', 'junit' : 'junit:junit:4.12',
'robolectric': 'org.robolectric:robolectric:3.8', 'robolectric' : 'org.robolectric:robolectric:3.8',
'ix-java' : 'com.github.akarnokd:ixjava:1.0.0', 'ix-java' : 'com.github.akarnokd:ixjava:1.0.0',
'commons-io' : 'commons-io:commons-io:2.6', 'commons-io' : 'commons-io:commons-io:2.6',
'mockito' : 'org.mockito:mockito-core:2.21.0' 'mockito' : 'org.mockito:mockito-core:2.21.0',
'commonmark-test-util': "com.atlassian.commonmark:commonmark-test-util:$commonMarkVersion",
] ]
registerArtifact = this.&registerArtifact registerArtifact = this.&registerArtifact

View File

@ -1,4 +1,4 @@
// this is a generated file, do not modify. To update it run 'collectArtifacts.js' script // this is a generated file, do not modify. To update it run 'collectArtifacts.js' script
const artifacts = [{"id":"core","name":"Core","group":"io.noties.markwon","description":"Core Markwon artifact that includes basic markdown parsing and rendering"},{"id":"editor","name":"Editor","group":"io.noties.markwon","description":"Markdown editor based on Markwon"},{"id":"ext-latex","name":"LaTeX","group":"io.noties.markwon","description":"Extension to add LaTeX formulas to Markwon markdown"},{"id":"ext-strikethrough","name":"Strikethrough","group":"io.noties.markwon","description":"Extension to add strikethrough markup to Markwon markdown"},{"id":"ext-tables","name":"Tables","group":"io.noties.markwon","description":"Extension to add tables markup (GFM) to Markwon markdown"},{"id":"ext-tasklist","name":"Task List","group":"io.noties.markwon","description":"Extension to add task lists (GFM) to Markwon markdown"},{"id":"html","name":"HTML","group":"io.noties.markwon","description":"Provides HTML parsing functionality"},{"id":"image","name":"Image","group":"io.noties.markwon","description":"Markwon image loading module (with optional GIF and SVG support)"},{"id":"image-glide","name":"Image Glide","group":"io.noties.markwon","description":"Markwon image loading module (based on Glide library)"},{"id":"image-picasso","name":"Image Picasso","group":"io.noties.markwon","description":"Markwon image loading module (based on Picasso library)"},{"id":"linkify","name":"Linkify","group":"io.noties.markwon","description":"Markwon plugin to linkify text (based on Android Linkify)"},{"id":"recycler","name":"Recycler","group":"io.noties.markwon","description":"Provides RecyclerView.Adapter to display Markwon markdown"},{"id":"recycler-table","name":"Recycler Table","group":"io.noties.markwon","description":"Provides MarkwonAdapter.Entry to render TableBlocks inside Android-native TableLayout widget"},{"id":"simple-ext","name":"Simple Extension","group":"io.noties.markwon","description":"Custom extension based on simple delimiter 
usage"},{"id":"syntax-highlight","name":"Syntax Highlight","group":"io.noties.markwon","description":"Add syntax highlight to Markwon markdown via Prism4j library"}]; const artifacts = [{"id":"core","name":"Core","group":"io.noties.markwon","description":"Core Markwon artifact that includes basic markdown parsing and rendering"},{"id":"editor","name":"Editor","group":"io.noties.markwon","description":"Markdown editor based on Markwon"},{"id":"ext-latex","name":"LaTeX","group":"io.noties.markwon","description":"Extension to add LaTeX formulas to Markwon markdown"},{"id":"ext-strikethrough","name":"Strikethrough","group":"io.noties.markwon","description":"Extension to add strikethrough markup to Markwon markdown"},{"id":"ext-tables","name":"Tables","group":"io.noties.markwon","description":"Extension to add tables markup (GFM) to Markwon markdown"},{"id":"ext-tasklist","name":"Task List","group":"io.noties.markwon","description":"Extension to add task lists (GFM) to Markwon markdown"},{"id":"html","name":"HTML","group":"io.noties.markwon","description":"Provides HTML parsing functionality"},{"id":"image","name":"Image","group":"io.noties.markwon","description":"Markwon image loading module (with optional GIF and SVG support)"},{"id":"image-coil","name":"Image Coil","group":"io.noties.markwon","description":"Markwon image loading module (based on Coil library)"},{"id":"image-glide","name":"Image Glide","group":"io.noties.markwon","description":"Markwon image loading module (based on Glide library)"},{"id":"image-picasso","name":"Image Picasso","group":"io.noties.markwon","description":"Markwon image loading module (based on Picasso library)"},{"id":"inline-parser","name":"Inline Parser","group":"io.noties.markwon","description":"Markwon customizable commonmark-java InlineParser"},{"id":"linkify","name":"Linkify","group":"io.noties.markwon","description":"Markwon plugin to linkify text (based on Android 
Linkify)"},{"id":"recycler","name":"Recycler","group":"io.noties.markwon","description":"Provides RecyclerView.Adapter to display Markwon markdown"},{"id":"recycler-table","name":"Recycler Table","group":"io.noties.markwon","description":"Provides MarkwonAdapter.Entry to render TableBlocks inside Android-native TableLayout widget"},{"id":"simple-ext","name":"Simple Extension","group":"io.noties.markwon","description":"Custom extension based on simple delimiter usage"},{"id":"syntax-highlight","name":"Syntax Highlight","group":"io.noties.markwon","description":"Add syntax highlight to Markwon markdown via Prism4j library"}];
export { artifacts }; export { artifacts };

View File

@ -105,6 +105,7 @@ module.exports = {
'/docs/v4/image-coil/', '/docs/v4/image-coil/',
'/docs/v4/image-glide/', '/docs/v4/image-glide/',
'/docs/v4/image-picasso/', '/docs/v4/image-picasso/',
'/docs/v4/inline-parser/',
'/docs/v4/linkify/', '/docs/v4/linkify/',
'/docs/v4/recycler/', '/docs/v4/recycler/',
'/docs/v4/recycler-table/', '/docs/v4/recycler-table/',

View File

@ -0,0 +1,78 @@
# Inline Parser <Badge text="4.2.0" />
**Experimental** commonmark-java inline parser that allows customizing
core features and/or extending them with your own.
Usage of _internal_ classes:
```java
import org.commonmark.internal.Bracket;
import org.commonmark.internal.Delimiter;
import org.commonmark.internal.ReferenceParser;
import org.commonmark.internal.util.Escaping;
import org.commonmark.internal.util.Html5Entities;
import org.commonmark.internal.util.Parsing;
import org.commonmark.internal.inline.AsteriskDelimiterProcessor;
import org.commonmark.internal.inline.UnderscoreDelimiterProcessor;
```
---
```java
// all default (like current commonmark-java InlineParserImpl)
final InlineParserFactory factory = MarkwonInlineParser.factoryBuilder()
.includeDefaults()
.build();
```
```java
// disable images (current markdown images will be considered as links):
final InlineParserFactory factory = MarkwonInlineParser.factoryBuilder()
.includeDefaults()
.excludeInlineProcessor(BangInlineProcessor.class)
.build();
```
```java
// disable core delimiter processors for `*`|`_` and `**`|`__`
final InlineParserFactory factory = MarkwonInlineParser.factoryBuilder()
.includeDefaults()
.excludeDelimiterProcessor(AsteriskDelimiterProcessor.class)
.excludeDelimiterProcessor(UnderscoreDelimiterProcessor.class)
.build();
```
```java
// disable _all_ markdown inlines except for links (open and close bracket handling `[` & `]`)
final InlineParserFactory inlineParserFactory = MarkwonInlineParser.factoryBuilder()
// note that there is no `includeDefaults` method call
.referencesEnabled(true)
.addInlineProcessor(new OpenBracketInlineProcessor())
.addInlineProcessor(new CloseBracketInlineProcessor())
.build();
```
To use a custom `InlineParser`:
```java
final Markwon markwon = Markwon.builder(this)
.usePlugin(new AbstractMarkwonPlugin() {
@Override
public void configureParser(@NonNull Parser.Builder builder) {
builder.inlineParserFactory(inlineParserFactory);
}
})
.build();
```
---
The list of available inline processors:
* `AutolinkInlineProcessor` (`<` =&gt; `<me@mydoma.in>`)
* `BackslashInlineProcessor` (`\\`)
* `BackticksInlineProcessor` (<code>&#96;</code> =&gt; <code>&#96;code&#96;</code>)
* `BangInlineProcessor` (`!` =&gt; `![alt](#src)`)
* `CloseBracketInlineProcessor` (`]` =&gt; `[link](#href)`, `![alt](#src)`)
* `EntityInlineProcessor` (`&` =&gt; `&amp;`)
* `HtmlInlineProcessor` (`<` =&gt; `<html></html>`)
* `NewLineInlineProcessor` (`\n`)
* `OpenBracketInlineProcessor` (`[` =&gt; `[link](#href)`)

View File

@ -0,0 +1,16 @@
# Inline parser
**Experimental** due to usage of internal (but still visible) classes of commonmark-java:
```java
import org.commonmark.internal.Bracket;
import org.commonmark.internal.Delimiter;
import org.commonmark.internal.ReferenceParser;
import org.commonmark.internal.util.Escaping;
import org.commonmark.internal.util.Html5Entities;
import org.commonmark.internal.util.Parsing;
import org.commonmark.internal.inline.AsteriskDelimiterProcessor;
import org.commonmark.internal.inline.UnderscoreDelimiterProcessor;
```
The source of the `StaggeredDelimiterProcessor` class is copied into this module (it is required by the inline parser)

View File

@ -0,0 +1,26 @@
// Build script of an Android library module.
apply plugin: 'com.android.library'
android {
// SDK/tooling versions are read from the shared `config` map (defined outside this script)
compileSdkVersion config['compile-sdk']
buildToolsVersion config['build-tools']
defaultConfig {
minSdkVersion config['min-sdk']
targetSdkVersion config['target-sdk']
versionCode 1
versionName version
}
}
dependencies {
// declared as `api`, so both are exposed to consumers of this module
api deps['x-annotations']
api deps['commonmark']
// test-only dependencies, looked up in the shared `deps['test']` map
deps['test'].with {
testImplementation it['junit']
testImplementation it['commonmark-test-util']
}
}
// NOTE(review): `registerArtifact` is defined outside this script; presumably it sets up publishing for this module — confirm
registerArtifact(this)

View File

@ -0,0 +1,4 @@
POM_NAME=Inline Parser
POM_ARTIFACT_ID=inline-parser
POM_DESCRIPTION=Markwon customizable commonmark-java InlineParser
POM_PACKAGING=aar

View File

@ -0,0 +1 @@
<manifest package="io.noties.markwon.inlineparser" />

View File

@ -0,0 +1,45 @@
package io.noties.markwon.inlineparser;
import org.commonmark.node.Link;
import org.commonmark.node.Text;
import java.util.regex.Pattern;
/**
 * Inline processor for autolinks: text wrapped in angle brackets that is either an
 * email address or a {@code scheme:...} URI, for example {@code <me@mydoma.in>}.
 * <p>
 * The email form is tried first and produces a link whose destination is prefixed with
 * {@code mailto:}; otherwise the URI form is tried and the bracketed text is used as the
 * destination unchanged. In both cases the link text is the bracketed content itself.
 *
 * @since 4.2.0-SNAPSHOT
 */
public class AutolinkInlineProcessor extends InlineProcessor {

    private static final Pattern EMAIL_AUTOLINK = Pattern
            .compile("^<([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>");

    private static final Pattern AUTOLINK = Pattern
            .compile("^<[a-zA-Z][a-zA-Z0-9.+-]{1,31}:[^<>\u0000-\u0020]*>");

    @Override
    public char specialCharacter() {
        return '<';
    }

    /**
     * @return {@code true} if an email or URI autolink was matched at the current index
     * and a link node was appended, {@code false} otherwise
     */
    @Override
    protected boolean parse() {
        final String email = match(EMAIL_AUTOLINK);
        if (email != null) {
            final String address = unwrap(email);
            appendLink("mailto:" + address, address);
            return true;
        }

        final String uri = match(AUTOLINK);
        if (uri == null) {
            return false;
        }

        final String target = unwrap(uri);
        appendLink(target, target);
        return true;
    }

    // drops the surrounding `<` and `>` of a matched autolink
    private static String unwrap(String matched) {
        return matched.substring(1, matched.length() - 1);
    }

    // appends a title-less link with a single text child
    private void appendLink(String destination, String label) {
        final Link link = new Link(destination, null);
        link.appendChild(new Text(label));
        appendNode(link);
    }
}

View File

@ -0,0 +1,33 @@
package io.noties.markwon.inlineparser;
import org.commonmark.node.HardLineBreak;
import java.util.regex.Pattern;
/**
 * Inline processor for the backslash character. Depending on what follows the backslash
 * it produces a hard line break (backslash before a newline), the escaped character as
 * plain text (backslash before an escapable character) or a literal backslash.
 *
 * @since 4.2.0-SNAPSHOT
 */
public class BackslashInlineProcessor extends InlineProcessor {

    private static final Pattern ESCAPABLE = MarkwonInlineParser.ESCAPABLE;

    @Override
    public char specialCharacter() {
        return '\\';
    }

    /**
     * @return always {@code true}: the backslash is consumed in every case
     */
    @Override
    protected boolean parse() {
        // step over the backslash itself
        index++;

        if (peek() == '\n') {
            appendNode(new HardLineBreak());
            index++;
            return true;
        }

        final boolean escapesNext = index < input.length()
                && ESCAPABLE.matcher(input.substring(index, index + 1)).matches();

        if (escapesNext) {
            // emit only the escaped character, the backslash is dropped
            appendText(input, index, index + 1);
            index++;
        } else {
            appendText("\\");
        }
        return true;
    }
}

View File

@ -0,0 +1,48 @@
package io.noties.markwon.inlineparser;
import org.commonmark.node.Code;
import java.util.regex.Pattern;
/**
 * Inline processor for code spans: content surrounded by equally long runs of
 * {@code `} characters, for example {@code `code`}.
 *
 * @since 4.2.0-SNAPSHOT
 */
public class BackticksInlineProcessor extends InlineProcessor {

    private static final Pattern TICKS = Pattern.compile("`+");

    private static final Pattern TICKS_HERE = Pattern.compile("^`+");

    private static final Pattern WHITESPACE = MarkwonInlineParser.WHITESPACE;

    @Override
    public char specialCharacter() {
        return '`';
    }

    /**
     * @return {@code false} if there is no backtick run at the current index, {@code true}
     * otherwise (either a code node or the literal opening run has been appended)
     */
    @Override
    protected boolean parse() {
        final String opening = match(TICKS_HERE);
        if (opening == null) {
            return false;
        }

        final int contentStart = index;

        // look at every following backtick run until one has the same length as the opening one
        for (String run = match(TICKS); run != null; run = match(TICKS)) {
            if (!run.equals(opening)) {
                continue;
            }
            final String raw = input.substring(contentStart, index - opening.length());
            final Code code = new Code();
            // trim the content and collapse inner whitespace runs to a single space
            code.setLiteral(WHITESPACE.matcher(raw.trim()).replaceAll(" "));
            appendNode(code);
            return true;
        }

        // no closing run was found: rewind and emit the opening run as plain text
        index = contentStart;
        appendText(opening);
        return true;
    }
}

View File

@ -0,0 +1,33 @@
package io.noties.markwon.inlineparser;
import org.commonmark.internal.Bracket;
import org.commonmark.node.Text;
/**
 * Inline processor for the {@code !} character, which may start a markdown image
 * {@code ![alt](#href)}. When {@code !} is directly followed by {@code [} an image
 * bracket is registered so that the closing bracket can later be resolved; otherwise
 * a literal {@code !} is emitted.
 *
 * @since 4.2.0-SNAPSHOT
 */
public class BangInlineProcessor extends InlineProcessor {

    @Override
    public char specialCharacter() {
        return '!';
    }

    /**
     * @return always {@code true}: the {@code !} is consumed in every case
     */
    @Override
    protected boolean parse() {
        final int bangIndex = index;
        index++;

        if (peek() != '[') {
            appendText("!");
            return true;
        }

        index++;
        final Text opener = appendText("![");
        // push an image opener onto the bracket stack
        final Bracket bracket = Bracket.image(opener, bangIndex + 1, lastBracket(), lastDelimiter());
        addBracket(bracket);
        return true;
    }
}

View File

@ -0,0 +1,142 @@
package io.noties.markwon.inlineparser;
import org.commonmark.internal.Bracket;
import org.commonmark.internal.util.Escaping;
import org.commonmark.node.Image;
import org.commonmark.node.Link;
import org.commonmark.node.Node;
import java.util.regex.Pattern;
import static io.noties.markwon.inlineparser.InlineParserUtils.mergeChildTextNodes;
/**
 * Handles the closing bracket {@code ]} and decides whether it completes a markdown
 * link or image. Relies on {@link OpenBracketInlineProcessor} (and on the processor that
 * registers {@code ![} image openers) to have pushed the matching opening bracket.
 * <p>
 * Both inline links ({@code [foo](/uri "title")}) and reference links
 * ({@code [foo][bar]}, {@code [foo][]}, {@code [foo]}) are recognized; references are
 * resolved through {@link #referenceMap()}.
 *
 * @since 4.2.0-SNAPSHOT
 */
public class CloseBracketInlineProcessor extends InlineProcessor {
// shared whitespace pattern, used to check that a link title is preceded by whitespace
private static final Pattern WHITESPACE = MarkwonInlineParser.WHITESPACE;
@Override
public char specialCharacter() {
return ']';
}
/**
 * Consumes the {@code ]} and either appends a link/image node wrapping everything that
 * follows the opener, or appends a literal {@code ]}.
 *
 * @return always {@code true}: the closing bracket is consumed in every case
 */
@Override
protected boolean parse() {
// step over `]`; startIndex is the position right after it and is used for rewinding
index++;
int startIndex = index;
// Get previous `[` or `![`
Bracket opener = lastBracket();
if (opener == null) {
// No matching opener, just return a literal.
appendText("]");
return true;
}
if (!opener.allowed) {
// Matching opener but it's not allowed, just return a literal.
appendText("]");
removeLastBracket();
return true;
}
// Check to see if we have a link/image
String dest = null;
String title = null;
boolean isLinkOrImage = false;
// Maybe an inline link like `[foo](/uri "title")`
if (peek() == '(') {
index++;
spnl();
// NOTE(review): if parseLinkDestination() returns null, index is not rewound to startIndex
// before the reference lookup below — confirm whether null can actually be returned here
if ((dest = parseLinkDestination()) != null) {
spnl();
// title needs a whitespace before
if (WHITESPACE.matcher(input.substring(index - 1, index)).matches()) {
title = parseLinkTitle();
spnl();
}
if (peek() == ')') {
index++;
isLinkOrImage = true;
} else {
// not a well-formed inline link, rewind to just after `]`
index = startIndex;
}
}
}
// Maybe a reference link like `[foo][bar]`, `[foo][]` or `[foo]`
if (!isLinkOrImage) {
// See if there's a link label like `[bar]` or `[]`
int beforeLabel = index;
int labelLength = parseLinkLabel();
String ref = null;
// a label longer than 2 characters is more than just `[]`, so it carries its own reference
if (labelLength > 2) {
ref = input.substring(beforeLabel, beforeLabel + labelLength);
} else if (!opener.bracketAfter) {
// If the second label is empty `[foo][]` or missing `[foo]`, then the first label is the reference.
// But it can only be a reference when there's no (unescaped) bracket in it.
// If there is, we don't even need to try to look up the reference. This is an optimization.
ref = input.substring(opener.index, startIndex);
}
if (ref != null) {
// references are stored under their normalized label
Link link = referenceMap().get(Escaping.normalizeReference(ref));
if (link != null) {
dest = link.getDestination();
title = link.getTitle();
isLinkOrImage = true;
}
}
}
if (isLinkOrImage) {
// If we got here, open is a potential opener
Node linkOrImage = opener.image ? new Image(dest, title) : new Link(dest, title);
// move every node that follows the opener's text node under the new link/image node
Node node = opener.node.getNext();
while (node != null) {
// remember the next sibling before appendChild re-parents the current node
Node next = node.getNext();
linkOrImage.appendChild(node);
node = next;
}
appendNode(linkOrImage);
// Process delimiters such as emphasis inside link/image
processDelimiters(opener.previousDelimiter);
mergeChildTextNodes(linkOrImage);
// We don't need the corresponding text node anymore, we turned it into a link/image node
opener.node.unlink();
removeLastBracket();
// Links within links are not allowed. We found this link, so there can be no other link around it.
if (!opener.image) {
Bracket bracket = lastBracket();
while (bracket != null) {
if (!bracket.image) {
// Disallow link opener. It will still get matched, but will not result in a link.
bracket.allowed = false;
}
bracket = bracket.previous;
}
}
return true;
} else { // no link or image
// emit a literal `]`, drop the opener and continue parsing right after the bracket
appendText("]");
removeLastBracket();
index = startIndex;
return true;
}
}
}

View File

@ -0,0 +1,32 @@
package io.noties.markwon.inlineparser;
import org.commonmark.internal.util.Html5Entities;
import java.util.regex.Pattern;
/**
 * Inline processor for HTML entities such as {@code &amp;}: named, decimal
 * ({@code &#NNN;}) and hexadecimal ({@code &#xHHH;}) forms are matched and replaced
 * by the text returned from {@link Html5Entities#entityToString(String)}.
 *
 * @since 4.2.0-SNAPSHOT
 */
public class EntityInlineProcessor extends InlineProcessor {

    private static final String ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});";

    private static final Pattern ENTITY_HERE = Pattern.compile('^' + ENTITY, Pattern.CASE_INSENSITIVE);

    @Override
    public char specialCharacter() {
        return '&';
    }

    /**
     * @return {@code true} if an entity was matched at the current index and its text
     * was appended, {@code false} otherwise
     */
    @Override
    protected boolean parse() {
        final String entity = match(ENTITY_HERE);
        if (entity == null) {
            return false;
        }
        appendText(Html5Entities.entityToString(entity));
        return true;
    }
}

View File

@ -0,0 +1,40 @@
package io.noties.markwon.inlineparser;
import org.commonmark.internal.util.Parsing;
import org.commonmark.node.HtmlInline;
import java.util.regex.Pattern;
/**
 * Inline processor for raw inline HTML: open and close tags, comments, processing
 * instructions, declarations and CDATA sections. Matched markup is kept verbatim as
 * the literal of an {@link HtmlInline} node.
 *
 * @since 4.2.0-SNAPSHOT
 */
public class HtmlInlineProcessor extends InlineProcessor {

    private static final String HTMLCOMMENT = "<!---->|<!--(?:-?[^>-])(?:-?[^-])*-->";

    private static final String PROCESSINGINSTRUCTION = "[<][?].*?[?][>]";

    private static final String DECLARATION = "<![A-Z]+\\s+[^>]*>";

    private static final String CDATA = "<!\\[CDATA\\[[\\s\\S]*?\\]\\]>";

    // alternation of every supported inline HTML construct
    private static final String HTMLTAG = "(?:"
            + Parsing.OPENTAG
            + "|" + Parsing.CLOSETAG
            + "|" + HTMLCOMMENT
            + "|" + PROCESSINGINSTRUCTION
            + "|" + DECLARATION
            + "|" + CDATA
            + ")";

    private static final Pattern HTML_TAG = Pattern.compile('^' + HTMLTAG, Pattern.CASE_INSENSITIVE);

    @Override
    public char specialCharacter() {
        return '<';
    }

    /**
     * @return {@code true} if inline HTML was matched at the current index and an
     * {@link HtmlInline} node was appended, {@code false} otherwise
     */
    @Override
    protected boolean parse() {
        final String html = match(HTML_TAG);
        if (html == null) {
            return false;
        }
        final HtmlInline inline = new HtmlInline();
        inline.setLiteral(html);
        appendNode(inline);
        return true;
    }
}

View File

@ -0,0 +1,77 @@
package io.noties.markwon.inlineparser;
import org.commonmark.node.Node;
import org.commonmark.node.Text;
/**
 * Static helpers that collapse runs of adjacent {@link Text} sibling nodes into a
 * single {@link Text} node.
 *
 * @since 4.2.0-SNAPSHOT
 */
public abstract class InlineParserUtils {

    /**
     * Merges adjacent text nodes located strictly between {@code fromNode} and
     * {@code toNode}; the two boundary nodes themselves are not touched.
     */
    public static void mergeTextNodesBetweenExclusive(Node fromNode, Node toNode) {
        final boolean nothingBetween = fromNode == toNode || fromNode.getNext() == toNode;
        if (!nothingBetween) {
            mergeTextNodesInclusive(fromNode.getNext(), toNode.getPrevious());
        }
    }

    /**
     * Merges adjacent text nodes among the direct children of {@code node}. Does
     * nothing when there are no children or only one child.
     */
    public static void mergeChildTextNodes(Node node) {
        final Node firstChild = node.getFirstChild();
        final Node lastChild = node.getLastChild();
        if (firstChild != lastChild) {
            mergeTextNodesInclusive(firstChild, lastChild);
        }
    }

    /**
     * Walks the siblings from {@code fromNode} to {@code toNode} (both included) and
     * merges every run of consecutive text nodes into the first node of that run.
     */
    public static void mergeTextNodesInclusive(Node fromNode, Node toNode) {
        Text runStart = null;
        Text runEnd = null;
        int runLength = 0;

        for (Node current = fromNode; current != null; current = current.getNext()) {
            if (current instanceof Text) {
                final Text text = (Text) current;
                if (runStart == null) {
                    runStart = text;
                }
                runLength += text.getLiteral().length();
                runEnd = text;
            } else {
                // a non-text node ends the current run
                mergeIfNeeded(runStart, runEnd, runLength);
                runStart = null;
                runEnd = null;
                runLength = 0;
            }
            if (current == toNode) {
                break;
            }
        }

        // flush the run that reaches the end of the range
        mergeIfNeeded(runStart, runEnd, runLength);
    }

    /**
     * Concatenates the literals of all nodes from {@code first} to {@code last} into
     * {@code first} and unlinks the others. Does nothing if either node is {@code null}
     * or both are the same node.
     *
     * @param textLength initial capacity for the merged literal
     */
    public static void mergeIfNeeded(Text first, Text last, int textLength) {
        if (first == null || last == null || first == last) {
            return;
        }

        final StringBuilder merged = new StringBuilder(textLength);
        merged.append(first.getLiteral());

        final Node end = last.getNext();
        Node current = first.getNext();
        while (current != end) {
            // read the successor before the current node is detached
            final Node following = current.getNext();
            merged.append(((Text) current).getLiteral());
            current.unlink();
            current = following;
        }

        first.setLiteral(merged.toString());
    }

    private InlineParserUtils() {
    }
}

View File

@ -0,0 +1,148 @@
package io.noties.markwon.inlineparser;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import org.commonmark.internal.Bracket;
import org.commonmark.internal.Delimiter;
import org.commonmark.node.Link;
import org.commonmark.node.Node;
import org.commonmark.node.Text;
import java.util.Map;
import java.util.regex.Pattern;
/**
 * Base class for processors that handle a single special character during inline parsing.
 * <p>
 * A processor keeps its own copy of the parsing position in {@link #index}. The helper
 * methods of this class that delegate to the {@link MarkwonInlineParserContext} push this
 * index to the context before the call and, where the call can move the position, read
 * it back afterwards. The state fields are re-assigned on every
 * {@link #parse(MarkwonInlineParserContext)} call.
 *
 * @see AutolinkInlineProcessor
 * @see BackslashInlineProcessor
 * @see BackticksInlineProcessor
 * @see BangInlineProcessor
 * @see CloseBracketInlineProcessor
 * @see EntityInlineProcessor
 * @see HtmlInlineProcessor
 * @see NewLineInlineProcessor
 * @see OpenBracketInlineProcessor
 * @see MarkwonInlineParser.FactoryBuilder#addInlineProcessor(InlineProcessor)
 * @see MarkwonInlineParser.FactoryBuilder#excludeInlineProcessor(Class)
 * @since 4.2.0-SNAPSHOT
 */
public abstract class InlineProcessor {
/**
 * Special character that triggers parsing attempt
 */
public abstract char specialCharacter();
/**
 * Performs the actual parsing using the state fields of this class.
 *
 * @return boolean indicating if parsing succeeded
 */
protected abstract boolean parse();
// state captured from the context at the start of parse(MarkwonInlineParserContext)
protected MarkwonInlineParserContext context;
protected Node block;
protected String input;
// local copy of the parsing position; written back to the context after parse()
protected int index;
/**
 * Entry point: copies block, input and index from {@code context} into the fields of
 * this processor, runs {@link #parse()} and writes the resulting index back to the context.
 *
 * @return the result of {@link #parse()}
 */
public boolean parse(@NonNull MarkwonInlineParserContext context) {
this.context = context;
this.block = context.block();
this.input = context.input();
this.index = context.index();
final boolean result = parse();
// synchronize index
context.setIndex(index);
return result;
}
/** @return the context's last bracket (see {@link MarkwonInlineParserContext#lastBracket()}) */
protected Bracket lastBracket() {
return context.lastBracket();
}
/** @return the context's last delimiter (see {@link MarkwonInlineParserContext#lastDelimiter()}) */
protected Delimiter lastDelimiter() {
return context.lastDelimiter();
}
/** @return the reference map exposed by the context */
@NonNull
protected Map<String, Link> referenceMap() {
return context.referenceMap();
}
/** Delegates to {@link MarkwonInlineParserContext#addBracket(Bracket)}. */
protected void addBracket(Bracket bracket) {
context.addBracket(bracket);
}
/** Delegates to {@link MarkwonInlineParserContext#removeLastBracket()}. */
protected void removeLastBracket() {
context.removeLastBracket();
}
/** Delegates to the context's {@code spnl()}, synchronizing the index before and after the call. */
protected void spnl() {
context.setIndex(index);
context.spnl();
index = context.index();
}
/**
 * Delegates to the context's {@code match(Pattern)}, synchronizing the index before and after the call.
 *
 * @return the value returned by the context, possibly {@code null}
 */
@Nullable
protected String match(@NonNull Pattern re) {
// before trying to match, we must notify context about our index (which we store additionally here)
context.setIndex(index);
final String result = context.match(re);
// after match we must reflect index change here
this.index = context.index();
return result;
}
/** Delegates to the context's {@code parseLinkDestination()}, synchronizing the index before and after the call. */
@Nullable
protected String parseLinkDestination() {
context.setIndex(index);
final String result = context.parseLinkDestination();
this.index = context.index();
return result;
}
/** Delegates to the context's {@code parseLinkTitle()}, synchronizing the index before and after the call. */
@Nullable
protected String parseLinkTitle() {
context.setIndex(index);
final String result = context.parseLinkTitle();
this.index = context.index();
return result;
}
/** Delegates to the context's {@code parseLinkLabel()}, synchronizing the index before and after the call. */
protected int parseLinkLabel() {
context.setIndex(index);
final int result = context.parseLinkLabel();
this.index = context.index();
return result;
}
/** Delegates to the context's {@code processDelimiters(Delimiter)}, synchronizing the index before and after the call. */
protected void processDelimiters(Delimiter stackBottom) {
context.setIndex(index);
context.processDelimiters(stackBottom);
this.index = context.index();
}
/** Delegates to {@link MarkwonInlineParserContext#appendNode(Node)}. */
protected void appendNode(@NonNull Node node) {
context.appendNode(node);
}
/** Delegates to the context's {@code appendText(CharSequence, int, int)}; the index of this processor is not changed. */
@NonNull
protected Text appendText(@NonNull CharSequence text, int beginIndex, int endIndex) {
return context.appendText(text, beginIndex, endIndex);
}
/** Delegates to the context's {@code appendText(CharSequence)}; the index of this processor is not changed. */
@NonNull
protected Text appendText(@NonNull CharSequence text) {
return context.appendText(text);
}
/** Pushes the current index to the context and returns the context's {@code peek()} result. */
protected char peek() {
context.setIndex(index);
return context.peek();
}
}

View File

@ -0,0 +1,915 @@
package io.noties.markwon.inlineparser;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import org.commonmark.internal.Bracket;
import org.commonmark.internal.Delimiter;
import org.commonmark.internal.ReferenceParser;
import org.commonmark.internal.inline.AsteriskDelimiterProcessor;
import org.commonmark.internal.inline.UnderscoreDelimiterProcessor;
import org.commonmark.internal.util.Escaping;
import org.commonmark.node.Link;
import org.commonmark.node.Node;
import org.commonmark.node.Text;
import org.commonmark.parser.InlineParser;
import org.commonmark.parser.InlineParserContext;
import org.commonmark.parser.InlineParserFactory;
import org.commonmark.parser.delimiter.DelimiterProcessor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static io.noties.markwon.inlineparser.InlineParserUtils.mergeChildTextNodes;
import static io.noties.markwon.inlineparser.InlineParserUtils.mergeTextNodesBetweenExclusive;
/**
* @see #factoryBuilder()
* @see FactoryBuilder
* @since 4.2.0-SNAPSHOT
*/
public class MarkwonInlineParser implements InlineParser, ReferenceParser, MarkwonInlineParserContext {
/**
 * Builder for an {@link InlineParserFactory}. Every configuration method returns a
 * {@link FactoryBuilder} so calls can be chained; {@link #build()} finishes the chain.
 */
public interface FactoryBuilder {
/**
 * Registers an inline processor.
 *
 * @see InlineProcessor
 */
@NonNull
FactoryBuilder addInlineProcessor(@NonNull InlineProcessor processor);
/**
 * Registers a delimiter processor.
 *
 * @see AsteriskDelimiterProcessor
 * @see UnderscoreDelimiterProcessor
 */
@NonNull
FactoryBuilder addDelimiterProcessor(@NonNull DelimiterProcessor processor);
/**
 * Indicate if markdown references are enabled. {@code referencesEnabled=true} if {@link #includeDefaults()}
 * was called
 */
@NonNull
FactoryBuilder referencesEnabled(boolean referencesEnabled);
/**
 * Includes all default delimiter and inline processors, and sets {@code referencesEnabled=true}.
 * Useful with subsequent calls to {@link #excludeInlineProcessor(Class)} or {@link #excludeDelimiterProcessor(Class)}
 */
@NonNull
FactoryBuilder includeDefaults();
/**
 * Excludes inline processors identified by the given class.
 * NOTE(review): the implementation is not visible here — confirm whether matching is by exact class.
 */
@NonNull
FactoryBuilder excludeInlineProcessor(@NonNull Class<? extends InlineProcessor> processor);
/**
 * Excludes delimiter processors identified by the given class.
 * NOTE(review): the implementation is not visible here — confirm whether matching is by exact class.
 */
@NonNull
FactoryBuilder excludeDelimiterProcessor(@NonNull Class<? extends DelimiterProcessor> processor);
/**
 * @return the configured {@link InlineParserFactory}
 */
@NonNull
InlineParserFactory build();
}
/**
 * @return a new {@link FactoryBuilder} instance
 */
@NonNull
public static FactoryBuilder factoryBuilder() {
return new FactoryBuilderImpl();
}
// Regular expressions used by the inline parser. Patterns that start with `^` only match
// at the very beginning of the character sequence they are applied to.

// a literal backslash followed by Escaping.ESCAPABLE
private static final String ESCAPED_CHAR = "\\\\" + Escaping.ESCAPABLE;
// ASCII punctuation characters, escaped for use inside a regex character class
private static final String ASCII_PUNCTUATION = "!\"#\\$%&'\\(\\)\\*\\+,\\-\\./:;<=>\\?@\\[\\\\\\]\\^_`\\{\\|\\}~";
// a single punctuation character: ASCII punctuation or one of the listed Unicode punctuation categories
private static final Pattern PUNCTUATION = Pattern
.compile("^[" + ASCII_PUNCTUATION + "\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}\\p{Ps}]");
// a link title in double quotes, single quotes or parentheses
private static final Pattern LINK_TITLE = Pattern.compile(
"^(?:\"(" + ESCAPED_CHAR + "|[^\"\\x00])*\"" +
'|' +
"'(" + ESCAPED_CHAR + "|[^'\\x00])*'" +
'|' +
"\\((" + ESCAPED_CHAR + "|[^)\\x00])*\\))");
// a link destination wrapped in angle brackets
private static final Pattern LINK_DESTINATION_BRACES = Pattern.compile("^(?:[<](?:[^<> \\t\\n\\\\]|\\\\.)*[>])");
// a bracketed link label without unescaped brackets inside
private static final Pattern LINK_LABEL = Pattern.compile("^\\[(?:[^\\\\\\[\\]]|\\\\.)*\\]");
// optional spaces with at most one newline in between
private static final Pattern SPNL = Pattern.compile("^ *(?:\n *)?");
// a single Unicode whitespace character
private static final Pattern UNICODE_WHITESPACE_CHAR = Pattern.compile("^[\\p{Zs}\t\r\n\f]");
// optional spaces followed by a newline or the end of input
private static final Pattern LINE_END = Pattern.compile("^ *(?:\n|$)");
// package-private: shared with inline processors of this package
static final Pattern ESCAPABLE = Pattern.compile('^' + Escaping.ESCAPABLE);
// package-private: shared with inline processors of this package
static final Pattern WHITESPACE = Pattern.compile("\\s+");
// configuration, assigned once in the constructor
private final boolean referencesEnabled;
// characters that have an inline processor or a delimiter processor registered
private final BitSet specialCharacters;
// inline processors grouped by their special character
private final Map<Character, List<InlineProcessor>> inlineProcessors;
// delimiter processors keyed by their opening/closing character
private final Map<Character, DelimiterProcessor> delimiterProcessors;
// parsing state, reset at the start of each parse(String, Node) call
private Node block;
private String input;
private int index;
/**
 * Link references by ID, needs to be built up using parseReference before calling parse.
 */
private Map<String, Link> referenceMap = new HashMap<>(1);
/**
 * Top delimiter (emphasis, strong emphasis or custom emphasis). (Brackets are on a separate stack, different
 * from the algorithm described in the spec.)
 */
private Delimiter lastDelimiter;
/**
 * Top opening bracket (<code>[</code> or <code>![</code>).
 */
private Bracket lastBracket;
// might we construct these in factory?
public MarkwonInlineParser(
boolean referencesEnabled,
@NonNull List<InlineProcessor> inlineProcessors,
@NonNull List<DelimiterProcessor> delimiterProcessors) {
this.referencesEnabled = referencesEnabled;
this.inlineProcessors = calculateInlines(inlineProcessors);
this.delimiterProcessors = calculateDelimiterProcessors(delimiterProcessors);
this.specialCharacters = calculateSpecialCharacters(
this.inlineProcessors.keySet(),
this.delimiterProcessors.keySet());
}
/**
 * Groups inline processors by their special character, keeping the order in which
 * they appear in {@code inlines} within each group.
 */
@NonNull
private static Map<Character, List<InlineProcessor>> calculateInlines(@NonNull List<InlineProcessor> inlines) {
    final Map<Character, List<InlineProcessor>> byCharacter = new HashMap<>(inlines.size());
    for (InlineProcessor processor : inlines) {
        final Character key = processor.specialCharacter();
        List<InlineProcessor> bucket = byCharacter.get(key);
        if (bucket == null) {
            // first processor for this character
            bucket = new ArrayList<>(1);
            byCharacter.put(key, bucket);
        }
        bucket.add(processor);
    }
    return byCharacter;
}
/**
 * Builds a bit set in which the bit at a character's code is set for every
 * character of either input set.
 */
@NonNull
private static BitSet calculateSpecialCharacters(Set<Character> inlineCharacters, Set<Character> delimiterCharacters) {
    final BitSet special = new BitSet();
    for (char inline : inlineCharacters) {
        special.set(inline);
    }
    for (char delimiter : delimiterCharacters) {
        special.set(delimiter);
    }
    return special;
}
/**
 * Creates a new map of delimiter processors keyed by delimiter character.
 */
private static Map<Character, DelimiterProcessor> calculateDelimiterProcessors(List<DelimiterProcessor> delimiterProcessors) {
    final Map<Character, DelimiterProcessor> byCharacter = new HashMap<>();
    addDelimiterProcessors(delimiterProcessors, byCharacter);
    return byCharacter;
}
/**
 * Registers each delimiter processor in {@code map} under its opening and closing
 * character. Several processors that share the same character for both opening and
 * closing are combined into a {@link StaggeredDelimiterProcessor}; any other clash on a
 * character is rejected by {@code addDelimiterProcessorForChar}.
 */
private static void addDelimiterProcessors(Iterable<DelimiterProcessor> delimiterProcessors, Map<Character, DelimiterProcessor> map) {
    for (DelimiterProcessor processor : delimiterProcessors) {
        final char open = processor.getOpeningCharacter();
        final char close = processor.getClosingCharacter();

        if (open != close) {
            // distinct characters: register under both
            addDelimiterProcessorForChar(open, processor, map);
            addDelimiterProcessorForChar(close, processor, map);
            continue;
        }

        final DelimiterProcessor registered = map.get(open);
        final boolean registeredIsSymmetric = registered != null
                && registered.getOpeningCharacter() == registered.getClosingCharacter();

        if (!registeredIsSymmetric) {
            addDelimiterProcessorForChar(open, processor, map);
            continue;
        }

        // same symmetric character already registered: stagger both processors
        final StaggeredDelimiterProcessor staggered;
        if (registered instanceof StaggeredDelimiterProcessor) {
            staggered = (StaggeredDelimiterProcessor) registered;
        } else {
            staggered = new StaggeredDelimiterProcessor(open);
            staggered.add(registered);
        }
        staggered.add(processor);
        map.put(open, staggered);
    }
}
/**
 * Stores {@code toAdd} under {@code delimiterChar}.
 *
 * @throws IllegalArgumentException if another processor was already registered for that character
 */
private static void addDelimiterProcessorForChar(char delimiterChar, DelimiterProcessor toAdd, Map<Character, DelimiterProcessor> delimiterProcessors) {
    final DelimiterProcessor replaced = delimiterProcessors.put(delimiterChar, toAdd);
    if (replaced == null) {
        return;
    }
    throw new IllegalArgumentException("Delimiter processor conflict with delimiter char '" + delimiterChar + "'");
}
/**
 * Parses {@code content} into inline child nodes of {@code block}. The content is
 * trimmed first and the delimiter/bracket state is reset, then inlines are parsed until
 * {@code parseInline()} reports that nothing is left; finally the remaining delimiters
 * are processed and adjacent text children of the block are merged.
 */
@Override
public void parse(String content, Node block) {
    // reset per-parse state
    this.lastBracket = null;
    this.lastDelimiter = null;
    this.index = 0;
    this.input = content.trim();
    this.block = block;

    while (parseInline()) {
        // keep going while there is more to parse
    }

    processDelimiters(null);
    mergeChildTextNodes(block);
}
/**
 * Attempt to parse a link reference, modifying the internal reference map.
 * <p>
 * The input is expected to start with a link label followed by a colon, a link destination and an
 * optional title, and the definition has to end at a line end (or at the end of the input).
 * A definition whose normalized label is already present in the reference map does not replace
 * the existing entry, but its characters are still reported as consumed.
 *
 * @param s input to parse, starting at its first character
 * @return number of characters consumed by the definition, or {@code 0} when references are
 * disabled or no valid definition starts at the beginning of {@code s}
 */
@Override
public int parseReference(String s) {
// nothing to do when reference parsing is disabled
if (!referencesEnabled) {
return 0;
}
this.input = s;
this.index = 0;
String dest;
String title;
int matchChars;
int startIndex = index;
// label:
matchChars = parseLinkLabel();
if (matchChars == 0) {
return 0;
}
// includes the surrounding square brackets (parsing started at index 0)
String rawLabel = input.substring(0, matchChars);
// colon:
if (peek() != ':') {
return 0;
}
index++;
// link url
spnl();
dest = parseLinkDestination();
if (dest == null || dest.length() == 0) {
return 0;
}
// remember where the destination ended so that a rejected title can be un-read
int beforeTitle = index;
spnl();
title = parseLinkTitle();
if (title == null) {
// rewind before spaces
index = beforeTitle;
}
boolean atLineEnd = true;
if (index != input.length() && match(LINE_END) == null) {
if (title == null) {
atLineEnd = false;
} else {
// the potential title we found is not at the line end,
// but it could still be a legal link reference if we
// discard the title
title = null;
// rewind before spaces
index = beforeTitle;
// and instead check if the link URL is at the line end
atLineEnd = match(LINE_END) != null;
}
}
if (!atLineEnd) {
return 0;
}
String normalizedLabel = Escaping.normalizeReference(rawLabel);
if (normalizedLabel.isEmpty()) {
return 0;
}
// the first definition of a label wins, later ones are ignored
if (!referenceMap.containsKey(normalizedLabel)) {
Link link = new Link(dest, title);
referenceMap.put(normalizedLabel, link);
}
return index - startIndex;
}
/**
 * Appends the {@code [beginIndex, endIndex)} slice of {@code text} as a new text node.
 *
 * @param text       source characters
 * @param beginIndex start of the slice, inclusive
 * @param endIndex   end of the slice, exclusive
 * @return the appended text node
 */
@Override
@NonNull
public Text appendText(@NonNull CharSequence text, int beginIndex, int endIndex) {
    final CharSequence slice = text.subSequence(beginIndex, endIndex);
    return appendText(slice);
}
/**
 * Creates a text node holding {@code text} and appends it via {@link #appendNode(Node)}.
 *
 * @param text literal content of the new node
 * @return the appended text node
 */
@Override
@NonNull
public Text appendText(@NonNull CharSequence text) {
    final String literal = text.toString();
    final Text textNode = new Text(literal);
    appendNode(textNode);
    return textNode;
}
/**
 * Appends {@code node} as the last child of the block that is currently being parsed.
 *
 * @param node node to append
 */
@Override
public void appendNode(@NonNull Node node) {
    this.block.appendChild(node);
}
/**
 * Parses the next inline element of the input and advances the input index.
 * <p>
 * The current character selects the handler: inline processors registered for it are tried in
 * order until one succeeds; if none are registered, the delimiter processor for the character is
 * used, and if there is none either a plain string is parsed. When nothing consumed the character
 * it is appended as literal text.
 *
 * @return {@code false} when the end of input has been reached, {@code true} otherwise
 */
private boolean parseInline() {
    final char current = peek();
    if (current == '\0') {
        return false;
    }

    boolean handled = false;
    final List<InlineProcessor> candidates = this.inlineProcessors.get(current);
    if (candidates == null) {
        final DelimiterProcessor delimiterProcessor = delimiterProcessors.get(current);
        handled = delimiterProcessor == null
                ? parseString()
                : parseDelimiters(delimiterProcessor, current);
    } else {
        for (InlineProcessor candidate : candidates) {
            handled = candidate.parse(this);
            if (handled) {
                break;
            }
        }
    }

    if (!handled) {
        // Only reached for a single special character that turned out to have no special meaning,
        // so it is not expected to be a lone surrogate and can safely become a String.
        index++;
        appendText(String.valueOf(current));
    }
    return true;
}
/**
 * Tries to find {@code re} in the input, searching the region from the current index to the end.
 * On success the index is moved to the end of the match.
 *
 * @param re pattern to apply
 * @return the matched text, or {@code null} when the input is exhausted or nothing matched
 */
@Override
@Nullable
public String match(@NonNull Pattern re) {
    final int length = input.length();
    if (index >= length) {
        return null;
    }

    final Matcher matcher = re.matcher(input);
    matcher.region(index, length);
    if (!matcher.find()) {
        return null;
    }

    index = matcher.end();
    return matcher.group();
}
/**
 * Looks at the character at the current input index without consuming it.
 *
 * @return the current character, or {@code '\0'} when the index is at or past the end of the input
 */
@Override
public char peek() {
    return index < input.length()
            ? input.charAt(index)
            : '\0';
}
/**
 * @return the block node that was passed to the current {@code parse} call
 */
@NonNull
@Override
public Node block() {
    return this.block;
}
/**
 * @return the text that is currently being parsed
 */
@NonNull
@Override
public String input() {
    return this.input;
}
/**
 * @return the current position inside {@link #input()}
 */
@Override
public int index() {
    return this.index;
}
/**
 * Moves the current position inside the input. The value is stored as is, without validation.
 *
 * @param index new position
 */
@Override
public void setIndex(int index) {
    this.index = index;
}
/**
 * @return the most recently added bracket that has not been removed yet,
 * or {@code null} when there is none
 */
@Override
public Bracket lastBracket() {
    return this.lastBracket;
}
/**
 * @return the delimiter on top of the delimiter stack, or {@code null} when the stack is empty
 */
@Override
public Delimiter lastDelimiter() {
    return this.lastDelimiter;
}
/**
 * @return the live map of link reference definitions, keyed by normalized label
 */
@NonNull
@Override
public Map<String, Link> referenceMap() {
    return this.referenceMap;
}
/**
 * Makes {@code bracket} the last bracket. The bracket that was last until now, if any,
 * is flagged as having a bracket after it.
 *
 * @param bracket bracket to put on top
 */
@Override
public void addBracket(Bracket bracket) {
    final Bracket previousTop = this.lastBracket;
    if (previousTop != null) {
        previousTop.bracketAfter = true;
    }
    this.lastBracket = bracket;
}
/**
 * Drops the last bracket, making its predecessor the last one.
 * Must only be called while a last bracket exists.
 */
@Override
public void removeLastBracket() {
    final Bracket top = this.lastBracket;
    this.lastBracket = top.previous;
}
/**
 * Skips zero or more space characters, including at most one newline.
 *
 * @return always {@code true}; skipping nothing still counts as success
 */
@Override
public boolean spnl() {
    // the matched text itself is of no interest, only the index advance performed by match
    this.match(SPNL);
    return true;
}
/**
 * Tries to consume a run of {@code delimiterChar} (emphasis, strong emphasis or a custom delimiter).
 * On success the run is appended as a text node and pushed onto the delimiter stack so that
 * {@code processDelimiters} can pair it up later.
 *
 * @param delimiterProcessor processor registered for the character
 * @param delimiterChar      character at the current index
 * @return {@code false} when no usable run starts at the current index, {@code true} otherwise
 */
private boolean parseDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) {
    final DelimiterData run = scanDelimiters(delimiterProcessor, delimiterChar);
    if (run == null) {
        return false;
    }

    final int runLength = run.count;
    final int runStart = index;
    index = runStart + runLength;
    final Text runText = appendText(input, runStart, index);

    // push the run onto the delimiter stack
    final Delimiter pushed = new Delimiter(runText, delimiterChar, run.canOpen, run.canClose, lastDelimiter);
    pushed.length = runLength;
    pushed.originalLength = runLength;
    if (pushed.previous != null) {
        pushed.previous.next = pushed;
    }
    lastDelimiter = pushed;
    return true;
}
/**
 * Parses a link destination at the current index, either in the {@code <...>} form or as a bare
 * destination with balanced parentheses.
 *
 * @return the unescaped destination; an empty string for {@code <>}
 */
@Override
@Nullable
public String parseLinkDestination() {
    final String braced = match(LINK_DESTINATION_BRACES);
    if (braced == null) {
        final int begin = index;
        parseLinkDestinationWithBalancedParens();
        return Escaping.unescapeString(input.substring(begin, index));
    }

    // chop off surrounding <..>
    if (braced.length() == 2) {
        return "";
    }
    return Escaping.unescapeString(braced.substring(1, braced.length() - 1));
}
/**
 * Advances {@code index} over a link destination that is not wrapped in {@code <...>}.
 * <p>
 * Scanning stops, leaving {@code index} on the stopping character, at the end of input, at an
 * ASCII space, at an ISO control character, or at a closing parenthesis that has no matching
 * opening parenthesis inside the destination. A backslash followed by an escapable character
 * consumes both characters.
 */
private void parseLinkDestinationWithBalancedParens() {
// number of '(' seen so far that have not been closed by a ')'
int parens = 0;
while (true) {
char c = peek();
switch (c) {
case '\0':
// end of input
return;
case '\\':
// check if we have an escapable character
if (index + 1 < input.length() && ESCAPABLE.matcher(input.substring(index + 1, index + 2)).matches()) {
// skip over the escaped character (after switch)
index++;
break;
}
// otherwise, we treat this as a literal backslash
break;
case '(':
parens++;
break;
case ')':
if (parens == 0) {
// unbalanced ')': it belongs to the caller, not to the destination
return;
} else {
parens--;
}
break;
case ' ':
// ASCII space
return;
default:
// or control character
if (Character.isISOControl(c)) {
return;
}
}
// consume the character that was just examined
index++;
}
}
/**
 * Parses a link title at the current index.
 *
 * @return the unescaped title without its surrounding quote characters,
 * or {@code null} when no title matches
 */
@Override
@Nullable
public String parseLinkTitle() {
    final String quoted = match(LINK_TITLE);
    if (quoted == null) {
        return null;
    }
    // drop the first and the last character (the quotes) before unescaping
    final String inner = quoted.substring(1, quoted.length() - 1);
    return Escaping.unescapeString(inner);
}
/**
 * Parses a link label at the current index.
 *
 * @return number of characters of the label including both square brackets,
 * or {@code 0} when no label matches or the label is too long
 */
@Override
public int parseLinkLabel() {
    final String label = match(LINK_LABEL);
    // Spec says "A link label can have at most 999 characters inside the square brackets",
    // which makes 1001 characters with the two brackets included
    final boolean acceptable = label != null && label.length() <= 1001;
    return acceptable ? label.length() : 0;
}
/**
 * Consumes a run of characters up to (not including) the next special character or the end of
 * the input and appends it as a text node.
 *
 * @return {@code true} when at least one character was consumed, {@code false} otherwise
 */
private boolean parseString() {
    final int start = index;
    final int end = input.length();

    int cursor = start;
    while (cursor != end && !specialCharacters.get(input.charAt(cursor))) {
        cursor++;
    }
    index = cursor;

    if (cursor == start) {
        return false;
    }
    appendText(input, start, cursor);
    return true;
}
/**
 * Scan a sequence of characters with code delimiterChar, and return information about the number of delimiters
 * and whether they are positioned such that they can open and/or close emphasis or strong emphasis.
 * <p>
 * The input index is the same after the call as before it; only the returned data describes the run.
 *
 * @param delimiterProcessor processor registered for {@code delimiterChar}
 * @param delimiterChar      delimiter character at the current index
 * @return information about delimiter run, or {@code null} when the run is shorter than the
 * processor's minimum length
 */
private DelimiterData scanDelimiters(DelimiterProcessor delimiterProcessor, char delimiterChar) {
int startIndex = index;
int delimiterCount = 0;
// count the run length by temporarily advancing the index
while (peek() == delimiterChar) {
delimiterCount++;
index++;
}
if (delimiterCount < delimiterProcessor.getMinLength()) {
index = startIndex;
return null;
}
// the start and the end of the input are treated like a line break
String before = startIndex == 0 ? "\n" :
input.substring(startIndex - 1, startIndex);
char charAfter = peek();
String after = charAfter == '\0' ? "\n" :
String.valueOf(charAfter);
// We could be more lazy here, in most cases we don't need to do every match case.
boolean beforeIsPunctuation = PUNCTUATION.matcher(before).matches();
boolean beforeIsWhitespace = UNICODE_WHITESPACE_CHAR.matcher(before).matches();
boolean afterIsPunctuation = PUNCTUATION.matcher(after).matches();
boolean afterIsWhitespace = UNICODE_WHITESPACE_CHAR.matcher(after).matches();
// left-flanking: not followed by whitespace, and if followed by punctuation
// then preceded by whitespace or punctuation
boolean leftFlanking = !afterIsWhitespace &&
(!afterIsPunctuation || beforeIsWhitespace || beforeIsPunctuation);
// right-flanking: the mirror image of the rule above
boolean rightFlanking = !beforeIsWhitespace &&
(!beforeIsPunctuation || afterIsWhitespace || afterIsPunctuation);
boolean canOpen;
boolean canClose;
if (delimiterChar == '_') {
// underscore additionally requires that the run is not flanked on both sides,
// unless punctuation sits on the outer side
canOpen = leftFlanking && (!rightFlanking || beforeIsPunctuation);
canClose = rightFlanking && (!leftFlanking || afterIsPunctuation);
} else {
canOpen = leftFlanking && delimiterChar == delimiterProcessor.getOpeningCharacter();
canClose = rightFlanking && delimiterChar == delimiterProcessor.getClosingCharacter();
}
// rewind: the caller decides whether to consume the run
index = startIndex;
return new DelimiterData(delimiterCount, canOpen, canClose);
}
/**
 * Pairs up openers and closers on the delimiter stack above {@code stackBottom} and lets the
 * matching delimiter processors turn them into nodes.
 * <p>
 * Walks from the first delimiter above {@code stackBottom} towards the top of the stack. For every
 * delimiter that can close, the stack is searched backwards for an opener that the delimiter
 * processor accepts; used delimiter characters are cut from the text nodes, delimiters between
 * the pair are discarded, and fully used delimiters are removed together with their nodes.
 * When the walk is finished every delimiter above {@code stackBottom} is removed from the stack
 * (their text nodes stay in place).
 *
 * @param stackBottom delimiter below which nothing is touched, or {@code null} to process the
 *                    whole stack
 */
@Override
public void processDelimiters(Delimiter stackBottom) {
// per delimiter character: the delimiter below which no opener can exist any more
Map<Character, Delimiter> openersBottom = new HashMap<>();
// find first closer above stackBottom:
Delimiter closer = lastDelimiter;
while (closer != null && closer.previous != stackBottom) {
closer = closer.previous;
}
// move forward, looking for closers, and handling each
while (closer != null) {
char delimiterChar = closer.delimiterChar;
DelimiterProcessor delimiterProcessor = delimiterProcessors.get(delimiterChar);
if (!closer.canClose || delimiterProcessor == null) {
closer = closer.next;
continue;
}
char openingDelimiterChar = delimiterProcessor.getOpeningCharacter();
// Found delimiter closer. Now look back for first matching opener.
int useDelims = 0;
boolean openerFound = false;
boolean potentialOpenerFound = false;
Delimiter opener = closer.previous;
while (opener != null && opener != stackBottom && opener != openersBottom.get(delimiterChar)) {
if (opener.canOpen && opener.delimiterChar == openingDelimiterChar) {
potentialOpenerFound = true;
// the processor decides how many delimiter characters this pair consumes (0 = no match)
useDelims = delimiterProcessor.getDelimiterUse(opener, closer);
if (useDelims > 0) {
openerFound = true;
break;
}
}
opener = opener.previous;
}
if (!openerFound) {
if (!potentialOpenerFound) {
// Set lower bound for future searches for openers.
// Only do this when we didn't even have a potential
// opener (one that matches the character and can open).
// If an opener was rejected because of the number of
// delimiters (e.g. because of the "multiple of 3" rule),
// we want to consider it next time because the number
// of delimiters can change as we continue processing.
openersBottom.put(delimiterChar, closer.previous);
if (!closer.canOpen) {
// We can remove a closer that can't be an opener,
// once we've seen there's no matching opener:
removeDelimiterKeepNode(closer);
}
}
closer = closer.next;
continue;
}
Text openerNode = opener.node;
Text closerNode = closer.node;
// Remove number of used delimiters from stack and inline nodes.
opener.length -= useDelims;
closer.length -= useDelims;
openerNode.setLiteral(
openerNode.getLiteral().substring(0,
openerNode.getLiteral().length() - useDelims));
closerNode.setLiteral(
closerNode.getLiteral().substring(0,
closerNode.getLiteral().length() - useDelims));
removeDelimitersBetween(opener, closer);
// The delimiter processor can re-parent the nodes between opener and closer,
// so make sure they're contiguous already. Exclusive because we want to keep opener/closer themselves.
mergeTextNodesBetweenExclusive(openerNode, closerNode);
delimiterProcessor.process(openerNode, closerNode, useDelims);
// No delimiter characters left to process, so we can remove delimiter and the now empty node.
if (opener.length == 0) {
removeDelimiterAndNode(opener);
}
// a closer with characters left is examined again in the next iteration
if (closer.length == 0) {
Delimiter next = closer.next;
removeDelimiterAndNode(closer);
closer = next;
}
}
// remove all delimiters
while (lastDelimiter != null && lastDelimiter != stackBottom) {
removeDelimiterKeepNode(lastDelimiter);
}
}
/**
 * Removes every delimiter strictly between {@code opener} and {@code closer} from the stack,
 * keeping their text nodes.
 *
 * @param opener lower bound (kept)
 * @param closer upper bound (kept)
 */
private void removeDelimitersBetween(Delimiter opener, Delimiter closer) {
    for (Delimiter current = closer.previous; current != null && current != opener; ) {
        // read the link first, removal re-wires the neighbours
        final Delimiter below = current.previous;
        removeDelimiterKeepNode(current);
        current = below;
    }
}
/**
 * Removes a fully used delimiter from the stack and unlinks its text node as well,
 * e.g. the {@code *} characters in {@code *foo*}.
 *
 * @param delim delimiter to remove
 */
private void removeDelimiterAndNode(Delimiter delim) {
    delim.node.unlink();
    removeDelimiter(delim);
}
/**
 * Removes an unused delimiter from the stack while leaving its node in place as plain text,
 * e.g. the {@code _} in {@code foo_bar}.
 *
 * @param delim delimiter to remove
 */
private void removeDelimiterKeepNode(Delimiter delim) {
    this.removeDelimiter(delim);
}
/**
 * Unlinks {@code delim} from the doubly linked delimiter stack and updates the stack top when
 * the removed delimiter was the top.
 *
 * @param delim delimiter to unlink
 */
private void removeDelimiter(Delimiter delim) {
    final Delimiter below = delim.previous;
    final Delimiter above = delim.next;

    if (below != null) {
        below.next = above;
    }

    if (above != null) {
        above.previous = below;
    } else {
        // top of stack
        lastDelimiter = below;
    }
}
/**
 * Immutable description of a delimiter run: its length and whether it may open and/or close.
 */
private static class DelimiterData {

    /** Number of delimiter characters in the run. */
    final int count;

    /** Whether the run may act as an opener. */
    final boolean canOpen;

    /** Whether the run may act as a closer. */
    final boolean canClose;

    DelimiterData(int count, boolean canOpen, boolean canClose) {
        this.count = count;
        this.canOpen = canOpen;
        this.canClose = canClose;
    }
}
/**
 * Mutable {@link FactoryBuilder} that collects inline and delimiter processors and the
 * references flag, and hands them to an {@link InlineParserFactoryImpl}.
 */
static class FactoryBuilderImpl implements FactoryBuilder {

    private final List<InlineProcessor> inlineProcessors = new ArrayList<>(3);
    private final List<DelimiterProcessor> delimiterProcessors = new ArrayList<>(3);
    private boolean referencesEnabled;

    @NonNull
    @Override
    public FactoryBuilder addInlineProcessor(@NonNull InlineProcessor processor) {
        inlineProcessors.add(processor);
        return this;
    }

    @NonNull
    @Override
    public FactoryBuilder addDelimiterProcessor(@NonNull DelimiterProcessor processor) {
        delimiterProcessors.add(processor);
        return this;
    }

    @NonNull
    @Override
    public FactoryBuilder referencesEnabled(boolean referencesEnabled) {
        this.referencesEnabled = referencesEnabled;
        return this;
    }

    /**
     * Enables references and registers the processors that make up the default configuration.
     */
    @NonNull
    @Override
    public FactoryBuilder includeDefaults() {
        // by default enabled
        referencesEnabled = true;

        final List<InlineProcessor> defaultInlines = Arrays.asList(
                new AutolinkInlineProcessor(),
                new BackslashInlineProcessor(),
                new BackticksInlineProcessor(),
                new BangInlineProcessor(),
                new CloseBracketInlineProcessor(),
                new EntityInlineProcessor(),
                new HtmlInlineProcessor(),
                new NewLineInlineProcessor(),
                new OpenBracketInlineProcessor());
        inlineProcessors.addAll(defaultInlines);

        final List<DelimiterProcessor> defaultDelimiters = Arrays.asList(
                new AsteriskDelimiterProcessor(),
                new UnderscoreDelimiterProcessor());
        delimiterProcessors.addAll(defaultDelimiters);

        return this;
    }

    /**
     * Removes the first registered inline processor whose class is exactly {@code type}.
     */
    @NonNull
    @Override
    public FactoryBuilder excludeInlineProcessor(@NonNull Class<? extends InlineProcessor> type) {
        removeFirstOfExactType(inlineProcessors, type);
        return this;
    }

    /**
     * Removes the first registered delimiter processor whose class is exactly {@code type}.
     */
    @NonNull
    @Override
    public FactoryBuilder excludeDelimiterProcessor(@NonNull Class<? extends DelimiterProcessor> type) {
        removeFirstOfExactType(delimiterProcessors, type);
        return this;
    }

    @NonNull
    @Override
    public InlineParserFactory build() {
        return new InlineParserFactoryImpl(referencesEnabled, inlineProcessors, delimiterProcessors);
    }

    // Removes the first element whose runtime class equals `type`; subclasses do not match.
    private static void removeFirstOfExactType(List<?> processors, Class<?> type) {
        for (int position = 0; position < processors.size(); position++) {
            if (type.equals(processors.get(position).getClass())) {
                processors.remove(position);
                return;
            }
        }
    }
}
/**
 * {@link InlineParserFactory} that creates {@link MarkwonInlineParser} instances configured with a
 * fixed set of inline and delimiter processors.
 * <p>
 * The processor lists are copied on construction. {@code FactoryBuilderImpl#build()} passes its own
 * mutable lists, so without the copy any later {@code add...}/{@code exclude...} call on the
 * builder would silently change a factory that has already been built.
 */
static class InlineParserFactoryImpl implements InlineParserFactory {

    private final boolean referencesEnabled;
    private final List<InlineProcessor> inlineProcessors;
    private final List<DelimiterProcessor> delimiterProcessors;

    /**
     * @param referencesEnabled   forwarded to every created parser
     * @param inlineProcessors    inline processors to use; copied, later changes are not observed
     * @param delimiterProcessors delimiter processors to use; copied, later changes are not observed
     */
    InlineParserFactoryImpl(
            boolean referencesEnabled,
            @NonNull List<InlineProcessor> inlineProcessors,
            @NonNull List<DelimiterProcessor> delimiterProcessors) {
        this.referencesEnabled = referencesEnabled;
        // snapshot the configuration so that the factory is independent of its builder
        this.inlineProcessors = new ArrayList<>(inlineProcessors);
        this.delimiterProcessors = new ArrayList<>(delimiterProcessors);
    }

    /**
     * Creates a parser. Custom delimiter processors supplied by the context, if any, are appended
     * after the ones this factory was configured with.
     *
     * @param inlineParserContext context supplied by the caller
     * @return a new inline parser
     */
    @Override
    public InlineParser create(InlineParserContext inlineParserContext) {
        final List<DelimiterProcessor> custom = inlineParserContext.getCustomDelimiterProcessors();
        if (custom == null || custom.isEmpty()) {
            return new MarkwonInlineParser(referencesEnabled, inlineProcessors, delimiterProcessors);
        }

        final List<DelimiterProcessor> combined =
                new ArrayList<>(custom.size() + delimiterProcessors.size());
        combined.addAll(delimiterProcessors);
        combined.addAll(custom);
        return new MarkwonInlineParser(referencesEnabled, inlineProcessors, combined);
    }
}
}

View File

@ -0,0 +1,65 @@
package io.noties.markwon.inlineparser;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import org.commonmark.internal.Bracket;
import org.commonmark.internal.Delimiter;
import org.commonmark.node.Link;
import org.commonmark.node.Node;
import org.commonmark.node.Text;
import java.util.Map;
import java.util.regex.Pattern;
/**
 * Parsing state and helper operations of a running inline parse: the input and the position in
 * it, the bracket and delimiter stacks, the link reference map, and helpers to match patterns
 * and append nodes.
 */
public interface MarkwonInlineParserContext {
/**
 * @return the block node whose inline content is being parsed
 */
@NonNull
Node block();
/**
 * @return the text that is being parsed
 */
@NonNull
String input();
/**
 * @return the current position inside {@link #input()}
 */
int index();
/**
 * Moves the current position inside {@link #input()}.
 */
void setIndex(int index);
/**
 * @return the most recently added bracket that has not been removed
 * (presumably {@code null} when there is none - confirm against the implementation)
 */
Bracket lastBracket();
/**
 * @return the delimiter on top of the delimiter stack
 * (presumably {@code null} when the stack is empty - confirm against the implementation)
 */
Delimiter lastDelimiter();
/**
 * @return link reference definitions keyed by label
 */
@NonNull
Map<String, Link> referenceMap();
/**
 * Makes {@code bracket} the last bracket.
 */
void addBracket(Bracket bracket);
/**
 * Removes the last bracket, making its predecessor the last one.
 */
void removeLastBracket();
/**
 * Parse zero or more space characters, including at most one newline.
 */
boolean spnl();
/**
 * Returns the char at the current input index, or {@code '\0'} in case there are no more characters.
 */
char peek();
/**
 * If {@code re} matches at the current index in the input, advance the index and return the match;
 * otherwise return {@code null}.
 */
@Nullable
String match(@NonNull Pattern re);
/**
 * Appends {@code node} to the block that is being parsed.
 */
void appendNode(@NonNull Node node);
/**
 * Appends the {@code [beginIndex, endIndex)} part of {@code text} as a text node.
 *
 * @return the appended node
 */
@NonNull
Text appendText(@NonNull CharSequence text, int beginIndex, int endIndex);
/**
 * Appends {@code text} as a text node.
 *
 * @return the appended node
 */
@NonNull
Text appendText(@NonNull CharSequence text);
/**
 * Attempt to parse link destination, returning the string or null if no match.
 */
@Nullable
String parseLinkDestination();
/**
 * Attempt to parse link title (sans quotes), returning the string or null if no match.
 */
@Nullable
String parseLinkTitle();
/**
 * Attempt to parse a link label, returning number of characters parsed.
 */
int parseLinkLabel();
/**
 * Processes the delimiters on the delimiter stack that are above {@code stackBottom}.
 */
void processDelimiters(Delimiter stackBottom);
}

View File

@ -0,0 +1,49 @@
package io.noties.markwon.inlineparser;
import org.commonmark.node.HardLineBreak;
import org.commonmark.node.Node;
import org.commonmark.node.SoftLineBreak;
import org.commonmark.node.Text;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Handles a newline character: appends a hard line break when the preceding text ends with
 * two or more spaces and a soft line break otherwise. Trailing spaces of the preceding text and
 * leading spaces of the next line are dropped.
 *
 * @since 4.2.0-SNAPSHOT
 */
public class NewLineInlineProcessor extends InlineProcessor {

    private static final Pattern FINAL_SPACE = Pattern.compile(" *$");

    @Override
    public char specialCharacter() {
        return '\n';
    }

    @Override
    protected boolean parse() {
        index++; // assume we're at a \n

        int trailingSpaces = 0;
        final Node previous = block.getLastChild();
        if (previous instanceof Text) {
            final Text text = (Text) previous;
            final String literal = text.getLiteral();
            // The "endsWith" is an optimization to avoid an RE match in the common case.
            if (literal.endsWith(" ")) {
                final Matcher matcher = FINAL_SPACE.matcher(literal);
                if (matcher.find()) {
                    trailingSpaces = matcher.end() - matcher.start();
                }
                if (trailingSpaces > 0) {
                    text.setLiteral(literal.substring(0, literal.length() - trailingSpaces));
                }
            }
        }

        appendNode(trailingSpaces >= 2 ? new HardLineBreak() : new SoftLineBreak());

        // gobble leading spaces in next line
        while (peek() == ' ') {
            index++;
        }
        return true;
    }
}

View File

@ -0,0 +1,29 @@
package io.noties.markwon.inlineparser;
import org.commonmark.internal.Bracket;
import org.commonmark.node.Text;
/**
 * Handles the opening square bracket of markdown links {@code [link](#href)}: the bracket is
 * appended as text and remembered so that a later closing bracket can be matched against it.
 *
 * @since 4.2.0-SNAPSHOT
 */
public class OpenBracketInlineProcessor extends InlineProcessor {

    @Override
    public char specialCharacter() {
        return '[';
    }

    @Override
    protected boolean parse() {
        // remember where the bracket is, then step over it
        final int bracketIndex = index++;
        final Text bracketText = appendText("[");

        // Add entry to stack for this opener
        final Bracket opener = Bracket.link(bracketText, bracketIndex, lastBracket(), lastDelimiter());
        addBracket(opener);
        return true;
    }
}

View File

@ -0,0 +1,75 @@
package io.noties.markwon.inlineparser;
import org.commonmark.node.Text;
import org.commonmark.parser.delimiter.DelimiterProcessor;
import org.commonmark.parser.delimiter.DelimiterRun;
import java.util.LinkedList;
import java.util.ListIterator;
/**
 * Delimiter processor that combines several processors sharing the same delimiter character but
 * having different minimum lengths. Calls are forwarded to the processor with the largest minimum
 * length that does not exceed the given length.
 */
class StaggeredDelimiterProcessor implements DelimiterProcessor {

    private final char delim;

    // ordered by getMinLength, largest first
    private final LinkedList<DelimiterProcessor> processors = new LinkedList<>();

    // smallest minimum length among the added processors
    private int minLength = 0;

    StaggeredDelimiterProcessor(char delim) {
        this.delim = delim;
    }

    @Override
    public char getOpeningCharacter() {
        return delim;
    }

    @Override
    public char getClosingCharacter() {
        return delim;
    }

    @Override
    public int getMinLength() {
        return minLength;
    }

    /**
     * Adds a processor, keeping the list ordered by descending minimum length.
     *
     * @throws IllegalArgumentException when a processor with the same minimum length is present
     */
    void add(DelimiterProcessor dp) {
        final int len = dp.getMinLength();

        int insertAt = -1;
        int position = 0;
        for (DelimiterProcessor existing : processors) {
            final int existingLen = existing.getMinLength();
            if (len > existingLen) {
                insertAt = position;
                break;
            }
            if (len == existingLen) {
                throw new IllegalArgumentException("Cannot add two delimiter processors for char '" + delim + "' and minimum length " + len);
            }
            position++;
        }

        if (insertAt >= 0) {
            processors.add(insertAt, dp);
        } else {
            // smaller than everything present: goes last and becomes the new minimum
            processors.add(dp);
            this.minLength = len;
        }
    }

    // First processor (i.e. the one with the largest minimum length) that accepts `len`;
    // falls back to the first processor when none does.
    private DelimiterProcessor findProcessor(int len) {
        for (DelimiterProcessor candidate : processors) {
            if (candidate.getMinLength() <= len) {
                return candidate;
            }
        }
        return processors.getFirst();
    }

    @Override
    public int getDelimiterUse(DelimiterRun opener, DelimiterRun closer) {
        final DelimiterProcessor target = findProcessor(opener.length());
        return target.getDelimiterUse(opener, closer);
    }

    @Override
    public void process(Text opener, Text closer, int delimiterUse) {
        final DelimiterProcessor target = findProcessor(delimiterUse);
        target.process(opener, closer, delimiterUse);
    }
}

View File

@ -0,0 +1,25 @@
package io.noties.markwon.inlineparser;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer;
import org.commonmark.testutil.SpecTestCase;
import org.commonmark.testutil.example.Example;
/**
 * Renders each spec example with a parser that uses {@link MarkwonInlineParser} with the default
 * processors, so that {@link SpecTestCase} can compare the result with the expected HTML.
 */
public class InlineParserSpecTest extends SpecTestCase {

    private static final Parser PARSER = createParser();

    // The spec says URL-escaping is optional, but the examples assume that it's enabled.
    private static final HtmlRenderer RENDERER = HtmlRenderer.builder()
            .percentEncodeUrls(true)
            .build();

    public InlineParserSpecTest(Example example) {
        super(example);
    }

    @Override
    protected String render(String source) {
        return RENDERER.render(PARSER.parse(source));
    }

    // Parser under test: stock block parsing, inline parsing done by MarkwonInlineParser.
    private static Parser createParser() {
        return Parser.builder()
                .inlineParserFactory(MarkwonInlineParser.factoryBuilder().includeDefaults().build())
                .build();
    }
}

View File

@ -41,6 +41,7 @@ dependencies {
implementation project(':markwon-ext-tasklist') implementation project(':markwon-ext-tasklist')
implementation project(':markwon-html') implementation project(':markwon-html')
implementation project(':markwon-image') implementation project(':markwon-image')
implementation project(':markwon-inline-parser')
implementation project(':markwon-linkify') implementation project(':markwon-linkify')
implementation project(':markwon-recycler') implementation project(':markwon-recycler')
implementation project(':markwon-recycler-table') implementation project(':markwon-recycler-table')

View File

@ -33,6 +33,8 @@
android:name=".editor.EditorActivity" android:name=".editor.EditorActivity"
android:windowSoftInputMode="adjustResize" /> android:windowSoftInputMode="adjustResize" />
<activity android:name=".inlineparser.InlineParserActivity" />
</application> </application>
</manifest> </manifest>

View File

@ -24,6 +24,7 @@ import io.noties.markwon.sample.customextension.CustomExtensionActivity;
import io.noties.markwon.sample.customextension2.CustomExtensionActivity2; import io.noties.markwon.sample.customextension2.CustomExtensionActivity2;
import io.noties.markwon.sample.editor.EditorActivity; import io.noties.markwon.sample.editor.EditorActivity;
import io.noties.markwon.sample.html.HtmlActivity; import io.noties.markwon.sample.html.HtmlActivity;
import io.noties.markwon.sample.inlineparser.InlineParserActivity;
import io.noties.markwon.sample.latex.LatexActivity; import io.noties.markwon.sample.latex.LatexActivity;
import io.noties.markwon.sample.precomputed.PrecomputedActivity; import io.noties.markwon.sample.precomputed.PrecomputedActivity;
import io.noties.markwon.sample.recycler.RecyclerActivity; import io.noties.markwon.sample.recycler.RecyclerActivity;
@ -122,6 +123,10 @@ public class MainActivity extends Activity {
activity = EditorActivity.class; activity = EditorActivity.class;
break; break;
case INLINE_PARSER:
activity = InlineParserActivity.class;
break;
default: default:
throw new IllegalStateException("No Activity is associated with sample-item: " + item); throw new IllegalStateException("No Activity is associated with sample-item: " + item);
} }

View File

@ -23,7 +23,9 @@ public enum Sample {
PRECOMPUTED_TEXT(R.string.sample_precomputed_text), PRECOMPUTED_TEXT(R.string.sample_precomputed_text),
EDITOR(R.string.sample_editor); EDITOR(R.string.sample_editor),
INLINE_PARSER(R.string.sample_inline_parser);
private final int textResId; private final int textResId;

View File

@ -18,6 +18,7 @@ import android.widget.TextView;
import androidx.annotation.NonNull; import androidx.annotation.NonNull;
import androidx.annotation.Nullable; import androidx.annotation.Nullable;
import org.commonmark.parser.InlineParserFactory;
import org.commonmark.parser.Parser; import org.commonmark.parser.Parser;
import java.util.ArrayList; import java.util.ArrayList;
@ -36,6 +37,10 @@ import io.noties.markwon.editor.PersistedSpans;
import io.noties.markwon.editor.handler.EmphasisEditHandler; import io.noties.markwon.editor.handler.EmphasisEditHandler;
import io.noties.markwon.editor.handler.StrongEmphasisEditHandler; import io.noties.markwon.editor.handler.StrongEmphasisEditHandler;
import io.noties.markwon.ext.strikethrough.StrikethroughPlugin; import io.noties.markwon.ext.strikethrough.StrikethroughPlugin;
import io.noties.markwon.inlineparser.BangInlineProcessor;
import io.noties.markwon.inlineparser.EntityInlineProcessor;
import io.noties.markwon.inlineparser.HtmlInlineProcessor;
import io.noties.markwon.inlineparser.MarkwonInlineParser;
import io.noties.markwon.linkify.LinkifyPlugin; import io.noties.markwon.linkify.LinkifyPlugin;
import io.noties.markwon.sample.R; import io.noties.markwon.sample.R;
@ -102,7 +107,7 @@ public class EditorActivity extends Activity {
private void additional_edit_span() { private void additional_edit_span() {
// An additional span is used to highlight strong-emphasis // An additional span is used to highlight strong-emphasis
final MarkwonEditor editor = MarkwonEditor.builder(Markwon.create(this)) final MarkwonEditor editor = MarkwonEditor.builder(Markwon.create(this))
.useEditHandler(new AbstractEditHandler<StrongEmphasisSpan>() { .useEditHandler(new AbstractEditHandler<StrongEmphasisSpan>() {
@Override @Override
public void configurePersistedSpans(@NonNull PersistedSpans.Builder builder) { public void configurePersistedSpans(@NonNull PersistedSpans.Builder builder) {
@ -171,14 +176,27 @@ final MarkwonEditor editor = MarkwonEditor.builder(Markwon.create(this))
// for links to be clickable // for links to be clickable
editText.setMovementMethod(LinkMovementMethod.getInstance()); editText.setMovementMethod(LinkMovementMethod.getInstance());
final InlineParserFactory inlineParserFactory = MarkwonInlineParser.factoryBuilder()
.includeDefaults()
// no inline images will be parsed
.excludeInlineProcessor(BangInlineProcessor.class)
// no html tags will be parsed
.excludeInlineProcessor(HtmlInlineProcessor.class)
// no entities will be parsed (aka `&amp;` etc)
.excludeInlineProcessor(EntityInlineProcessor.class)
.build();
final Markwon markwon = Markwon.builder(this) final Markwon markwon = Markwon.builder(this)
.usePlugin(StrikethroughPlugin.create()) .usePlugin(StrikethroughPlugin.create())
.usePlugin(LinkifyPlugin.create()) .usePlugin(LinkifyPlugin.create())
.usePlugin(new AbstractMarkwonPlugin() { .usePlugin(new AbstractMarkwonPlugin() {
@Override @Override
public void configureParser(@NonNull Parser.Builder builder) { public void configureParser(@NonNull Parser.Builder builder) {
// disable all commonmark-java blocks, only inlines will be parsed // disable all commonmark-java blocks, only inlines will be parsed
// builder.enabledBlockTypes(Collections.emptySet()); // builder.enabledBlockTypes(Collections.emptySet());
builder.inlineParserFactory(inlineParserFactory);
} }
}) })
.build(); .build();

View File

@ -0,0 +1,119 @@
package io.noties.markwon.sample.inlineparser;
import android.app.Activity;
import android.os.Bundle;
import android.widget.TextView;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import org.commonmark.node.Block;
import org.commonmark.node.BlockQuote;
import org.commonmark.node.Heading;
import org.commonmark.node.HtmlBlock;
import org.commonmark.node.ListBlock;
import org.commonmark.node.ThematicBreak;
import org.commonmark.parser.InlineParserFactory;
import org.commonmark.parser.Parser;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import io.noties.markwon.AbstractMarkwonPlugin;
import io.noties.markwon.Markwon;
import io.noties.markwon.inlineparser.BackticksInlineProcessor;
import io.noties.markwon.inlineparser.CloseBracketInlineProcessor;
import io.noties.markwon.inlineparser.MarkwonInlineParser;
import io.noties.markwon.inlineparser.OpenBracketInlineProcessor;
import io.noties.markwon.sample.R;
/**
 * Sample screen that demonstrates customization of inline parsing with {@link MarkwonInlineParser}.
 */
public class InlineParserActivity extends Activity {

    private TextView textView;

    @Override
    protected void onCreate(@Nullable Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_text_view);

        this.textView = findViewById(R.id.text_view);

        // pick one of the samples below
//        links_only();
        disable_code();
    }

    private void links_only() {

        // create an inline-parser-factory that will _ONLY_ parse links
        //  this would mean:
        //  * no emphasises (strong and regular aka bold and italics),
        //  * no images,
        //  * no code,
        //  * no HTML entities (&amp;)
        //  * no HTML tags
        // markdown blocks are still parsed
        final InlineParserFactory inlineParserFactory = MarkwonInlineParser.factoryBuilder()
                .referencesEnabled(true)
                .addInlineProcessor(new OpenBracketInlineProcessor())
                .addInlineProcessor(new CloseBracketInlineProcessor())
                .build();

        final Markwon markwon = Markwon.builder(this)
                .usePlugin(new AbstractMarkwonPlugin() {
                    @Override
                    public void configureParser(@NonNull Parser.Builder builder) {
                        builder.inlineParserFactory(inlineParserFactory);
                    }
                })
                .build();

        // note that image is considered a link now
        final String md = "**bold_bold-italic_** <u>html-u</u>, [link](#) ![alt](#image) `code`";
        markwon.setMarkdown(textView, md);
    }

    private void disable_code() {
        // parses all as usual, but ignores code (inline and block)

        final InlineParserFactory inlineParserFactory = MarkwonInlineParser.factoryBuilder()
                .includeDefaults()
                .excludeInlineProcessor(BackticksInlineProcessor.class)
                .build();

        // unfortunately there is no _exclude_ method for parser-builder
        // IndentedCodeBlock.class and FencedCodeBlock.class are missing
        // this is full list (including above) that can be passed to `enabledBlockTypes` method
        //
        // a plain HashSet is used on purpose: "double brace" initialization would create an
        // anonymous subclass that keeps a reference to this Activity
        final Set<Class<? extends Block>> enabledBlocks = new HashSet<>(Arrays.<Class<? extends Block>>asList(
                BlockQuote.class,
                Heading.class,
                HtmlBlock.class,
                ThematicBreak.class,
                ListBlock.class));

        final Markwon markwon = Markwon.builder(this)
                .usePlugin(new AbstractMarkwonPlugin() {
                    @Override
                    public void configureParser(@NonNull Parser.Builder builder) {
                        builder
                                .inlineParserFactory(inlineParserFactory)
                                .enabledBlockTypes(enabledBlocks);
                    }
                })
                .build();

        final String md = "# Head!\n\n" +
                "* one\n" +
                "+ two\n\n" +
                "and **bold** to `you`!\n\n" +
                "> a quote _em_\n\n" +
                "```java\n" +
                "final int i = 0;\n" +
                "```\n\n" +
                "**Good day!**";
        markwon.setMarkdown(textView, md);
    }
}

View File

@ -27,4 +27,6 @@
<string name="sample_editor"># \# Editor\n\n`MarkwonEditor` sample usage to highlight user input in EditText</string> <string name="sample_editor"># \# Editor\n\n`MarkwonEditor` sample usage to highlight user input in EditText</string>
<string name="sample_inline_parser"># \# Inline Parser\n\nUsage of custom inline parser</string>
</resources> </resources>

View File

@ -11,6 +11,7 @@ include ':app', ':sample',
':markwon-image-coil', ':markwon-image-coil',
':markwon-image-glide', ':markwon-image-glide',
':markwon-image-picasso', ':markwon-image-picasso',
':markwon-inline-parser',
':markwon-linkify', ':markwon-linkify',
':markwon-recycler', ':markwon-recycler',
':markwon-recycler-table', ':markwon-recycler-table',