One of the main difficulties I’ve had with autolinking in pandoc is determining whether final punctuation belongs with the link or not. We want to allow things like parentheses in URLs (they come up all the time on Wikipedia), but we don’t want to capture a final parenthesis if the whole URL is in parens. Similarly, a URL can contain a `-` character, but what if the user intends to follow the URL with a textual em-dash (`---`)? Those final `-` characters should not be parsed as part of the URL. So some complex heuristics are needed. Does your library attempt to address this problem?
Here are some of the test cases I used in pandoc, in case it’s helpful. Some of them were borrowed from the Ruby rinku library.
-- | Test cases for autolinking bare URIs.  Each pair holds an input string
-- and the inlines it is expected to produce.  The cases concentrate on
-- whether trailing punctuation and brackets belong to the link or to the
-- surrounding text.  Some of them were borrowed from the Ruby rinku library.
bareLinkTests :: [(String, Inlines)]
bareLinkTests =
  [ -- Ordinary text after the URI stays outside the link.
    ("http://google.com is a search engine.",
     autolink "http://google.com" <> " is a search engine.")
    -- A URI inside an explicit HTML anchor is left as plain text between
    -- the raw HTML tags rather than being autolinked.
  , ("<a href=\"http://foo.bar.baz\">http://foo.bar.baz</a>",
     rawInline "html" "<a href=\"http://foo.bar.baz\">" <>
     "http://foo.bar.baz" <> rawInline "html" "</a>")
    -- A final '.' or ',' is not part of the link, whatever the scheme.
  , ("Try this query: http://google.com?search=fish&time=hour.",
     "Try this query: " <> autolink "http://google.com?search=fish&time=hour" <> ".")
  , ("HTTPS://GOOGLE.COM,",
     autolink "HTTPS://GOOGLE.COM" <> ",")
  , ("http://el.wikipedia.org/wiki/Τεχνολογία,",
     autolink "http://el.wikipedia.org/wiki/Τεχνολογία" <> ",")
  , ("doi:10.1000/182,",
     autolink "doi:10.1000/182" <> ",")
  , ("git://github.com/foo/bar.git,",
     autolink "git://github.com/foo/bar.git" <> ",")
  , ("file:///Users/joe/joe.txt, and",
     autolink "file:///Users/joe/joe.txt" <> ", and")
  , ("mailto:someone@somedomain.com.",
     autolink "mailto:someone@somedomain.com" <> ".")
    -- A scheme name followed by a space is not a link at all.
  , ("Use http: this is not a link!",
     "Use http: this is not a link!")
    -- Parentheses wrapping the whole URI are excluded from the link ...
  , ("(http://google.com).",
     "(" <> autolink "http://google.com" <> ").")
    -- ... but bracket pairs inside the URI are kept.
  , ("http://en.wikipedia.org/wiki/Sprite_(computer_graphics)",
     autolink "http://en.wikipedia.org/wiki/Sprite_(computer_graphics)")
  , ("http://en.wikipedia.org/wiki/Sprite_[computer_graphics]",
     autolink "http://en.wikipedia.org/wiki/Sprite_[computer_graphics]")
  , ("http://en.wikipedia.org/wiki/Sprite_{computer_graphics}",
     autolink "http://en.wikipedia.org/wiki/Sprite_{computer_graphics}")
    -- Hyphens in the interior of a URI are kept.
  , ("http://example.com/Notification_Center-GitHub-20101108-140050.jpg",
     autolink "http://example.com/Notification_Center-GitHub-20101108-140050.jpg")
  , ("https://github.com/github/hubot/blob/master/scripts/cream.js#L20-20",
     autolink "https://github.com/github/hubot/blob/master/scripts/cream.js#L20-20")
    -- The remaining inputs are expected to be linked in their entirety:
    -- ports, '~', percent-escapes, query strings, ';', ':', '@', '=', ',',
    -- '+' and fragments all stay inside the link.
  , ("http://www.rubyonrails.com",
     autolink "http://www.rubyonrails.com")
  , ("http://www.rubyonrails.com:80",
     autolink "http://www.rubyonrails.com:80")
  , ("http://www.rubyonrails.com/~minam",
     autolink "http://www.rubyonrails.com/~minam")
  , ("https://www.rubyonrails.com/~minam",
     autolink "https://www.rubyonrails.com/~minam")
  , ("http://www.rubyonrails.com/~minam/url%20with%20spaces",
     autolink "http://www.rubyonrails.com/~minam/url%20with%20spaces")
  , ("http://www.rubyonrails.com/foo.cgi?something=here",
     autolink "http://www.rubyonrails.com/foo.cgi?something=here")
  , ("http://www.rubyonrails.com/foo.cgi?something=here&and=here",
     autolink "http://www.rubyonrails.com/foo.cgi?something=here&and=here")
  , ("http://www.rubyonrails.com/contact;new",
     autolink "http://www.rubyonrails.com/contact;new")
  , ("http://www.rubyonrails.com/contact;new%20with%20spaces",
     autolink "http://www.rubyonrails.com/contact;new%20with%20spaces")
  , ("http://www.rubyonrails.com/contact;new?with=query&string=params",
     autolink "http://www.rubyonrails.com/contact;new?with=query&string=params")
  , ("http://www.rubyonrails.com/~minam/contact;new?with=query&string=params",
     autolink "http://www.rubyonrails.com/~minam/contact;new?with=query&string=params")
  , ("http://en.wikipedia.org/wiki/Wikipedia:Today%27s_featured_picture_%28animation%29/January_20%2C_2007",
     autolink "http://en.wikipedia.org/wiki/Wikipedia:Today%27s_featured_picture_%28animation%29/January_20%2C_2007")
  , ("http://www.mail-archive.com/rails@lists.rubyonrails.org/",
     autolink "http://www.mail-archive.com/rails@lists.rubyonrails.org/")
  , ("http://www.amazon.com/Testing-Equal-Sign-In-Path/ref=pd_bbs_sr_1?ie=UTF8&s=books&qid=1198861734&sr=8-1",
     autolink "http://www.amazon.com/Testing-Equal-Sign-In-Path/ref=pd_bbs_sr_1?ie=UTF8&s=books&qid=1198861734&sr=8-1")
  , ("http://en.wikipedia.org/wiki/Texas_hold%27em",
     autolink "http://en.wikipedia.org/wiki/Texas_hold%27em")
  , ("https://www.google.com/doku.php?id=gps:resource:scs:start",
     autolink "https://www.google.com/doku.php?id=gps:resource:scs:start")
  , ("http://manuals.ruby-on-rails.com/read/chapter.need_a-period/103#page281",
     autolink "http://manuals.ruby-on-rails.com/read/chapter.need_a-period/103#page281")
  , ("http://foo.example.com/controller/action?parm=value&p2=v2#anchor123",
     autolink "http://foo.example.com/controller/action?parm=value&p2=v2#anchor123")
  , ("http://foo.example.com:3000/controller/action",
     autolink "http://foo.example.com:3000/controller/action")
  , ("http://foo.example.com:3000/controller/action+pack",
     autolink "http://foo.example.com:3000/controller/action+pack")
  , ("http://business.timesonline.co.uk/article/0,,9065-2473189,00.html",
     autolink "http://business.timesonline.co.uk/article/0,,9065-2473189,00.html")
  , ("http://www.mail-archive.com/ruby-talk@ruby-lang.org/",
     autolink "http://www.mail-archive.com/ruby-talk@ruby-lang.org/")
  ]