diff options
| author | Jason A. Donenfeld <Jason@zx2c4.com> | 2015-10-09 15:13:35 +0200 |
|---|---|---|
| committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2015-10-09 15:13:35 +0200 |
| commit | 525c815cc400bc49881144bcd7e7b717bbc1af5d (patch) | |
| tree | 1d4ed0d11a950c45cc1fceb26cd3aa20ed6c2300 /filters | |
| parent | 6edfc1672cdc5eb0dfb0ff5db0ec1de1ec53415e (diff) | |
filters: Simplify converters
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Diffstat (limited to 'filters')
| -rwxr-xr-x | filters/html-converters/md2html | 283 | ||||
| -rwxr-xr-x | filters/html-converters/resources/markdown.pl | 1727 | ||||
| -rw-r--r-- | filters/html-converters/resources/rst-template.txt | 4 | ||||
| -rwxr-xr-x | filters/html-converters/rst2html | 4 |
4 files changed, 284 insertions, 1734 deletions
diff --git a/filters/html-converters/md2html b/filters/html-converters/md2html index 5cab749..138713d 100755 --- a/filters/html-converters/md2html +++ b/filters/html-converters/md2html @@ -1,2 +1,283 @@ #!/bin/sh -exec "$(dirname "$0")/resources/markdown.pl" +cat <<_EOF +<style> +.markdown-body { + font-size: 14px; + line-height: 1.6; + overflow: hidden; +} +.markdown-body>*:first-child { + margin-top: 0 !important; +} +.markdown-body>*:last-child { + margin-bottom: 0 !important; +} +.markdown-body a.absent { + color: #c00; +} +.markdown-body a.anchor { + display: block; + padding-left: 30px; + margin-left: -30px; + cursor: pointer; + position: absolute; + top: 0; + left: 0; + bottom: 0; +} +.markdown-body h1, .markdown-body h2, .markdown-body h3, .markdown-body h4, .markdown-body h5, .markdown-body h6 { + margin: 20px 0 10px; + padding: 0; + font-weight: bold; + -webkit-font-smoothing: antialiased; + cursor: text; + position: relative; +} +.markdown-body h1 .mini-icon-link, .markdown-body h2 .mini-icon-link, .markdown-body h3 .mini-icon-link, .markdown-body h4 .mini-icon-link, .markdown-body h5 .mini-icon-link, .markdown-body h6 .mini-icon-link { + display: none; + color: #000; +} +.markdown-body h1:hover a.anchor, .markdown-body h2:hover a.anchor, .markdown-body h3:hover a.anchor, .markdown-body h4:hover a.anchor, .markdown-body h5:hover a.anchor, .markdown-body h6:hover a.anchor { + text-decoration: none; + line-height: 1; + padding-left: 0; + margin-left: -22px; + top: 15%} +.markdown-body h1:hover a.anchor .mini-icon-link, .markdown-body h2:hover a.anchor .mini-icon-link, .markdown-body h3:hover a.anchor .mini-icon-link, .markdown-body h4:hover a.anchor .mini-icon-link, .markdown-body h5:hover a.anchor .mini-icon-link, .markdown-body h6:hover a.anchor .mini-icon-link { + display: inline-block; +} +.markdown-body h1 tt, .markdown-body h1 code, .markdown-body h2 tt, .markdown-body h2 code, .markdown-body h3 tt, .markdown-body h3 code, .markdown-body h4 tt, .markdown-body h4 code, .markdown-body h5 tt, .markdown-body h5 code, .markdown-body h6 tt, .markdown-body h6 code { + font-size: inherit; +} +.markdown-body h1 { + font-size: 28px; + color: #000; +} +.markdown-body h2 { + font-size: 24px; + border-bottom: 1px solid #ccc; + color: #000; +} +.markdown-body h3 { + font-size: 18px; +} +.markdown-body h4 { + font-size: 16px; +} +.markdown-body h5 { + font-size: 14px; +} +.markdown-body h6 { + color: #777; + font-size: 14px; +} +.markdown-body p, .markdown-body blockquote, .markdown-body ul, .markdown-body ol, .markdown-body dl, .markdown-body table, .markdown-body pre { + margin: 15px 0; +} +.markdown-body hr { + background: transparent url("/dirty-shade.png") repeat-x 0 0; + border: 0 none; + color: #ccc; + height: 4px; + padding: 0; +} +.markdown-body>h2:first-child, .markdown-body>h1:first-child, .markdown-body>h1:first-child+h2, .markdown-body>h3:first-child, .markdown-body>h4:first-child, .markdown-body>h5:first-child, .markdown-body>h6:first-child { + margin-top: 0; + padding-top: 0; +} +.markdown-body a:first-child h1, .markdown-body a:first-child h2, .markdown-body a:first-child h3, .markdown-body a:first-child h4, .markdown-body a:first-child h5, .markdown-body a:first-child h6 { + margin-top: 0; + padding-top: 0; +} +.markdown-body h1+p, .markdown-body h2+p, .markdown-body h3+p, .markdown-body h4+p, .markdown-body h5+p, .markdown-body h6+p { + margin-top: 0; +} +.markdown-body li p.first { + display: inline-block; +} +.markdown-body ul, .markdown-body ol { + padding-left: 30px; +} +.markdown-body ul.no-list, .markdown-body ol.no-list { + list-style-type: none; + padding: 0; +} +.markdown-body ul li>:first-child, .markdown-body ul li ul:first-of-type, .markdown-body ul li ol:first-of-type, .markdown-body ol li>:first-child, .markdown-body ol li ul:first-of-type, .markdown-body ol li ol:first-of-type { + margin-top: 0px; +} +.markdown-body ul li p:last-of-type, .markdown-body ol li p:last-of-type { + margin-bottom: 0; +} +.markdown-body ul ul, .markdown-body ul ol, .markdown-body ol ol, .markdown-body ol ul { + margin-bottom: 0; +} +.markdown-body dl { + padding: 0; +} +.markdown-body dl dt { + font-size: 14px; + font-weight: bold; + font-style: italic; + padding: 0; + margin: 15px 0 5px; +} +.markdown-body dl dt:first-child { + padding: 0; +} +.markdown-body dl dt>:first-child { + margin-top: 0px; +} +.markdown-body dl dt>:last-child { + margin-bottom: 0px; +} +.markdown-body dl dd { + margin: 0 0 15px; + padding: 0 15px; +} +.markdown-body dl dd>:first-child { + margin-top: 0px; +} +.markdown-body dl dd>:last-child { + margin-bottom: 0px; +} +.markdown-body blockquote { + border-left: 4px solid #DDD; + padding: 0 15px; + color: #777; +} +.markdown-body blockquote>:first-child { + margin-top: 0px; +} +.markdown-body blockquote>:last-child { + margin-bottom: 0px; +} +.markdown-body table th { + font-weight: bold; +} +.markdown-body table th, .markdown-body table td { + border: 1px solid #ccc; + padding: 6px 13px; +} +.markdown-body table tr { + border-top: 1px solid #ccc; + background-color: #fff; +} +.markdown-body table tr:nth-child(2n) { + background-color: #f8f8f8; +} +.markdown-body img { + max-width: 100%; + -moz-box-sizing: border-box; + box-sizing: border-box; +} +.markdown-body span.frame { + display: block; + overflow: hidden; +} +.markdown-body span.frame>span { + border: 1px solid #ddd; + display: block; + float: left; + overflow: hidden; + margin: 13px 0 0; + padding: 7px; + width: auto; +} +.markdown-body span.frame span img { + display: block; + float: left; +} +.markdown-body span.frame span span { + clear: both; + color: #333; + display: block; + padding: 5px 0 0; +} +.markdown-body span.align-center { + display: block; + overflow: hidden; + clear: both; +} +.markdown-body span.align-center>span { + display: block; + overflow: hidden; + margin: 13px auto 0; + text-align: center; +} +.markdown-body span.align-center span img { + margin: 0 auto; + text-align: center; +} +.markdown-body span.align-right { + display: block; + overflow: hidden; + clear: both; +} +.markdown-body span.align-right>span { + display: block; + overflow: hidden; + margin: 13px 0 0; + text-align: right; +} +.markdown-body span.align-right span img { + margin: 0; + text-align: right; +} +.markdown-body span.float-left { + display: block; + margin-right: 13px; + overflow: hidden; + float: left; +} +.markdown-body span.float-left span { + margin: 13px 0 0; +} +.markdown-body span.float-right { + display: block; + margin-left: 13px; + overflow: hidden; + float: right; +} +.markdown-body span.float-right>span { + display: block; + overflow: hidden; + margin: 13px auto 0; + text-align: right; +} +.markdown-body code, .markdown-body tt { + margin: 0 2px; + padding: 0px 5px; + border: 1px solid #eaeaea; + background-color: #f8f8f8; + border-radius: 3px; +} +.markdown-body code { + white-space: nowrap; +} +.markdown-body pre>code { + margin: 0; + padding: 0; + white-space: pre; + border: none; + background: transparent; +} +.markdown-body .highlight pre, .markdown-body pre { + background-color: #f8f8f8; + border: 1px solid #ccc; + font-size: 13px; + line-height: 19px; + overflow: auto; + padding: 6px 10px; + border-radius: 3px; +} +.markdown-body pre code, .markdown-body pre tt { + margin: 0; + padding: 0; + background-color: transparent; + border: none; +} +</style> +_EOF +echo "<div class='markdown-body'>" +markdown_py -o html5 +echo "</div>" diff --git a/filters/html-converters/resources/markdown.pl b/filters/html-converters/resources/markdown.pl deleted file mode 100755 index 4c39808..0000000 --- a/filters/html-converters/resources/markdown.pl +++ /dev/null @@ -1,1727 +0,0 @@ -#!/usr/bin/perl - -# -# Markdown -- A text-to-HTML conversion tool for web writers -# -# Copyright (c) 2004 John Gruber -# <http://daringfireball.net/projects/markdown/> -# - - -package Markdown; -require 5.006_000; -use strict; -use warnings; - -use Digest::MD5 qw(md5_hex); -use vars qw($VERSION); -$VERSION = '1.0.1'; -# Tue 14 Dec 2004 - - -# -# Global default settings: -# -my $g_empty_element_suffix = " />"; # Change to ">" for HTML output -my $g_tab_width = 4; - - -# -# Globals: -# - -# Regex to match balanced [brackets]. See Friedl's -# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. -my $g_nested_brackets; -$g_nested_brackets = qr{ - (?> # Atomic matching - [^\[\]]+ # Anything other than brackets - | - \[ - (??{ $g_nested_brackets }) # Recursive set of nested brackets - \] - )* -}x; - - -# Table of hash values for escaped characters: -my %g_escape_table; -foreach my $char (split //, '\\`*_{}[]()>#+-.!') { - $g_escape_table{$char} = md5_hex($char); -} - - -# Global hashes, used by various utility routines -my %g_urls; -my %g_titles; -my %g_html_blocks; - -# Used to track when we're inside an ordered or unordered list -# (see _ProcessListItems() for details): -my $g_list_level = 0; - - -#### Blosxom plug-in interface ########################################## - -# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine -# which posts Markdown should process, using a "meta-markup: markdown" -# header. If it's set to 0 (the default), Markdown will process all -# entries. -my $g_blosxom_use_meta = 0; - -sub start { 1; } -sub story { - my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; - - if ( (! $g_blosxom_use_meta) or - (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) - ){ - $$body_ref = Markdown($$body_ref); - } - 1; -} - - -#### Movable Type plug-in interface ##################################### -eval {require MT}; # Test to see if we're running in MT. -unless ($@) { - require MT; - import MT; - require MT::Template::Context; - import MT::Template::Context; - - eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. - unless ($@) { - require MT::Plugin; - import MT::Plugin; - my $plugin = new MT::Plugin({ - name => "Markdown", - description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", - doc_link => 'http://daringfireball.net/projects/markdown/' - }); - MT->add_plugin( $plugin ); - } - - MT::Template::Context->add_container_tag(MarkdownOptions => sub { - my $ctx = shift; - my $args = shift; - my $builder = $ctx->stash('builder'); - my $tokens = $ctx->stash('tokens'); - - if (defined ($args->{'output'}) ) { - $ctx->stash('markdown_output', lc $args->{'output'}); - } - - defined (my $str = $builder->build($ctx, $tokens) ) - or return $ctx->error($builder->errstr); - $str; # return value - }); - - MT->add_text_filter('markdown' => { - label => 'Markdown', - docs => 'http://daringfireball.net/projects/markdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - my $raw = 0; - if (defined $ctx) { - my $output = $ctx->stash('markdown_output'); - if (defined $output && $output =~ m/^html/i) { - $g_empty_element_suffix = ">"; - $ctx->stash('markdown_output', ''); - } - elsif (defined $output && $output eq 'raw') { - $raw = 1; - $ctx->stash('markdown_output', ''); - } - else { - $raw = 0; - $g_empty_element_suffix = " />"; - } - } - $text = $raw ? $text : Markdown($text); - $text; - }, - }); - - # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: - my $smartypants; - - { - no warnings "once"; - $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; - } - - if ($smartypants) { - MT->add_text_filter('markdown_with_smartypants' => { - label => 'Markdown With SmartyPants', - docs => 'http://daringfireball.net/projects/markdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - if (defined $ctx) { - my $output = $ctx->stash('markdown_output'); - if (defined $output && $output eq 'html') { - $g_empty_element_suffix = ">"; - } - else { - $g_empty_element_suffix = " />"; - } - } - $text = Markdown($text); - $text = $smartypants->($text, '1'); - }, - }); - } -} -else { -#### BBEdit/command-line text filter interface ########################## -# Needs to be hidden from MT (and Blosxom when running in static mode). - - # We're only using $blosxom::version once; tell Perl not to warn us: - no warnings 'once'; - unless ( defined($blosxom::version) ) { - use warnings; - - #### Check for command-line switches: ################# - my %cli_opts; - use Getopt::Long; - Getopt::Long::Configure('pass_through'); - GetOptions(\%cli_opts, - 'version', - 'shortversion', - 'html4tags', - ); - if ($cli_opts{'version'}) { # Version info - print "\nThis is Markdown, version $VERSION.\n"; - print "Copyright 2004 John Gruber\n"; - print "http://daringfireball.net/projects/markdown/\n\n"; - exit 0; - } - if ($cli_opts{'shortversion'}) { # Just the version number string. - print $VERSION; - exit 0; - } - if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML - $g_empty_element_suffix = ">"; - } - - - #### Process incoming text: ########################### - my $text; - { - local $/; # Slurp the whole file - $text = <>; - } - print <<'EOT'; -<style> -.markdown-body { - font-size: 14px; - line-height: 1.6; - overflow: hidden; -} -.markdown-body>*:first-child { - margin-top: 0 !important; -} -.markdown-body>*:last-child { - margin-bottom: 0 !important; -} -.markdown-body a.absent { - color: #c00; -} -.markdown-body a.anchor { - display: block; - padding-left: 30px; - margin-left: -30px; - cursor: pointer; - position: absolute; - top: 0; - left: 0; - bottom: 0; -} -.markdown-body h1, .markdown-body h2, .markdown-body h3, .markdown-body h4, .markdown-body h5, .markdown-body h6 { - margin: 20px 0 10px; - padding: 0; - font-weight: bold; - -webkit-font-smoothing: antialiased; - cursor: text; - position: relative; -} -.markdown-body h1 .mini-icon-link, .markdown-body h2 .mini-icon-link, .markdown-body h3 .mini-icon-link, .markdown-body h4 .mini-icon-link, .markdown-body h5 .mini-icon-link, .markdown-body h6 .mini-icon-link { - display: none; - color: #000; -} -.markdown-body h1:hover a.anchor, .markdown-body h2:hover a.anchor, .markdown-body h3:hover a.anchor, .markdown-body h4:hover a.anchor, .markdown-body h5:hover a.anchor, .markdown-body h6:hover a.anchor { - text-decoration: none; - line-height: 1; - padding-left: 0; - margin-left: -22px; - top: 15%} -.markdown-body h1:hover a.anchor .mini-icon-link, .markdown-body h2:hover a.anchor .mini-icon-link, .markdown-body h3:hover a.anchor .mini-icon-link, .markdown-body h4:hover a.anchor .mini-icon-link, .markdown-body h5:hover a.anchor .mini-icon-link, .markdown-body h6:hover a.anchor .mini-icon-link { - display: inline-block; -} -.markdown-body h1 tt, .markdown-body h1 code, .markdown-body h2 tt, .markdown-body h2 code, .markdown-body h3 tt, .markdown-body h3 code, .markdown-body h4 tt, .markdown-body h4 code, .markdown-body h5 tt, .markdown-body h5 code, .markdown-body h6 tt, .markdown-body h6 code { - font-size: inherit; -} -.markdown-body h1 { - font-size: 28px; - color: #000; -} -.markdown-body h2 { - font-size: 24px; - border-bottom: 1px solid #ccc; - color: #000; -} -.markdown-body h3 { - font-size: 18px; -} -.markdown-body h4 { - font-size: 16px; -} -.markdown-body h5 { - font-size: 14px; -} -.markdown-body h6 { - color: #777; - font-size: 14px; -} -.markdown-body p, .markdown-body blockquote, .markdown-body ul, .markdown-body ol, .markdown-body dl, .markdown-body table, .markdown-body pre { - margin: 15px 0; -} -.markdown-body hr { - background: transparent url("/dirty-shade.png") repeat-x 0 0; - border: 0 none; - color: #ccc; - height: 4px; - padding: 0; -} -.markdown-body>h2:first-child, .markdown-body>h1:first-child, .markdown-body>h1:first-child+h2, .markdown-body>h3:first-child, .markdown-body>h4:first-child, .markdown-body>h5:first-child, .markdown-body>h6:first-child { - margin-top: 0; - padding-top: 0; -} -.markdown-body a:first-child h1, .markdown-body a:first-child h2, .markdown-body a:first-child h3, .markdown-body a:first-child h4, .markdown-body a:first-child h5, .markdown-body a:first-child h6 { - margin-top: 0; - padding-top: 0; -} -.markdown-body h1+p, .markdown-body h2+p, .markdown-body h3+p, .markdown-body h4+p, .markdown-body h5+p, .markdown-body h6+p { - margin-top: 0; -} -.markdown-body li p.first { - display: inline-block; -} -.markdown-body ul, .markdown-body ol { - padding-left: 30px; -} -.markdown-body ul.no-list, .markdown-body ol.no-list { - list-style-type: none; - padding: 0; -} -.markdown-body ul li>:first-child, .markdown-body ul li ul:first-of-type, .markdown-body ul li ol:first-of-type, .markdown-body ol li>:first-child, .markdown-body ol li ul:first-of-type, .markdown-body ol li ol:first-of-type { - margin-top: 0px; -} -.markdown-body ul li p:last-of-type, .markdown-body ol li p:last-of-type { - margin-bottom: 0; -} -.markdown-body ul ul, .markdown-body ul ol, .markdown-body ol ol, .markdown-body ol ul { - margin-bottom: 0; -} -.markdown-body dl { - padding: 0; -} -.markdown-body dl dt { - font-size: 14px; - font-weight: bold; - font-style: italic; - padding: 0; - margin: 15px 0 5px; -} -.markdown-body dl dt:first-child { - padding: 0; -} -.markdown-body dl dt>:first-child { - margin-top: 0px; -} -.markdown-body dl dt>:last-child { - margin-bottom: 0px; -} -.markdown-body dl dd { - margin: 0 0 15px; - padding: 0 15px; -} -.markdown-body dl dd>:first-child { - margin-top: 0px; -} -.markdown-body dl dd>:last-child { - margin-bottom: 0px; -} -.markdown-body blockquote { - border-left: 4px solid #DDD; - padding: 0 15px; - color: #777; -} -.markdown-body blockquote>:first-child { - margin-top: 0px; -} -.markdown-body blockquote>:last-child { - margin-bottom: 0px; -} -.markdown-body table th { - font-weight: bold; -} -.markdown-body table th, .markdown-body table td { - border: 1px solid #ccc; - padding: 6px 13px; -} -.markdown-body table tr { - border-top: 1px solid #ccc; - background-color: #fff; -} -.markdown-body table tr:nth-child(2n) { - background-color: #f8f8f8; -} -.markdown-body img { - max-width: 100%; - -moz-box-sizing: border-box; - box-sizing: border-box; -} -.markdown-body span.frame { - display: block; - overflow: hidden; -} -.markdown-body span.frame>span { - border: 1px solid #ddd; - display: block; - float: left; - overflow: hidden; - margin: 13px 0 0; - padding: 7px; - width: auto; -} -.markdown-body span.frame span img { - display: block; - float: left; -} -.markdown-body span.frame span span { - clear: both; - color: #333; - display: block; - padding: 5px 0 0; -} -.markdown-body span.align-center { - display: block; - overflow: hidden; - clear: both; -} -.markdown-body span.align-center>span { - display: block; - overflow: hidden; - margin: 13px auto 0; - text-align: center; -} -.markdown-body span.align-center span img { - margin: 0 auto; - text-align: center; -} -.markdown-body span.align-right { - display: block; - overflow: hidden; - clear: both; -} -.markdown-body span.align-right>span { - display: block; - overflow: hidden; - margin: 13px 0 0; - text-align: right; -} -.markdown-body span.align-right span img { - margin: 0; - text-align: right; -} -.markdown-body span.float-left { - display: block; - margin-right: 13px; - overflow: hidden; - float: left; -} -.markdown-body span.float-left span { - margin: 13px 0 0; -} -.markdown-body span.float-right { - display: block; - margin-left: 13px; - overflow: hidden; - float: right; -} -.markdown-body span.float-right>span { - display: block; - overflow: hidden; - margin: 13px auto 0; - text-align: right; -} -.markdown-body code, .markdown-body tt { - margin: 0 2px; - padding: 0px 5px; - border: 1px solid #eaeaea; - background-color: #f8f8f8; - border-radius: 3px; -} -.markdown-body code { - white-space: nowrap; -} -.markdown-body pre>code { - margin: 0; - padding: 0; - white-space: pre; - border: none; - background: transparent; -} -.markdown-body .highlight pre, .markdown-body pre { - background-color: #f8f8f8; - border: 1px solid #ccc; - font-size: 13px; - line-height: 19px; - overflow: auto; - padding: 6px 10px; - border-radius: 3px; -} -.markdown-body pre code, .markdown-body pre tt { - margin: 0; - padding: 0; - background-color: transparent; - border: none; -} -</style> -EOT - print "<div class='markdown-body'>"; - print Markdown($text); - print "</div>"; - } -} - - - -sub Markdown { -# -# Main function. The order in which other subs are called here is -# essential. Link and image substitutions need to happen before -# _EscapeSpecialChars(), so that any *'s or _'s in the <a> -# and <img> tags get encoded. -# - my $text = shift; - - # Clear the global hashes. If we don't clear these, you get conflicts - # from other articles when generating a page which contains more than - # one article (e.g. an index page that shows the N most recent - # articles): - %g_urls = (); - %g_titles = (); - %g_html_blocks = (); - - - # Standardize line endings: - $text =~ s{\r\n}{\n}g; # DOS to Unix - $text =~ s{\r}{\n}g; # Mac to Unix - - # Make sure $text ends with a couple of newlines: - $text .= "\n\n"; - - # Convert all tabs to spaces. - $text = _Detab($text); - - # Strip any lines consisting only of spaces and tabs. - # This makes subsequent regexen easier to write, because we can - # match consecutive blank lines with /\n+/ instead of something - # contorted like /[ \t]*\n+/ . - $text =~ s/^[ \t]+$//mg; - - # Turn block-level HTML blocks into hash entries - $text = _HashHTMLBlocks($text); - - # Strip link definitions, store in hashes. - $text = _StripLinkDefinitions($text); - - $text = _RunBlockGamut($text); - - $text = _UnescapeSpecialChars($text); - - return $text . "\n"; -} - - -sub _StripLinkDefinitions { -# -# Strips link definitions from text, stores the URLs and titles in -# hash references. -# - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Link defs are in the form: ^[id]: url "optional title" - while ($text =~ s{ - ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1 - [ \t]* - \n? # maybe *one* newline - [ \t]* - <?(\S+?)>? # url = $2 - [ \t]* - \n? # maybe one newline - [ \t]* - (?: - (?<=\s) # lookbehind for whitespace - ["(] - (.+?) # title = $3 - [")] - [ \t]* - )? # title is optional - (?:\n+|\Z) - } - {}mx) { - $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive - if ($3) { - $g_titles{lc $1} = $3; - $g_titles{lc $1} =~ s/"/"/g; - } - } - - return $text; -} - - -sub _HashHTMLBlocks { - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Hashify HTML blocks: - # We only want to do this for block-level HTML tags, such as headers, - # lists, and tables. That's because we still want to wrap <p>s around - # "paragraphs" that are wrapped in non-block-level tags, such as anchors, - # phrase emphasis, and spans. The list of tags we're looking for is - # hard-coded: - my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; - my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; - - # First, look for nested blocks, e.g.: - # <div> - # <div> - # tags for inner block must be indented. - # </div> - # </div> - # - # The outermost tags must start at the left margin for this to match, and - # the inner nested divs must be indented. - # We need to do this before the next, more liberal match, because the next - # match will start at the first `<div>` and stop at the first `</div>`. - $text =~ s{ - ( # save in $1 - ^ # start of line (with /m) - <($block_tags_a) # start tag = $2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - </\2> # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egmx; - - - # - # Now match more liberally, simply from `\n<tag>` to `</tag>\n` - # - $text =~ s{ - ( # save in $1 - ^ # start of line (with /m) - <($block_tags_b) # start tag = $2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - .*</\2> # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egmx; - # Special case just for <hr />. It was easier to make a special case than - # to make the other regex more complicated. - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - <(hr) # start tag = $2 - \b # word break - ([^<>])*? # - /?> # the matching end tag - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - # Special case for standalone HTML comments: - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - (?s: - <! - (--.*?--\s*)+ - > - ) - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - - return $text; -} - - -sub _RunBlockGamut { -# -# These are all the transformations that form block-level -# tags like paragraphs, headers, and list items. -# - my $text = shift; - - $text = _DoHeaders($text); - - # Do Horizontal Rules: - $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; - $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; - $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; - - $text = _DoLists($text); - - $text = _DoCodeBlocks($text); - - $text = _DoBlockQuotes($text); - - # We already ran _HashHTMLBlocks() before, in Markdown(), but that - # was to escape raw HTML in the original Markdown source. This time, - # we're escaping the markup we've just created, so that we don't wrap - # <p> tags around block-level tags. - $text = _HashHTMLBlocks($text); - - $text = _FormParagraphs($text); - - return $text; -} - - -sub _RunSpanGamut { -# -# These are all the transformations that occur *within* block-level -# tags like paragraphs, headers, and list items. -# - my $text = shift; - - $text = _DoCodeSpans($text); - - $text = _EscapeSpecialChars($text); - - # Process anchor and image tags. Images must come first, - # because ![foo][f] looks like an anchor. - $text = _DoImages($text); - $text = _DoAnchors($text); - - # Make links out of things like `<http://example.com/>` - # Must come after _DoAnchors(), because you can use < and > - # delimiters in inline links like [this](<url>). - $text = _DoAutoLinks($text); - - $text = _EncodeAmpsAndAngles($text); - - $text = _DoItalicsAndBold($text); - - # Do hard breaks: - $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g; - - return $text; -} - - -sub _EscapeSpecialChars { - my $text = shift; - my $tokens ||= _TokenizeHTML($text); - - $text = ''; # rebuild $text from the tokens -# my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags. -# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!; - - foreach my $cur_token (@$tokens) { - if ($cur_token->[0] eq "tag") { - # Within tags, encode * and _ so they don't conflict - # with their use in Markdown for italics and strong. - # We're replacing each such character with its - # corresponding MD5 checksum value; this is likely - # overkill, but it should prevent us from colliding - # with the escape values by accident. - $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx; - $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx; - $text .= $cur_token->[1]; - } else { - my $t = $cur_token->[1]; - $t = _EncodeBackslashEscapes($t); - $text .= $t; - } - } - return $text; -} - - -sub _DoAnchors { -# -# Turn Markdown link shortcuts into XHTML <a> tags. -# - my $text = shift; - - # - # First, handle reference-style links: [link text] [id] - # - $text =~ s{ - ( # wrap whole match in $1 - \[ - ($g_nested_brackets) # link text = $2 - \] - - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - - \[ - (.*?) # id = $3 - \] - ) - }{ - my $result; - my $whole_match = $1; - my $link_text = $2; - my $link_id = lc $3; - - if ($link_id eq "") { - $link_id = lc $link_text; # for shortcut links like [this][]. - } - - if (defined $g_urls{$link_id}) { - my $url = $g_urls{$link_id}; - $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid - $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. - $result = "<a href=\"$url\""; - if ( defined $g_titles{$link_id} ) { - my $title = $g_titles{$link_id}; - $title =~ s! \* !$g_escape_table{'*'}!gx; - $title =~ s! _ !$g_escape_table{'_'}!gx; - $result .= " title=\"$title\""; - } - $result .= ">$link_text</a>"; - } - else { - $result = $whole_match; - } - $result; - }xsge; - - # - # Next, inline-style links: [link text](url "optional title") - # - $text =~ s{ - ( # wrap whole match in $1 - \[ - ($g_nested_brackets) # link text = $2 - \] - \( # literal paren - [ \t]* - <?(.*?)>? # href = $3 - [ \t]* - ( # $4 - (['"]) # quote char = $5 - (.*?) # Title = $6 |
