diff options
Diffstat (limited to 'Bugzilla/Markdown.pm')
-rw-r--r-- | Bugzilla/Markdown.pm | 520 |
1 files changed, 0 insertions, 520 deletions
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# This Source Code Form is "Incompatible With Secondary Licenses", as
# defined by the Mozilla Public License, v. 2.0.

package Bugzilla::Markdown;

use 5.10.1;
use strict;
use warnings;

use Bugzilla::Constants;
use Bugzilla::Template;

use Digest::MD5 qw(md5_hex);

use parent qw(Text::Markdown);

@Bugzilla::Markdown::EXPORT = qw(new);

# NOTE: the text handed to the Markdown engine has already been passed
# through Bugzilla::Template::quoteUrls() (see _Markdown() below), so the
# characters < > " & @ arrive as the entities &lt; &gt; &quot; &amp; &#64;.
# Several overrides in this file exist only to match those entities where
# the parent class would match the raw characters.

# Regex to match balanced [brackets]. See Friedl's
# "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
our ($g_nested_brackets, $g_nested_parens);
$g_nested_brackets = qr{
    (?>                                 # Atomic matching
       [^\[\]]+                         # Anything other than brackets
     |
       \[
         (??{ $g_nested_brackets })     # Recursive set of nested brackets
       \]
    )*
}x;
# Doesn't allow for whitespace, because we're using it to match URLs:
$g_nested_parens = qr{
    (?>                                 # Atomic matching
       [^()\s]+                         # Anything other than parens or whitespace
     |
       \(
         (??{ $g_nested_parens })       # Recursive set of nested parens
       \)
    )*
}x;

# Maps each Markdown-significant token to an MD5 placeholder, so that a
# token can be hidden from later transformations and restored afterwards
# by _UnescapeSpecialChars().
our %g_escape_table;
foreach my $char (split //, '\\`*_{}[]()>#+-.!~') {
    $g_escape_table{$char} = md5_hex($char);
}
# The escaped form of '<' is a multi-character token, so it cannot be part
# of the per-character list above.
$g_escape_table{'&lt;'} = md5_hex('&lt;');

# Constructor. Accepts a class name or an existing object as invocant and
# returns a Text::Markdown-derived object configured for Bugzilla.
sub new {
    my $invocant = shift;
    my $class = ref $invocant || $invocant;
    return $class->SUPER::new(tab_width => MARKDOWN_TAB_WIDTH,
                              # Bugzilla uses HTML not XHTML
                              empty_element_suffix => '>');
}

# Public entry point. Renders $text as Markdown only when the current
# user has the 'use_markdown' setting both enabled and switched on;
# otherwise falls back to Bugzilla's plain linkification.
sub markdown {
    my $self = shift;
    my $text = shift;
    my $user = Bugzilla->user;

    if ($user->settings->{use_markdown}->{is_enabled}
        && $user->setting('use_markdown') eq 'on')
    {
        return $self->SUPER::markdown($text, @_);
    }

    return Bugzilla::Template::quoteUrls($text);
}

# Runs Bugzilla's quoteUrls() over the text before handing it to the
# parent implementation.
sub _Markdown {
    my $self = shift;
    my $text = shift;

    # NOTE(review): the trailing true argument presumably tells quoteUrls()
    # that the text is destined for Markdown -- confirm against its signature.
    $text = Bugzilla::Template::quoteUrls($text, undef, undef, undef, undef, 1);

    return $self->SUPER::_Markdown($text, @_);
}

sub _RunSpanGamut {
    # These are all the transformations that occur *within* block-level
    # tags like paragraphs, headers, and list items.

    my ($self, $text) = @_;

    $text = $self->_DoCodeSpans($text);
    $text = $self->_EscapeSpecialCharsWithinTagAttributes($text);
    $text = $self->_EscapeSpecialChars($text);

    $text = $self->_DoAnchors($text);

    # Strikethroughs is Bugzilla's extension
    $text = $self->_DoStrikethroughs($text);

    $text = $self->_DoAutoLinks($text);
    $text = $self->_EncodeAmpsAndAngles($text);
    $text = $self->_DoItalicsAndBold($text);

    # Every newline inside a span becomes a hard line break.
    $text =~ s/\n/<br$self->{empty_element_suffix}\n/g;

    return $text;
}

# Override to check for HTML-escaped <>" chars.
sub _StripLinkDefinitions {
#
# Strips link definitions from text, stores the URLs and titles in
# hash references.
#
    my ($self, $text) = @_;
    my $less_than_tab = $self->{tab_width} - 1;

    # Link defs are in the form: ^[id]: url "optional title"
    # The URL has already been linkified by quoteUrls(), hence the <a> tag.
    while ($text =~ s{
            ^[ ]{0,$less_than_tab}\[(.+)\]: # id = \$1
              [ \t]*
              \n?               # maybe *one* newline
              [ \t]*
            (?:&lt;)?<a\s+href="(.+?)">\2</a>(?:&gt;)? # url = \$2
              [ \t]*
              \n?               # maybe one newline
              [ \t]*
            (?:
                (?<=\s)         # lookbehind for whitespace
                (?:&quot;|\()
                (.+?)           # title = \$3
                (?:&quot;|\))
                [ \t]*
            )?  # title is optional
            (?:\n+|\Z)
        }{}omx) {
        # Copy the captures before any further regex can disturb them.
        # Link IDs are case-insensitive.
        my ($id, $url, $title) = (lc $1, $2, $3);

        $self->{_urls}{$id} = $self->_EncodeAmpsAndAngles($url);

        # Test definedness, not truth, so that a title of "0" is kept.
        if (defined $title) {
            $title =~ s/"/&quot;/g;
            $self->{_titles}{$id} = $title;
        }
    }

    return $text;
}

# We need to look for HTML-escaped '<' and '>' (i.e. &lt; and &gt;).
# We also remove Email linkification from the original implementation
# as it is already done in Bugzilla's quoteUrls().
sub _DoAutoLinks {
    my ($self, $text) = @_;

    $text =~ s{(?:<|&lt;)((?:https?|ftp):[^'">\s]+?)(?:>|&gt;)}{<a href="$1">$1</a>}gi;
    return $text;
}

# The main reasons for overriding this method are
# resolving URL conflicts with Bugzilla's quoteUrls()
# and also changing '"' to '&quot;' in regular expressions wherever needed.
sub _DoAnchors {
#
# Turn Markdown link shortcuts into <a> tags.
#
    my ($self, $text) = @_;

    # We revert linkifications of non-email links and only
    # those links whose URL and title are the same because
    # this way we can be sure that link is generated by quoteUrls()
    $text =~ s@<a \s+ href="(?! mailto ) (.+?)">\1</a>@$1@xmg;

    #
    # First, handle reference-style links: [link text] [id]
    #
    $text =~ s{
        (                   # wrap whole match in \$1
          \[
            ($g_nested_brackets)    # link text = \$2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)           # id = \$3
          \]
        )
    }{
        my $whole_match = $1;
        my $link_text   = $2;
        my $link_id     = lc $3;

        if ($link_id eq "") {
            $link_id = lc $link_text;   # for shortcut links like [this][].
        }

        $link_id =~ s{[ ]*\n}{ }g;      # turn embedded newlines into spaces

        $self->_GenerateAnchor($whole_match, $link_text, $link_id);
    }xsge;

    #
    # Next, inline-style links: [link text](url "optional title")
    #
    $text =~ s{
        (                   # wrap whole match in \$1
          \[
            ($g_nested_brackets)    # link text = \$2
          \]
          \(                # literal paren
            [ \t]*
            ($g_nested_parens)      # href = \$3
            [ \t]*
            (               # \$4
              (&quot;|')    # quote char = \$5
              (.*?)         # Title = \$6
              \5            # matching quote
              [ \t]*        # ignore any spaces/tabs between closing quote and )
            )?              # title is optional
          \)
        )
    }{
        my $whole_match = $1;
        my $link_text   = $2;
        my $url         = $3;
        my $title       = $6;

        # Remove Bugzilla quoteUrls() linkification
        if ($url =~ /^a href="/ && $url =~ m|</a$|) {
            $url =~ s/^[^>]+>//;
            $url =~ s@</a$@@;
        }

        # Limit URL to HTTP/HTTPS links
        $url = "http://$url" unless $url =~ m!^https?://!i;

        $self->_GenerateAnchor($whole_match, $link_text, undef, $url, $title);
    }xsge;

    #
    # Last, handle reference-style shortcuts: [link text]
    # These must come last in case you've also got [link test][1]
    # or [link test](/foo)
    #
    $text =~ s{
        (                   # wrap whole match in \$1
          \[
            ([^\[\]]+)      # link text = \$2; can't contain '[' or ']'
          \]
        )
    }{
        my $whole_match = $1;
        my $link_text   = $2;
        (my $link_id = lc $2) =~ s{[ ]*\n}{ }g; # lower-case and turn embedded newlines into spaces

        $self->_GenerateAnchor($whole_match, $link_text, $link_id);
    }xsge;

    return $text;
}

# The purpose of overriding this function is to add support
# for a Github Flavored Markdown (GFM) feature called 'Multiple
# underscores in words'. The standard markdown specification
# specifies the underscore for making the text emphasized/bold.
# However, some variable names in programming languages contain underscores
# and we do not want a part of those variables to look emphasized/bold.
# Instead, we render them as the way they originally are.
sub _DoItalicsAndBold {
    my ($self, $text) = @_;

    # Handle at beginning of lines:
    $text =~ s{ (^__ (?=\S) (.+?[*_]*) (?<=\S) __ (?!\S)) }
              { _has_multiple_underscores($2) ? $1 : "<strong>$2</strong>" }gsxe;

    $text =~ s{ ^\*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }{<strong>$1</strong>}gsx;

    $text =~ s{ (^_ (?=\S) (.+?) (?<=\S) _ (?!\S)) }
              { _has_multiple_underscores($2) ? $1 : "<em>$2</em>" }gsxe;

    $text =~ s{ ^\* (?=\S) (.+?) (?<=\S) \* }{<em>$1</em>}gsx;

    # <strong> must go first:
    $text =~ s{ ( (?<=\W) __ (?=\S) (.+?[*_]*) (?<=\S) __ (?!\S) ) }
              { _has_multiple_underscores($2) ? $1 : "<strong>$2</strong>" }gsxe;

    $text =~ s{ (?<=\W) \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }{<strong>$1</strong>}gsx;

    $text =~ s{ ( (?<=\W) _ (?=\S) (.+?) (?<=\S) _ (?!\S) ) }
              { _has_multiple_underscores($2) ? $1 : "<em>$2</em>" }gsxe;

    $text =~ s{ (?<=\W) \* (?=\S) (.+?) (?<=\S) \* }{<em>$1</em>}gsx;

    # And now, a second pass to catch nested strong and emphasis special cases
    # NOTE(review): unlike the first pass, the two underscore rules below
    # test the trailing run (capture 3), not the emphasised text (capture 2).
    # Kept as found; confirm that this is intentional.
    $text =~ s{ ( (?<=\W) __ (?=\S) (.+?[*_]*) (?<=\S) __ (\S*) ) }
              { _has_multiple_underscores($3) ? $1 : "<strong>$2</strong>$3" }gsxe;

    $text =~ s{ (?<=\W) \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }{<strong>$1</strong>}gsx;

    $text =~ s{ ( (?<=\W) _ (?=\S) (.+?) (?<=\S) _ (\S*) ) }
              { _has_multiple_underscores($3) ? $1 : "<em>$2</em>$3" }gsxe;

    $text =~ s{ (?<=\W) \* (?=\S) (.+?) (?<=\S) \* }{<em>$1</em>}gsx;

    return $text;
}

# Bugzilla extension: turns ~~text~~ into <del>text</del>. The struck text
# may not contain '~', and the closing '~~' may not be followed by '~'.
sub _DoStrikethroughs {
    my ($self, $text) = @_;

    # At the very start of the text ...
    $text =~ s{ ^ ~~ (?=\S) ([^~]+?) (?<=\S) ~~ (?!~) }{<del>$1</del>}gsx;
    # ... and anywhere the opener follows '_' or a non-word, non-'~' character.
    $text =~ s{ (?<=_|[^~\w]) ~~ (?=\S) ([^~]+?) (?<=\S) ~~ (?!~) }{<del>$1</del>}gsx;

    return $text;
}

# The original _DoCodeSpans() uses the 's' modifier in its regex
# which prevents _DoCodeBlocks() to match GFM fenced code blocks.
# We copy the code from the original implementation and remove the
# 's' modifier from it.
sub _DoCodeSpans {
    my ($self, $text) = @_;

    $text =~ s@
            (?<!\\)     # Character before opening ` can't be a backslash
            (`+)        # \$1 = Opening run of `
            (.+?)       # \$2 = The code block
            (?<!`)
            \1          # Matching closer
            (?!`)
        @
            my $c = "$2";
            $c =~ s/^[ \t]*//g; # leading whitespace
            $c =~ s/[ \t]*$//g; # trailing whitespace
            $c = $self->_EncodeCode($c);
            "<code>$c</code>";
        @egx;

    return $text;
}

# Override to add GFM Fenced Code Blocks
sub _DoCodeBlocks {
    my ($self, $text) = @_;

    # A fenced block is opened and closed by a line of three or more backticks.
    $text =~ s{
        ^ `{3,} [\s\t]* \n
        (                       # \$1 = the entire code block
          (?: .* \n+)+?
        )
        `{3,} [\s\t]* $
        }{
            my $codeblock = $1;
            my $result;

            $codeblock = $self->_EncodeCode($codeblock);
            $codeblock = $self->_Detab($codeblock);
            $codeblock =~ s/\n\z//; # remove the trailing newline

            $result = "\n\n<pre><code>" . $codeblock . "</code></pre>\n\n";
            $result;
        }egmx;

    # And now do the standard code blocks
    $text = $self->SUPER::_DoCodeBlocks($text);

    return $text;
}

# Block quotes. The quote marker is matched in its HTML-escaped form
# (&gt;) because the input has already been HTML-quoted.
sub _DoBlockQuotes {
    my ($self, $text) = @_;

    $text =~ s{
          (                             # Wrap whole match in \$1
            (?:
              ^[ \t]*&gt;[ \t]?         # '>' at the start of a line
                .+\n                    # rest of the first line
              (?:.+\n)*                 # subsequent consecutive lines
              \n*                       # blanks
            )+
          )
        }{
            my $bq = $1;
            $bq =~ s/^[ \t]*&gt;[ \t]?//gm; # trim one level of quoting
            $bq =~ s/^[ \t]+$//mg;          # trim whitespace-only lines
            $bq = $self->_RunBlockGamut($bq, {wrap_in_p_tags => 1}); # recurse
            $bq =~ s/^/ /mg;
            # These leading spaces screw with <pre> content, so we need to fix that:
            $bq =~ s{(\s*<pre>.+?</pre>)}{
                        my $pre = $1;
                        $pre =~ s/^ //mg;
                        $pre;
                    }egs;
            "<blockquote>\n$bq\n</blockquote>\n\n";
        }egmx;

    return $text;
}

# Prepares the content of a code span/block: undoes Bugzilla's HTML
# quoting and linkification, lets the parent class re-encode the result,
# then hides '~' and '&lt;' behind placeholders.
sub _EncodeCode {
    my ($self, $text) = @_;

    # We need to unescape the escaped HTML characters in code blocks.
    # These are the reverse of the escapings done in Bugzilla::Util::html_quote()
    $text =~ s/&lt;/</g;
    $text =~ s/&gt;/>/g;
    $text =~ s/&quot;/"/g;
    $text =~ s/&#64;/@/g;
    # '&amp;' substitution must be the last one, otherwise a literal like '&gt;'
    # will turn to '>' because '&' is already changed to '&amp;' in Bugzilla::Util::html_quote().
    # In other words, html_quote() will change '&gt;' to '&amp;gt;' and then we will
    # change '&amp;gt' -> '&gt;' -> '>' if we write this substitution as the first one.
    $text =~ s/&amp;/&/g;
    # Strip the <a> wrapper from URLs and e-mail addresses inside code.
    $text =~ s{<a \s+ href="(?:mailto:)? (.+?)"> \1 </a>}{$1}xmgi;
    $text = $self->SUPER::_EncodeCode($text);
    $text =~ s/~/$g_escape_table{'~'}/go;
    # Encode '&lt;' to prevent URLs from getting linkified in code spans
    $text =~ s/&lt;/$g_escape_table{'&lt;'}/go;

    return $text;
}

# Adds '\~' to the set of backslash escapes handled by the parent class.
sub _EncodeBackslashEscapes {
    my ($self, $text) = @_;

    $text = $self->SUPER::_EncodeBackslashEscapes($text);
    $text =~ s/\\~/$g_escape_table{'~'}/go;

    return $text;
}

# Restores the placeholders introduced by this class ('~' and '&lt;')
# after the parent class has restored its own.
sub _UnescapeSpecialChars {
    my ($self, $text) = @_;

    $text = $self->SUPER::_UnescapeSpecialChars($text);
    $text =~ s/$g_escape_table{'~'}/~/go;
    $text =~ s/$g_escape_table{'&lt;'}/&lt;/go;

    return $text;
}

# Check if the passed string is of the form multiple_underscores_in_a_word.
# To check that, we first need to make sure that the string does not contain
# any white-space. Then, if the string is composed of non-space chunks which
# are bound together with underscores, the string has the desired form.
#
# Returns 1 when the string has that form, 0 otherwise (including for an
# undefined or empty string).
sub _has_multiple_underscores {
    my $string = shift;
    return 0 unless defined($string) && length($string);
    return 0 if $string =~ /\s/;

    # Split in list context: a scalar-context split is the deprecated
    # "implicit split to @_" form on the perl 5.10 this file supports.
    my @chunks = split /_/, $string;
    return @chunks > 1 ? 1 : 0;
}

1;

__END__

=head1 NAME

Bugzilla::Markdown - Generates HTML output from structured plain-text input.

=head1 SYNOPSIS

 use Bugzilla::Markdown;

 my $markdown = Bugzilla::Markdown->new();
 print $markdown->markdown($text);

=head1 DESCRIPTION

Bugzilla::Markdown implements a Markdown engine that produces
an HTML-based output from a given plain-text input.

The majority of the implementation is done by C<Text::Markdown>
CPAN module. It also applies the linkifications done in L<Bugzilla::Template>
to the input resulting in an output which is a combination of both Markdown
structures and those defined by Bugzilla itself.

=head2 Accessors

=over

=item C<markdown>

C<string> Produces an HTML-based output string based on the structures
and format defined in the given plain-text input.

=over

=item B<Params>

=over

=item C<text>

C<string> A plain-text string which includes Markdown structures.

=back

=back

=back