aboutsummaryrefslogtreecommitdiffstats
path: root/Bugzilla/Markdown.pm
diff options
context:
space:
mode:
Diffstat (limited to 'Bugzilla/Markdown.pm')
-rw-r--r--Bugzilla/Markdown.pm520
1 files changed, 0 insertions, 520 deletions
diff --git a/Bugzilla/Markdown.pm b/Bugzilla/Markdown.pm
deleted file mode 100644
index 353c2ff6a..000000000
--- a/Bugzilla/Markdown.pm
+++ /dev/null
@@ -1,520 +0,0 @@
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#
-# This Source Code Form is "Incompatible With Secondary Licenses", as
-# defined by the Mozilla Public License, v. 2.0.
-
-package Bugzilla::Markdown;
-
-use 5.10.1;
-use strict;
-use warnings;
-
-use Bugzilla::Constants;
-use Bugzilla::Template;
-
-use Digest::MD5 qw(md5_hex);
-
-use parent qw(Text::Markdown);
-
-@Bugzilla::Markdown::EXPORT = qw(new);
-
-# Regex to match balanced [brackets]. See Friedl's
-# "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
-our ($g_nested_brackets, $g_nested_parens);
-$g_nested_brackets = qr{
- (?> # Atomic matching
- [^\[\]]+ # Anything other than brackets
- |
- \[
- (??{ $g_nested_brackets }) # Recursive set of nested brackets
- \]
- )*
-}x;
-# Doesn't allow for whitespace, because we're using it to match URLs:
-$g_nested_parens = qr{
- (?> # Atomic matching
- [^()\s]+ # Anything other than parens or whitespace
- |
- \(
- (??{ $g_nested_parens }) # Recursive set of nested brackets
- \)
- )*
-}x;
-
-our %g_escape_table;
-foreach my $char (split //, '\\`*_{}[]()>#+-.!~') {
- $g_escape_table{$char} = md5_hex($char);
-}
-$g_escape_table{'<'} = md5_hex('<');
-
-sub new {
- my $invocant = shift;
- my $class = ref $invocant || $invocant;
- return $class->SUPER::new(tab_width => MARKDOWN_TAB_WIDTH,
- # Bugzilla uses HTML not XHTML
- empty_element_suffix => '>');
-}
-
-sub markdown {
- my $self = shift;
- my $text = shift;
- my $user = Bugzilla->user;
-
- if ($user->settings->{use_markdown}->{is_enabled}
- && $user->setting('use_markdown') eq 'on')
- {
- return $self->SUPER::markdown($text, @_);
- }
-
- return Bugzilla::Template::quoteUrls($text);
-}
-
-sub _Markdown {
- my $self = shift;
- my $text = shift;
-
- $text = Bugzilla::Template::quoteUrls($text, undef, undef, undef, undef, 1);
-
- return $self->SUPER::_Markdown($text, @_);
-}
-
-sub _RunSpanGamut {
- # These are all the transformations that occur *within* block-level
- # tags like paragraphs, headers, and list items.
-
- my ($self, $text) = @_;
-
- $text = $self->_DoCodeSpans($text);
- $text = $self->_EscapeSpecialCharsWithinTagAttributes($text);
- $text = $self->_EscapeSpecialChars($text);
-
- $text = $self->_DoAnchors($text);
-
- # Strikethroughs is Bugzilla's extension
- $text = $self->_DoStrikethroughs($text);
-
- $text = $self->_DoAutoLinks($text);
- $text = $self->_EncodeAmpsAndAngles($text);
- $text = $self->_DoItalicsAndBold($text);
-
- $text =~ s/\n/<br$self->{empty_element_suffix}\n/g;
-
- return $text;
-}
-
-# Override to check for HTML-escaped <>" chars.
-sub _StripLinkDefinitions {
-#
-# Strips link definitions from text, stores the URLs and titles in
-# hash references.
-#
- my ($self, $text) = @_;
- my $less_than_tab = $self->{tab_width} - 1;
-
- # Link defs are in the form: ^[id]: url "optional title"
- while ($text =~ s{
- ^[ ]{0,$less_than_tab}\[(.+)\]: # id = \$1
- [ \t]*
- \n? # maybe *one* newline
- [ \t]*
- (?:&lt;)?<a\s+href="(.+?)">\2</a>(?:&gt;)? # url = \$2
- [ \t]*
- \n? # maybe one newline
- [ \t]*
- (?:
- (?<=\s) # lookbehind for whitespace
- (?:&quot;|\()
- (.+?) # title = \$3
- (?:&quot;|\))
- [ \t]*
- )? # title is optional
- (?:\n+|\Z)
- }{}omx) {
- $self->{_urls}{lc $1} = $self->_EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive
- if ($3) {
- $self->{_titles}{lc $1} = $3;
- $self->{_titles}{lc $1} =~ s/"/&quot;/g;
- }
-
- }
-
- return $text;
-}
-
-# We need to look for HTML-escaped '<' and '>' (i.e. &lt; and &gt;).
-# We also remove Email linkification from the original implementation
-# as it is already done in Bugzilla's quoteUrls().
-sub _DoAutoLinks {
- my ($self, $text) = @_;
-
- $text =~ s{(?:<|&lt;)((?:https?|ftp):[^'">\s]+?)(?:>|&gt;)}{<a href="$1">$1</a>}gi;
- return $text;
-}
-
-# The main reasons for overriding this method are
-# resolving URL conflicts with Bugzilla's quoteUrls()
-# and also changing '"' to '&quot;' in regular expressions wherever needed.
-sub _DoAnchors {
-#
-# Turn Markdown link shortcuts into <a> tags.
-#
- my ($self, $text) = @_;
-
- # We revert linkifications of non-email links and only
- # those links whose URL and title are the same because
- # this way we can be sure that link is generated by quoteUrls()
- $text =~ s@<a \s+ href="(?! mailto ) (.+?)">\1</a>@$1@xmg;
-
- #
- # First, handle reference-style links: [link text] [id]
- #
- $text =~ s{
- ( # wrap whole match in $1
- \[
- ($g_nested_brackets) # link text = $2
- \]
-
- [ ]? # one optional space
- (?:\n[ ]*)? # one optional newline followed by spaces
-
- \[
- (.*?) # id = $3
- \]
- )
- }{
- my $whole_match = $1;
- my $link_text = $2;
- my $link_id = lc $3;
-
- if ($link_id eq "") {
- $link_id = lc $link_text; # for shortcut links like [this][].
- }
-
- $link_id =~ s{[ ]*\n}{ }g; # turn embedded newlines into spaces
-
- $self->_GenerateAnchor($whole_match, $link_text, $link_id);
- }xsge;
-
- #
- # Next, inline-style links: [link text](url "optional title")
- #
- $text =~ s{
- ( # wrap whole match in $1
- \[
- ($g_nested_brackets) # link text = $2
- \]
- \( # literal paren
- [ \t]*
- ($g_nested_parens) # href = $3
- [ \t]*
- ( # $4
- (&quot;|') # quote char = $5
- (.*?) # Title = $6
- \5 # matching quote
- [ \t]* # ignore any spaces/tabs between closing quote and )
- )? # title is optional
- \)
- )
- }{
- my $result;
- my $whole_match = $1;
- my $link_text = $2;
- my $url = $3;
- my $title = $6;
-
- # Remove Bugzilla quoteUrls() linkification
- if ($url =~ /^a href="/ && $url =~ m|</a$|) {
- $url =~ s/^[^>]+>//;
- $url =~ s@</a$@@;
- }
-
- # Limit URL to HTTP/HTTPS links
- $url = "http://$url" unless $url =~ m!^https?://!i;
-
- $self->_GenerateAnchor($whole_match, $link_text, undef, $url, $title);
- }xsge;
-
- #
- # Last, handle reference-style shortcuts: [link text]
- # These must come last in case you've also got [link test][1]
- # or [link test](/foo)
- #
- $text =~ s{
- ( # wrap whole match in $1
- \[
- ([^\[\]]+) # link text = $2; can't contain '[' or ']'
- \]
- )
- }{
- my $result;
- my $whole_match = $1;
- my $link_text = $2;
- (my $link_id = lc $2) =~ s{[ ]*\n}{ }g; # lower-case and turn embedded newlines into spaces
-
- $self->_GenerateAnchor($whole_match, $link_text, $link_id);
- }xsge;
-
- return $text;
-}
-
-# The purpose of overriding this function is to add support
-# for a Github Flavored Markdown (GFM) feature called 'Multiple
-# underscores in words'. The standard markdown specification
-# specifies the underscore for making the text emphasized/bold.
-# However, some variable names in programming languages contain underscores
-# and we do not want a part of those variables to look emphasized/bold.
-# Instead, we render them as the way they originally are.
-sub _DoItalicsAndBold {
- my ($self, $text) = @_;
-
- # Handle at beginning of lines:
- $text =~ s{ (^__ (?=\S) (.+?[*_]*) (?<=\S) __ (?!\S)) }
- {
- my $result = _has_multiple_underscores($2) ? $1 : "<strong>$2</strong>";
- $result;
- }gsxe;
-
- $text =~ s{ ^\*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }{<strong>$1</strong>}gsx;
-
- $text =~ s{ (^_ (?=\S) (.+?) (?<=\S) _ (?!\S)) }
- {
- my $result = _has_multiple_underscores($2) ? $1 : "<em>$2</em>";
- $result;
- }gsxe;
-
- $text =~ s{ ^\* (?=\S) (.+?) (?<=\S) \* }{<em>$1</em>}gsx;
-
- # <strong> must go first:
- $text =~ s{ ( (?<=\W) __ (?=\S) (.+?[*_]*) (?<=\S) __ (?!\S) ) }
- {
- my $result = _has_multiple_underscores($2) ? $1 : "<strong>$2</strong>";
- $result;
- }gsxe;
-
-
- $text =~ s{ (?<=\W) \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }{<strong>$1</strong>}gsx;
-
- $text =~ s{ ( (?<=\W) _ (?=\S) (.+?) (?<=\S) _ (?!\S) ) }
- {
- my $result = _has_multiple_underscores($2) ? $1 : "<em>$2</em>";
- $result;
- }gsxe;
-
- $text =~ s{ (?<=\W) \* (?=\S) (.+?) (?<=\S) \* }{<em>$1</em>}gsx;
-
- # And now, a second pass to catch nested strong and emphasis special cases
- $text =~ s{ ( (?<=\W) __ (?=\S) (.+?[*_]*) (?<=\S) __ (\S*) ) }
- {
- my $result = _has_multiple_underscores($3) ? $1 : "<strong>$2</strong>$3";
- $result;
- }gsxe;
-
- $text =~ s{ (?<=\W) \*\* (?=\S) (.+?[*_]*) (?<=\S) \*\* }{<strong>$1</strong>}gsx;
- $text =~ s{ ( (?<=\W) _ (?=\S) (.+?) (?<=\S) _ (\S*) ) }
- {
- my $result = _has_multiple_underscores($3) ? $1 : "<em>$2</em>$3";
- $result;
- }gsxe;
-
- $text =~ s{ (?<=\W) \* (?=\S) (.+?) (?<=\S) \* }{<em>$1</em>}gsx;
-
- return $text;
-}
-
-sub _DoStrikethroughs {
- my ($self, $text) = @_;
-
- $text =~ s{ ^ ~~ (?=\S) ([^~]+?) (?<=\S) ~~ (?!~) }{<del>$1</del>}gsx;
- $text =~ s{ (?<=_|[^~\w]) ~~ (?=\S) ([^~]+?) (?<=\S) ~~ (?!~) }{<del>$1</del>}gsx;
-
- return $text;
-}
-
-# The original _DoCodeSpans() uses the 's' modifier in its regex
-# which prevents _DoCodeBlocks() to match GFM fenced code blocks.
-# We copy the code from the original implementation and remove the
-# 's' modifier from it.
-sub _DoCodeSpans {
- my ($self, $text) = @_;
-
- $text =~ s@
- (?<!\\) # Character before opening ` can't be a backslash
- (`+) # $1 = Opening run of `
- (.+?) # $2 = The code block
- (?<!`)
- \1 # Matching closer
- (?!`)
- @
- my $c = "$2";
- $c =~ s/^[ \t]*//g; # leading whitespace
- $c =~ s/[ \t]*$//g; # trailing whitespace
- $c = $self->_EncodeCode($c);
- "<code>$c</code>";
- @egx;
-
- return $text;
-}
-
-# Override to add GFM Fenced Code Blocks
-sub _DoCodeBlocks {
- my ($self, $text) = @_;
-
- $text =~ s{
- ^ `{3,} [\s\t]* \n
- ( # $1 = the entire code block
- (?: .* \n+)+?
- )
- `{3,} [\s\t]* $
- }{
- my $codeblock = $1;
- my $result;
-
- $codeblock = $self->_EncodeCode($codeblock);
- $codeblock = $self->_Detab($codeblock);
- $codeblock =~ s/\n\z//; # remove the trailing newline
-
- $result = "\n\n<pre><code>" . $codeblock . "</code></pre>\n\n";
- $result;
- }egmx;
-
- # And now do the standard code blocks
- $text = $self->SUPER::_DoCodeBlocks($text);
-
- return $text;
-}
-
-sub _DoBlockQuotes {
- my ($self, $text) = @_;
-
- $text =~ s{
- ( # Wrap whole match in $1
- (?:
- ^[ \t]*&gt;[ \t]? # '>' at the start of a line
- .+\n # rest of the first line
- (?:.+\n)* # subsequent consecutive lines
- \n* # blanks
- )+
- )
- }{
- my $bq = $1;
- $bq =~ s/^[ \t]*&gt;[ \t]?//gm; # trim one level of quoting
- $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines
- $bq = $self->_RunBlockGamut($bq, {wrap_in_p_tags => 1}); # recurse
- $bq =~ s/^/ /mg;
- # These leading spaces screw with <pre> content, so we need to fix that:
- $bq =~ s{(\s*<pre>.+?</pre>)}{
- my $pre = $1;
- $pre =~ s/^ //mg;
- $pre;
- }egs;
- "<blockquote>\n$bq\n</blockquote>\n\n";
- }egmx;
-
- return $text;
-}
-
-sub _EncodeCode {
- my ($self, $text) = @_;
-
- # We need to unescape the escaped HTML characters in code blocks.
- # These are the reverse of the escapings done in Bugzilla::Util::html_quote()
- $text =~ s/&lt;/</g;
- $text =~ s/&gt;/>/g;
- $text =~ s/&quot;/"/g;
- $text =~ s/&#64;/@/g;
- # '&amp;' substitution must be the last one, otherwise a literal like '&gt;'
- # will turn to '>' because '&' is already changed to '&amp;' in Bugzilla::Util::html_quote().
- # In other words, html_quote() will change '&gt;' to '&amp;gt;' and then we will
- # change '&amp;gt' -> '&gt;' -> '>' if we write this substitution as the first one.
- $text =~ s/&amp;/&/g;
- $text =~ s{<a \s+ href="(?:mailto:)? (.+?)"> \1 </a>}{$1}xmgi;
- $text = $self->SUPER::_EncodeCode($text);
- $text =~ s/~/$g_escape_table{'~'}/go;
- # Encode '&lt;' to prevent URLs from getting linkified in code spans
- $text =~ s/&lt;/$g_escape_table{'&lt;'}/go;
-
- return $text;
-}
-
-sub _EncodeBackslashEscapes {
- my ($self, $text) = @_;
-
- $text = $self->SUPER::_EncodeBackslashEscapes($text);
- $text =~ s/\\~/$g_escape_table{'~'}/go;
-
- return $text;
-}
-
-sub _UnescapeSpecialChars {
- my ($self, $text) = @_;
-
- $text = $self->SUPER::_UnescapeSpecialChars($text);
- $text =~ s/$g_escape_table{'~'}/~/go;
- $text =~ s/$g_escape_table{'&lt;'}/&lt;/go;
-
- return $text;
-}
-
-# Check if the passed string is of the form multiple_underscores_in_a_word.
-# To check that, we first need to make sure that the string does not contain
-# any white-space. Then, if the string is composed of non-space chunks which
-# are bound together with underscores, the string has the desired form.
-sub _has_multiple_underscores {
- my $string = shift;
- return 0 unless defined($string) && length($string);
- return 0 if $string =~ /[\t\s]+/;
- return 1 if scalar (split /_/, $string) > 1;
- return 0;
-}
-
-1;
-
-__END__
-
-=head1 NAME
-
-Bugzilla::Markdown - Generates HTML output from structured plain-text input.
-
-=head1 SYNOPSIS
-
- use Bugzilla::Markdown;
-
- my $markdown = Bugzilla::Markdown->new();
- print $markdown->markdown($text);
-
-=head1 DESCRIPTION
-
-Bugzilla::Markdown implements a Markdown engine that produces
-an HTML-based output from a given plain-text input.
-
-The majority of the implementation is done by C<Text::Markdown>
-CPAN module. It also applies the linkifications done in L<Bugzilla::Template>
-to the input resulting in an output which is a combination of both Markdown
-structures and those defined by Bugzilla itself.
-
-=head2 Accessors
-
-=over
-
-=item C<markdown>
-
-C<string> Produces an HTML-based output string based on the structures
-and format defined in the given plain-text input.
-
-=over
-
-=item B<Params>
-
-=over
-
-=item C<text>
-
-C<string> A plain-text string which includes Markdown structures.
-
-=back
-
-=back
-
-=back