package d::Templates;
use utf8;
use strict;
use Data::Dumper;
use AnomieBOT::API;
AnomieBOT::API::load('d::Nowiki');
use vars qw/@ISA/;
@ISA=qw/d::Nowiki/;
=pod
=head1 NAME
d::Templates - AnomieBOT template-handling decorator
=head1 SYNOPSIS
use AnomieBOT::API;
$api = new AnomieBOT::API('conf.ini', 1);
$api->decorators(qw/d::Templates/);
=head1 DESCRIPTION
C<d::Templates> contains template manipulating functions for use by an
AnomieBOT task. When "d::Templates" is used as a decorator on the API object,
the following methods are available.
In addition, all A<d::Nowiki> methods are also available, as this decorator
uses them internally.
=head1 METHODS PROVIDED
=over
=item $api->process_templates( $wikitext, $callback, $data )
Runs a parser over the wikitext, calling the callback function for each
template, magic word, or parser function found (basically anything encosed in
double-braces). The callback may return a replacement string, and the final
processed version is returned.
The callback function will be passed the following parameters:
=over
=item $name
The template name or the parser function/magic word invocation. For example,
"reflist" or "#tag:ref". Stripped of leading/trailing spaces and with the first
character uppercased.
=item $params
An array of the parameters. Spaces are not stripped, nor is there any attempt
to interpret named parameters.
=item $wikitext
The raw wikitext of the template.
=item $data
The data object passed in the original call.
=item $orig_name
C<$name> before the stripping and uppercasing.
=item $nl
Boolean, whether the template invocation immediately follows a newline. Possibly useful for working around T14974.
=back
Any non-C<undef> return value will be used to replace the original template.
=cut
sub process_templates {
my $api=shift;
my ($text,$nowiki)=$api->strip_nowiki(shift);
my $cb=shift;
my $data=shift;
my $notags=undef;
($text,$notags)=$api->strip_tags([$api->extension_tags], $text);
if(exists($notags->{$text})){
# The entire text was in one tag (probably because of a recursive call),
# so process the contents of that one tag.
$text=$notags->{$text};
$notags=undef;
} else {
while(my ($k,$v)=each %$notags){
next unless $text=~/\Q$k\E/;
$v=$api->replace_stripped($v,$nowiki);
$notags->{$k}=process_templates($api,$v,$cb,$data);
}
}
my @stack=();
while($text=~/(\{\{+|\}\}+|\[\[+|\]\]+|\|)/g){
my $ct=length($1);
my $i=pos($text)-$ct;
my $x=@stack?$stack[$#stack]:undef;
my $c=substr($1,0,1);
if($c eq "\x7b"){
# Found at least two open-braces
push @stack, {
char=>"\x7b",
start=>$i,
count=>$ct,
pstart=>$i+$ct,
params=>[]
};
$i+=$ct;
} elsif($c eq "\x5b"){
# Found at least two open-brackets
push @stack, {
char=>"\x5b",
start=>$i,
count=>$ct,
pstart=>$i+$ct,
params=>[]
};
$i+=$ct;
} elsif($c eq "\x7d" && defined($x) && $x->{'char'} eq "\x7b"){
# Found at least two close-braces, and we have at least one
# possible template/variable on the stack.
$ct=$x->{'count'} if $ct>$x->{'count'};
$i+=$ct;
push @{$x->{'params'}}, substr($text, $x->{'pstart'}, $i-$ct-$x->{'pstart'});
# First, pull out variables
if($ct>=3){
$x->{'count'}-=$ct-($ct%3);
$ct=$ct%3;
my $s=$x->{'start'}+$x->{'count'};
$x->{'params'}=[substr($text, $s, $i-$ct-$s)];
}
# Ok, any left is a template
if($ct>=2){
$x->{'count'}-=2;
$ct-=2;
my $s=$x->{'start'}+$x->{'count'};
my $orig=$api->replace_stripped(substr($text,$s,$i-$ct-$s), $notags, $nowiki);
map { $_=$api->replace_stripped($_, $notags, $nowiki); } @{$x->{'params'}};
my $name=shift @{$x->{'params'}};
my $oname=$name;
$name=~s/<!--.*?-->//g;
$name=~s/[\x{200e}\x{200f}\x{202a}-\x{202e}]//g; # MediaWiki strips these from titles
$name=~s/[\s_\xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{2028}\x{2029}\x{202f}\x{205f}\x{3000}]+/ /g; # Mediawiki considers all these as whitespace
$name=~s/^\s+|\s+$//g;
$name=~s/^Template\s*:\s*//ig;
$name=ucfirst($name);
my $ret=&$cb($name, $x->{'params'}, $orig, $data, $oname, ($s>0 && substr($text,$s-1,1) eq "\n")?1:0);
if(defined($ret)){
$ret="$ret";
# If we're completely removing the template and the
# template is the only thing on its line, remove the line
# too instead of leaving an empty one.
my $d=($ret eq '' && ($s==0 || substr($text,$s-1,1) eq "\n") && substr($text,$i-$ct,1) eq "\n")?1:0;
substr($text, $s, $i-$ct-$s+$d)=$ret;
$i=$s+length($ret)+$ct;
$x->{'params'}=[$ret];
} else {
$x->{'params'}=[$orig];
}
}
if($x->{'count'}<2){
pop @stack;
} else {
# The one we just completed might not be the end of the param,
# so reset the param array and pstart.
$x->{'params'}=[];
$x->{'pstart'}=$x->{'start'}+$x->{'count'};
}
} elsif($c eq "\x5d" && defined($x) && $x->{'char'} eq "\x5b"){
# Found at least two close-brackets, and we have at least one
# possible wikilink on the stack
# Eat however many brackets are matched
$ct=$x->{'count'} if $ct>$x->{'count'};
$i+=$ct;
$x->{'count'}-=$ct;
if($x->{'count'}<2){
pop @stack;
} else {
# The one we just completed might not be the end of the param,
# so reset the param array and pstart.
$x->{'params'}=[];
$x->{'pstart'}=$x->{'start'}+$x->{'count'};
}
} elsif($c eq '|' && defined($x)){
push @{$x->{'params'}}, substr($text, $x->{'pstart'}, $i-$x->{'pstart'});
$x->{'pstart'}=++$i;
} else {
$i++;
}
pos($text)=$i if !defined( pos($text) ) || pos($text) != $i;
}
return $api->replace_stripped($text, $notags, $nowiki);
}
=pod
=item $api->process_paramlist( @params )
Parse named parameters. Returns an array of objects having C<name>, C<oname>,
C<value>, and C<text> parameters. If the parameter was unnamed, C<oname> will
be undef and C<name> will be the calculated parameter number.
=cut
sub process_paramlist {
my $api=shift;
my @params=@_;
my @ret=();
my $idx=0;
foreach (@params){
# Normal unnamed params are easy to detect.
if(!/=/){ push @ret, { oname=>undef, name=>++$idx, value=>$_, text=>$_ }; next; }
# As long as the naive "name" part doesn't contain the start of a
# template or a tag or an internal link, it's correct.
if(/^(\s*([^=<\x5b\x7b]*?)\s*)=\s*(.*?)\s*$/s){ push @ret, { oname=>$1, name=>$2, value=>$3, text=>$_ }; next; }
# Must be complicated now, the name can contain an equals if it's
# inside a template, a parameter, an internal link, or a comment. Also,
# replaced tags just completely screw things up (the parameter name
# ends up containing the unique token, which is probably impossible to
# actually _use_ as a parameter).
my ($text,$nowiki)=$api->strip_tags([$api->extension_tags], $_);
# Just comments/nowikis/refs fixed it?
if($text!~/=/){ push @ret, { oname=>undef, name=>++$idx, value=>$_, text=>$_ }; next; }
if($text=~/^\s*([^=\x5b\x7b]*?)\s*=\s*(.*?)\s*$/s){
my ($oname,$name,$v)=($1,$1,$2);
$oname=$api->replace_stripped($oname, $nowiki);
$name=$api->replace_stripped($name, $nowiki);
$v=$api->replace_stripped($v, $nowiki);
push @ret, { oname=>$oname, name=>$name, value=>$v, text=>$_ };
next;
}
# No, there must be a template or link in there somewhere...
my @stack=();
my $i=0;
my $len=length($text);
while($i<$len){
my $x=@stack?$stack[$#stack]:undef;
my $xb=undef;
map { $xb=$_ if $_->{'char'} eq "\x5b" } @stack;
if(substr($text,$i,2) eq "\x7b\x7b" || substr($text,$i,2) eq "\x5b\x5b"){
# Found at least two open-braces/brackets
my $ct;
my $c=substr($text,$i,1);
for($ct=2; substr($text,$i+$ct,1) eq $c; $ct++){}
push @stack, {
char=>$c,
start=>$i,
count=>$ct,
};
$i+=$ct;
} elsif(defined($x) && $x->{'char'} eq "\x7b" && substr($text,$i,2) eq "\x7d\x7d"){
# Found at least two close-braces, and we have at least one
# possible template/variable on the stack.
my $ct;
for($ct=2; substr($text,$i+$ct,1) eq "\x7d"; $ct++){}
$ct=$x->{'count'} if $ct>$x->{'count'};
$i+=$ct;
while($ct>=3){
$x->{'count'}-=3;
$ct-=3;
}
while($ct>=2){
$x->{'count'}-=2;
$ct-=2;
}
if($x->{'count'}<2){
pop @stack;
}
} elsif(defined($xb) && substr($text,$i,2) eq "\x5d\x5d"){
# Found at least two close-brackets, and we have at least one
# possible wikilink on the stack
# Drop any pending templates, they're not really templates
while($stack[$#stack] ne $xb){
pop @stack;
}
# Eat however many brackets are matched
my $ct;
for($ct=2; substr($text,$i+$ct,1) eq "\x5d"; $ct++){}
$ct=$xb->{'count'} if $ct>$xb->{'count'};
$i+=$ct;
$xb->{'count'}-=$ct;
if($xb->{'count'}<2){
pop @stack;
}
} elsif(!defined($x) && substr($text,$i,1) eq '='){
# Found the equals!
last;
} else {
$i++;
}
}
if($i>=$len){ push @ret, { oname=>undef, name=>++$idx, value=>$_, text=>$_ }; next; }
my $oname=substr($text,0,$i);
$oname=$api->replace_stripped($oname, $nowiki);
my $name=$oname; $name=~s/^\s+|\s+$//g;
my $v=substr($text,$i+1);
$v=~s/^\s+|\s+$//g;
$v=$api->replace_stripped($v, $nowiki);
push @ret, { oname=>$oname, name=>$name, value=>$v, text=>$_ };
next;
}
return @ret;
}
=pod
=item $api->strip_templates( $wikitext, \&callback, $data )
=item $api->strip_templates( $wikitext, \&callback, $data, \%matches )
Runs a parser over the wikitext, calling the callback function for each
template, magic word, or parser function found (basically anything encosed in
double-braces). If the callback returns a true value, the template is replaced
by an opaque token.
The callback function will be passed the same parameters as for
C<process_templates>. The return value is the same as for C<strip_regex> from
the A<d:Nowiki> decorator.
=item $api->strip_templates( $wikitext, \@templates )
=item $api->strip_templates( $wikitext, \@templates, \%matches )
As above, with a callback function that just tests whether the C<$name> is in
(or matches a regex in) the provided array.
=cut
sub strip_templates {
my $api=shift;
my $text=shift;
my $cb=shift;
my $data;
if(ref($cb) eq 'ARRAY'){
$data=$cb;
$cb=\&_strip_templates_in_list;
} else {
$data=shift;
}
my $mapping=shift // {};
$text=$api->process_templates($text, sub {
return undef unless &$cb(@_);
my $x=$api->replace_stripped($_[2],$mapping);
my $tag=$api->get_token_for($x);
$mapping->{$tag}=$x;
return $tag;
}, $data);
return wantarray ? ($text,$mapping) : $text;
}
sub _strip_templates_in_list {
return grep(ref($_) ? $_[0]=~/$_/ : $_[0] eq $_, @{$_[3]});
}
1;
=pod
=back
=head1 COPYRIGHT
Copyright 2008–2013 Anomie
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.