#!/usr/bin/env perl
# Script to convert ABP filters to Opera urlfilter and a CSS with a hiding rule
# Copyright (C) 2012 anonymous74100
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
2012-05-25 00:39:56 +12:00
|
|
|
use File::Spec;
|
|
|
|
use File::Slurp;
|
2012-07-15 00:01:59 +12:00
|
|
|
use Pod::Usage;
|
|
|
|
use Getopt::Long qw(:config no_auto_abbrev auto_help);
|
2012-05-25 00:39:56 +12:00
|
|
|
use feature 'unicode_strings';
|
2012-05-20 01:09:56 +12:00
|
|
|
|
|
|
|
|
2012-05-25 00:39:56 +12:00
|
|
|
# Set defaults
#
# These file-level lexicals are also read by createUrlfilter() and
# createElemfilter(), so their names must not change.
my $urlfilterfile          = '';    # --urlfilter: output urlfilter file name
my $cssfile                = '';    # --css: output CSS file name
my $nourlfilter            = '';    # --nourlfilter: skip the urlfilter output
my $nocss                  = '';    # --nocss: skip the CSS output
my $newsyntax              = '';    # --new: keep "||" and "^" in the urlfilter
my $nocomments             = '';    # --nocomments: drop comments from the output
my $everythingisfirstparty = '';    # --everythingisfirstparty: strip $third-party
my $ignorewhitelist        = '';    # --ignorewhitelist: don't evaluate @@ rules
my @customcssfile;                  # --addcustomcss: extra CSS files to append

# Get command line options
# pod2usage() prints the usage text and exits by itself, so no die is needed.
GetOptions ('urlfilter=s'            => \$urlfilterfile,
            'css=s'                  => \$cssfile,
            'addcustomcss=s{,}'      => \@customcssfile,
            'nourlfilter'            => \$nourlfilter,
            'nocss'                  => \$nocss,
            'new'                    => \$newsyntax,
            'nocomments'             => \$nocomments,
            'everythingisfirstparty' => \$everythingisfirstparty,
            'ignorewhitelist'        => \$ignorewhitelist)
  or pod2usage(" ");

# Exactly one input list must be given
pod2usage("$0: No files specified.\n") if (@ARGV == 0);
pod2usage("$0: Too many files specified.\n") if (@ARGV > 1);

my $filename = $ARGV[0];    # Get filename
die "Specified file: $filename doesn't exist!\n" unless (-e $filename);

# Output files that were not named explicitly are placed next to the ABP list
unless ($urlfilterfile and $cssfile)
{
    my ($volume, $directories) = File::Spec->splitpath($filename);

    $urlfilterfile ||= File::Spec->catpath($volume, $directories, 'urlfilter.ini');         # Set urlfilter file name
    $cssfile       ||= File::Spec->catpath($volume, $directories, 'element-filter.css');    # Set css file name
}

die "No lists generated!\n" if ($nocss and $nourlfilter);

my $list = read_file($filename, binmode => ':utf8' );    # Read ABP list

$list =~ s/\r\n/\n/g;                    # Remove CR from CR+LF line endings
$list =~ s/\r/\n/g;                      # Convert CR line endings to LF
$list =~ s/^!.*\n//gm if $nocomments;    # Remove comments

# A "my" combined with a statement modifier ("my $x = ... unless ...") has
# undefined behaviour in Perl, so the conditions are spelled out explicitly.
my $urlfilter  = $nourlfilter ? '' : createUrlfilter($list);
my $elemfilter = $nocss       ? '' : createElemfilter($list);

# Warn if a file won't be generated
print "Urlfilter won't be generated!\n" if (!$urlfilter and !$nourlfilter);
print "CSS won't be generated!\n" if (!$elemfilter and !$nocss);

# Write generated files
write_file($urlfilterfile, {binmode => ':utf8'}, $urlfilter) unless ($nourlfilter or !$urlfilter);
write_file($cssfile, {binmode => ':utf8'}, $elemfilter) unless ($nocss or !$elemfilter);
|
|
|
|
|
2012-05-20 01:09:56 +12:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# createUrlfilter($list)
#
# Converts the text of an Adblock Plus filter list into the text of an Opera
# urlfilter file.
#
# Parameter:
#   $list - content of the ABP list as one string with "\n" line endings
#
# Reads the file-level settings $urlfilterfile, $ignorewhitelist,
# $everythingisfirstparty, $newsyntax and $nocomments.
#
# Returns the urlfilter text, or '' when no blocking filter is left after the
# conversion (only comments or nothing at all).
sub createUrlfilter
{
    my $list = shift;

    # Sections that have to precede the exclude rules
    my $header = "[prefs]\nprioritize excludelist=1\n[include]\n*\n[exclude]\n";

    # Get old checksum and modification time
    my $oldchecksum = '';
    my $oldmodified = '';
    if (-e $urlfilterfile)
    {
        my $oldlist = read_file($urlfilterfile, binmode => ':utf8' );
        $oldchecksum = $1 if $oldlist =~ m/(Checksum:.*)$/mi;
        $oldmodified = $1 if $oldlist =~ m/((Last modified|Updated):.*)$/mi;
    }

    # Collect whitelists and reduce each of them to the bare URL fragment.
    # The entries are lowercased once because the comparison further down is
    # case-insensitive; empty entries are dropped because an empty string
    # would be "contained" in every filter.
    my @whitelist;
    unless ($ignorewhitelist)
    {
        my $whitelists = join("\n", ($list =~ m/^@@.*$/gm));

        $whitelists =~ s/^@@//gm;                   # Remove whitelist symbols
        $whitelists =~ s/^\|\|//gm;                 # Remove vertical bars
        $whitelists =~ s/\^$//gm;                   # Remove ending caret
        $whitelists =~ s/\^/\//gm;                  # Convert caret to slash
        $whitelists =~ s/^.*\$elemhide.*\n?//gm;    # Remove element whitelists
        $whitelists =~ s/\$.*//gm;                  # Remove everything after a dollar sign
        $whitelists =~ s/^\*//gm;                   # Remove beginning asterisk
        $whitelists =~ s/\*$//gm;                   # Remove ending asterisk

        @whitelist = grep { length } split(/\n/, lc $whitelists);
    }

    $list =~ s/^\[.+\]\n//m;       # Remove ABP header
    $list =~ s/^@@.*\n?//gm;       # Remove whitelists
    $list =~ s/^.*##.*\n?//gm;     # Remove element filters

    $list =~ s/^(.*[^\*])(\*?)\$script$/$1\*\.js\*/gm;         # Convert filters with script type
    $list =~ s/^(.*[^\*])(\*?)\$stylesheet$/$1\*\.css\*/gm;    # Convert filters with stylesheet type

    $list =~ s/^(.*)\$third-party$/$1/gm if $everythingisfirstparty;
    $list =~ s/^.*\$.*\n?//gm;     # Remove filters with types

    $list =~ s/^!/;/gm;            # Convert comments

    # Return empty list if it doesn't have anything but comments.
    # A plain match is used instead of counting split() fields: the count is
    # one short when the list starts with a filter line, which made a list
    # consisting of a single filter look empty.
    return '' unless $list =~ m/^(?!;|$)/m;

    $list =~ s/^(;\s*)Title:\s/$1/mi;          # Normalize title
    $list =~ s/^(;\s*Redirect.*\n)//gmi;       # Remove redirect comment

    $list =~ s/^(;\s*)(Checksum:.*)$/$1$oldchecksum/mi if $oldchecksum;                        # Insert old checksum
    $list =~ s/^(;\s*)((Last modified|Updated):.*)$/$1$oldmodified/mi if $oldmodified;         # Insert old modification date/time

    $list =~ s/^([^;|*].*$)/\*$1/gm;           # Add beginning asterisk
    $list =~ s/^([^;]\S*[^|*])\n/$1\*\n/gm;    # Add ending asterisk
    $list =~ s/^\|([^|].*)$/$1/gm;             # Remove beginning pipe
    $list =~ s/^([^;].*)\|$/$1/gm;             # Remove ending pipe

    # Parse whitelists
    unless ($ignorewhitelist)
    {
        my $urlfilter = '';

        foreach my $line (split(/\n/, $list))
        {
            # Remove filters that require whitelists
            my $matcheswhitelist = '';
            unless ($line =~ m/^;/)
            {
                # Normalise the filter the same way the whitelists were
                my $tmpline = lc $line;
                $tmpline =~ s/^\|\|//;     # Remove pipes
                $tmpline =~ s/\^$//;       # Remove ending caret
                $tmpline =~ s/\^/\//g;     # Convert caret to slash
                $tmpline =~ s/\$.*//;      # Remove everything after a dollar sign
                $tmpline =~ s/^\*//;       # Remove beginning asterisk
                $tmpline =~ s/\*$//;       # Remove ending asterisk

                # Drop the filter when it contains a whitelist entry or is
                # contained in one. index() is used instead of m//g so that
                # no match position is carried over between iterations and
                # an empty string can never turn into an empty pattern.
                if (length $tmpline)
                {
                    foreach my $entry (@whitelist)
                    {
                        next unless (index($tmpline, $entry) >= 0 or index($entry, $tmpline) >= 0);
                        $matcheswhitelist = 1;
                        last;
                    }
                }
            }

            $urlfilter = $urlfilter."$line\n" unless $matcheswhitelist;
        }
        $list = $urlfilter;
    }

    # Return empty list if it doesn't have anything but comments
    return '' unless $list =~ m/^(?!;|$)/m;

    unless ($newsyntax)
    {
        $list =~ s/^\|\|(.*)/\*:\/\/$1\n\*\.$1/gm;    # Remove pipes and add protocol and add a filter with subdomain
        $list =~ s/^([^;].*)\^/$1\//gm;               # Convert caret to slash
    }

    # Add urlfilter header
    # With comments it is placed in front of the first comment line that
    # carries no text; when there is no such line (or no comments at all) it
    # is put at the very top so that the sections are never missing.
    my $inserted = '';
    $inserted = ($list =~ s/^(;\s*)\n/$header$1\n/m) unless $nocomments;
    $list = $header.$list unless $inserted;

    return $list;
}
|
|
|
|
|
|
|
|
|
|
|
|
# createElemfilter($list)
#
# Converts the generic element hiding rules ("##selector") of an Adblock Plus
# filter list into one CSS rule that hides all of the selectors, optionally
# followed by the content of the custom CSS files.
#
# Parameter:
#   $list - content of the ABP list as one string with "\n" line endings
#
# Reads the file-level settings $cssfile, $nocomments and @customcssfile.
#
# Returns the CSS text, or '' when the result would contain nothing but
# comments and the @namespace declaration.
sub createElemfilter
{
    my $list = shift;

    # Declaration that has to be present in every generated style sheet
    my $namespace = '@namespace "http://www.w3.org/1999/xhtml";'."\n";

    # Get old checksum and modification time
    my $oldchecksum = '';
    my $oldmodified = '';
    if (-e $cssfile)
    {
        my $oldlist = read_file($cssfile, binmode => ':utf8' );
        $oldchecksum = $1 if $oldlist =~ m/(Checksum:.*)$/mi;
        $oldmodified = $1 if $oldlist =~ m/((Last modified|Updated):.*)$/mi;
    }

    $list =~ s/^(?!##|!).*\n?//gm;    # Leave only generic element filters and comments

    $list =~ s/^(!\s*)Title:\s/$1/mi;         # Normalize title
    $list =~ s/^(!\s*Redirect.*\n)//gmi;      # Remove redirect comment

    $list =~ s/^(!\s*)(Checksum:.*)$/$1$oldchecksum/mi if $oldchecksum;                   # Insert old checksum
    $list =~ s/^(!\s*)((Last modified|Updated):.*)$/$1$oldmodified/mi if $oldmodified;    # Insert old modification date/time

    $list =~ s/^##//gm;    # Remove beginning number signs

    # Convert tags to lowercase
    # NOTE(review): the greedy ".*" lowercases everything up to the LAST
    # "[", "." or "#" of the line, i.e. also class names and ids in front of
    # it, and leaves selectors without any of these characters untouched.
    # Kept unchanged; confirm whether that is intended.
    $list =~ s/(^[^!].*[\[.#])/\L$1/gmi;

    $list =~ s/^((?!\/\*|\*\/|\!).*[^,])\s*$/$1,/gm;    # Add commas

    # Return empty list if it doesn't have anything but comments.
    # A plain match is used instead of counting split() fields: the count is
    # one short when the list starts with a selector line, which made a list
    # consisting of a single selector look empty.
    return '' unless ($list =~ m/^(?!\!|$)/m or @customcssfile);

    $list =~ s/(^[^!].*),\s*$/$1/ms;    # Remove last comma
    $list = $list." { display: none !important; }\n" if $list =~ m/^[^!]/m;    # Add CSS rule if list has anything besides comments

    # Add xml namespace declaration
    # With comments it is placed in front of the first comment line that
    # carries no text; when there is no such line (or no comments at all) it
    # is put at the very top so that it is never missing.
    my $inserted = '';
    $inserted = ($list =~ s/^(!\s*?)\n/$namespace$1\n/m) unless $nocomments;
    $list = $namespace.$list unless $inserted;

    # Convert comments
    # Every run of consecutive "!" lines is wrapped into one CSS comment.
    unless ($nocomments)
    {
        my $tmplist = '';
        my $previousline = '';
        foreach my $line (split(/\n/, $list))
        {
            $tmplist = $tmplist."/*\n" if (($previousline !~ m/^!/) and ($line =~ m/^!/));
            $tmplist = $tmplist."*/\n" if (($previousline =~ m/^!/) and ($line !~ m/^!/));
            $tmplist = $tmplist.$line."\n";
            $previousline = $line;
        }
        # Close a comment that runs up to the end of the list; otherwise
        # everything appended below would end up inside of it.
        $tmplist = $tmplist."*/\n" if ($previousline =~ m/^!/);
        $list = $tmplist;
    }

    foreach my $file (@customcssfile)
    {
        next unless (-e $file);    # Skip file if it doesn't exist
        my $customcss = read_file($file, binmode => ':utf8' );    # Read custom CSS file

        $customcss =~ s/\r\n/\n/gm;    # Remove CR from CR+LF line endings
        $customcss =~ s/\r/\n/gm;      # Convert CR line endings to LF

        $customcss =~ s/^@.*\n//gm;    # Remove at-rules
        $list = $list."\n".$customcss; # Add custom CSS to list
    }

    # Return empty list if it doesn't have anything but comments and at-rules
    return '' unless $list =~ m/^(?!\/\*|\*\/|!|\@namespace|$)/m;

    return $list;
}
|
|
|
|
|
2012-05-20 01:09:56 +12:00
|
|
|
|
2012-07-15 00:01:59 +12:00
|
|
|
__END__

=head1 SYNOPSIS

createOperaFilters.pl [file] [options]

 Options:
   --nocss                      - don't create element-filter.css
   --nourlfilter                - don't create urlfilter.ini
   --urlfilter [file]           - specify urlfilter filename
   --css [file]                 - specify CSS filename
   --addcustomcss [file ...]    - specify custom CSS file(s) to combine with converted CSS file
   --new                        - use new syntax
   --nocomments                 - don't put comments in generated files
   --everythingisfirstparty     - parse third party filters as first party filters
   --ignorewhitelist            - don't parse whitelists
   --help                       - brief help message

=cut
|