Improve triggers for ReverseComplement goodie

master
David Wilkins 2014-07-01 18:13:42 +08:00
parent a30edc6d5e
commit fbf46d35d9
2 changed files with 29 additions and 24 deletions

View File

@ -4,7 +4,7 @@ package DDG::Goodie::ReverseComplement;
# Goodie framework import; the trigger/zci/name/handle keywords used below
# are presumably provided by it -- confirm against DDG::Goodie.
use DDG::Goodie;
use feature 'state';
# NOTE(review): two `triggers` declarations for the same phrases follow. They
# look like the pre-change (startend) and post-change (any) lines of the
# commit shown without +/- markers -- confirm only one is in the real file.
# Presumably `startend` fires only when the phrase begins or ends the query,
# while `any` fires wherever the phrase occurs -- verify in the trigger docs.
triggers startend => 'reverse complement', 'revcomp';
triggers any => 'reverse complement', 'revcomp';
# Presumably marks answers as cacheable -- TODO confirm.
zci is_cached => 1;
name 'Reverse Complement';
@ -17,33 +17,36 @@ attribution github => ['http://github.com/wilkox', 'wilkox'];
# Query handler: reads the query remainder from $_, strips filler words,
# whitespace and dashes, upper-cases the result, validates it as a nucleotide
# sequence, then returns its reverse complement as plain text plus an HTML
# rendering built by wrap_html(). Returns nothing (bare return) when the
# sequence contains a disallowed character or mixes T and U.
#
# NOTE(review): most statements below appear twice. This looks like the
# pre- and post-change lines of a commit rendered without +/- markers, not
# code meant to run as written -- run literally, the second tr/// and the
# second reverse() would never be reached because of the first return, and
# `my $sequence` would be declared twice. Confirm against the real file.
handle remainder => sub {
#Remove 'of' if supplied
# Only strips a leading "of" followed by one whitespace character; the /g
# flag has no extra effect on a ^-anchored pattern without /m.
$_ =~ s/^of\s//g;
my $sequence = $_;
my $sequence = $_;
#Remove extra words if supplied
# Each filler word is removed anywhere in the string, case-insensitively,
# and only as a whole word (\b anchors).
$sequence =~ s/\bof\b//gi;
$sequence =~ s/\bsequence\b//gi;
$sequence =~ s/\b[DR]NA\b//gi;
$sequence =~ s/\bnucleotide\b//gi;
#Remove whitespace and dashes and make uppercase
$sequence =~ s/\s|-//g;
$sequence = uc($sequence);
#Remove whitespace and dashes and make uppercase
$sequence =~ s/\s|-//g;
$sequence = uc($sequence);
#Return nothing if sequence contains characters
# other than DNA/RNA bases or standard IUPAC ambiguity codes
# NOTE(review): an empty $sequence contains no disallowed character, so it
# passes this check and the handler goes on to return an empty string --
# confirm whether an empty remainder can reach this handler.
return if $sequence =~ /[^ATCGURYKMSWBVDHN]/;
#Return nothing if sequence contains characters
# other than DNA/RNA bases or standard IUPAC ambiguity codes
return if $sequence =~ /[^ATCGURYKMSWBVDHN]/;
#DNA contains thymine (T) but not uracil (U);
# RNA contains U but not T (with some extremely
# rare exceptions). Hence, if the sequence
# contains both U and T it's more likely to be an
# error than a real molecule so should return nothing.
return if $sequence =~ /T/ && $sequence =~ /U/;
#DNA contains thymine (T) but not uracil (U);
# RNA contains U but not T (with some extremely
# rare exceptions). Hence, if the sequence
# contains both U and T it's more likely to be an
# error than a real molecule so should return nothing.
return if $sequence =~ /T/ && $sequence =~ /U/;
#Complement, using standard IUPAC codes
# Character-for-character mapping: A->T, T->A, U->A, C->G, G->C, R->Y, Y->R,
# K->M, M->K, B->V, V->B, H->D, D->H. S, W and N are not in the search list
# and so pass through unchanged. Because A always maps to T, the output uses
# T even when the input was written with U.
$sequence =~ tr/ATUCGRYKMBVHD/TAAGCYRMKVBDH/;
#Complement, using standard IUPAC codes
$sequence =~ tr/ATUCGRYKMBVHD/TAAGCYRMKVBDH/;
#Reverse
$sequence = reverse($sequence);
#Reverse
$sequence = reverse($sequence);
# Returns the plain-text answer followed by an `html` key/value pair;
# wrap_html() is defined outside this block.
return $sequence, html => wrap_html('DNA reverse complement:', $sequence);
return $sequence, html => wrap_html('DNA reverse complement:', $sequence);
};
# This function adds some HTML and styling to our output

View File

@ -13,19 +13,21 @@ ddg_goodie_test(
DDG::Goodie::ReverseComplement
)],
#Basic DNA reverse complements, all possible trigger combinations
#Basic DNA reverse complements, various trigger combinations
'AAAACCCGGT reverse complement' => test_zci("ACCGGGTTTT", html => qr/ACCGGGTTTT/),
'reverse complement of TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT' => test_zci("ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA", html => qr/ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA/),
'AAAACCCGGT revcomp' => test_zci("ACCGGGTTTT", html => qr/ACCGGGTTTT/),
'revcomp AAAACCCGGT' => test_zci("ACCGGGTTTT", html => qr/ACCGGGTTTT/),
'revcomp of AAAACCCGGT' => test_zci("ACCGGGTTTT", html => qr/ACCGGGTTTT/),
'DNA revcomp of sequence AAAACCCGGT' => test_zci("ACCGGGTTTT", html => qr/ACCGGGTTTT/),
'reverse complement of RNA sequence AAAACCCGGU' => test_zci("ACCGGGTTTT", html => qr/ACCGGGTTTT/),
#RNA reverse complement with acceptable spacing characters
'reverse complement uca gac gga' => test_zci("TCCGTCTGA", html => qr/TCCGTCTGA/),
'reverse complement uca-gac-gga' => test_zci("TCCGTCTGA", html => qr/TCCGTCTGA/),
'reverse complement of nucleotide sequence uca-gac-gga' => test_zci("TCCGTCTGA", html => qr/TCCGTCTGA/),
#With ambiguous bases (both DNA and RNA)
'reverse complement TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA' => test_zci("TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA", html => qr/TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA/),
'reverse complement TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA DNA sequence' => test_zci("TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA", html => qr/TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA/),
'reverse complement CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV' => test_zci("BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG", html => qr/BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG/),
#Mix of DNA and RNA bases (should return empty, as it is more likely that this