Improve triggers for ReverseComplement goodie
parent
a30edc6d5e
commit
fbf46d35d9
|
@ -4,7 +4,7 @@ package DDG::Goodie::ReverseComplement;
|
|||
use DDG::Goodie;
|
||||
use feature 'state';
|
||||
|
||||
triggers startend => 'reverse complement', 'revcomp';
|
||||
triggers any => 'reverse complement', 'revcomp';
|
||||
zci is_cached => 1;
|
||||
|
||||
name 'Reverse Complement';
|
||||
|
@ -17,33 +17,36 @@ attribution github => ['http://github.com/wilkox', 'wilkox'];
|
|||
|
||||
# Query handler: computes the reverse complement of a nucleotide sequence.
#
# Input:   $_ - the text handed to the handler (presumably the query with
#               the trigger phrase removed -- confirm against DDG::Goodie).
# Returns: the reverse-complemented sequence as the plain answer, followed
#          by an `html => ...` pair built with wrap_html(); or nothing
#          (bare return) when the input is not a plausible sequence.
handle remainder => sub {

    my $sequence = $_;

    #Remove extra words if supplied
    $sequence =~ s/\bof\b//gi;
    $sequence =~ s/\bsequence\b//gi;
    $sequence =~ s/\b[DR]NA\b//gi;
    $sequence =~ s/\bnucleotide\b//gi;

    #Remove whitespace and dashes and make uppercase
    $sequence =~ s/\s|-//g;
    $sequence = uc($sequence);

    #Return nothing if the sequence is empty (e.g. the query held only
    # filler words) or contains characters other than DNA/RNA bases or
    # standard IUPAC ambiguity codes. A positive anchored match is used
    # because a negated-class test would let the empty string through.
    return unless $sequence =~ /\A[ATCGURYKMSWBVDHN]+\z/;

    #DNA contains thymine (T) but not uracil (U);
    # RNA contains U but not T (with some extremely
    # rare exceptions). Hence, if the sequence
    # contains both U and T it's more likely to be an
    # error than a real molecule so should return nothing.
    return if $sequence =~ /T/ && $sequence =~ /U/;

    #Complement, using standard IUPAC codes.
    # S, W and N are their own complements, so they are left untouched;
    # U is complemented to A, so the result is always written with DNA bases.
    $sequence =~ tr/ATUCGRYKMBVHD/TAAGCYRMKVBDH/;

    #Reverse (scalar context: reverses the characters of the string)
    $sequence = reverse($sequence);

    return $sequence, html => wrap_html('DNA reverse complement:', $sequence);
};
|
||||
|
||||
# This function adds some HTML and styling to our output
|
||||
|
|
|
@ -13,19 +13,21 @@ ddg_goodie_test(
|
|||
DDG::Goodie::ReverseComplement
|
||||
)],
|
||||
|
||||
#Basic DNA reverse complements, all possible trigger combinations
|
||||
#Basic DNA reverse complements, various trigger combinations
|
||||
'AAAACCCGGT reverse complement' => test_zci("ACCGGGTTTT", html => qr/ACCGGGTTTT/),
|
||||
'reverse complement of TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT' => test_zci("ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA", html => qr/ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA/),
|
||||
'AAAACCCGGT revcomp' => test_zci("ACCGGGTTTT", html => qr/ACCGGGTTTT/),
|
||||
'revcomp AAAACCCGGT' => test_zci("ACCGGGTTTT", html => qr/ACCGGGTTTT/),
|
||||
'revcomp of AAAACCCGGT' => test_zci("ACCGGGTTTT", html => qr/ACCGGGTTTT/),
|
||||
'DNA revcomp of sequence AAAACCCGGT' => test_zci("ACCGGGTTTT", html => qr/ACCGGGTTTT/),
|
||||
'reverse complement of RNA sequence AAAACCCGGU' => test_zci("ACCGGGTTTT", html => qr/ACCGGGTTTT/),
|
||||
|
||||
#RNA reverse complement with acceptable spacing characters
|
||||
'reverse complement uca gac gga' => test_zci("TCCGTCTGA", html => qr/TCCGTCTGA/),
|
||||
'reverse complement uca-gac-gga' => test_zci("TCCGTCTGA", html => qr/TCCGTCTGA/),
|
||||
'reverse complement of nucleotide sequence uca-gac-gga' => test_zci("TCCGTCTGA", html => qr/TCCGTCTGA/),
|
||||
|
||||
#With ambiguous bases (both DNA and RNA)
|
||||
'reverse complement TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA' => test_zci("TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA", html => qr/TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA/),
|
||||
'reverse complement TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA DNA sequence' => test_zci("TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA", html => qr/TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA/),
|
||||
'reverse complement CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV' => test_zci("BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG", html => qr/BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG/),
|
||||
|
||||
#Mix of DNA and RNA bases (should return empty, as it is more likely that this
|
||||
|
|
Loading…
Reference in New Issue