ReverseComplement: Switching to text template

master
Rob Emery 2016-05-18 22:20:30 +01:00
parent e71eb3c5d4
commit 3c152febc2
2 changed files with 65 additions and 98 deletions

View File

@ -12,39 +12,41 @@ zci is_cached => 1;
handle remainder => sub { handle remainder => sub {
my $sequence = $_; my $sequence = $_;
#Remove extra words if supplied
$sequence =~ s/\bof\b//gi;
$sequence =~ s/\bsequence\b//gi;
$sequence =~ s/\b[DR]NA\b//gi;
$sequence =~ s/\bnucleotide\b//gi;
#Remove whitespace and dashes and make uppercase
$sequence =~ s/\s|-//g;
$sequence = uc($sequence);
#Return nothing if the sequence is empty or contains characters
# other than DNA/RNA bases or standard IUPAC ambiguity codes
return unless ($sequence =~ /^[ATCGURYKMSWBVDHN]+$/);
my $normalized_seq = $sequence;
#DNA contains thymine (T) but not uracil (U);
# RNA contains U but not T (with some extremely
# rare exceptions). Hence, if the sequence
# contains both U and T it's more likely to be an
# error than a real molecule so should return nothing.
return if $sequence =~ /T/ && $sequence =~ /U/;
#Remove extra words if supplied #Complement, using standard IUPAC codes
$sequence =~ s/\bof\b//gi; $sequence =~ tr/ATUCGRYKMBVHD/TAAGCYRMKVBDH/;
$sequence =~ s/\bsequence\b//gi;
$sequence =~ s/\b[DR]NA\b//gi;
$sequence =~ s/\bnucleotide\b//gi;
#Remove whitespace and dashes and make uppercase
$sequence =~ s/\s|-//g;
$sequence = uc($sequence);
#Return nothing if the sequence is empty or contains characters
# other than DNA/RNA bases or standard IUPAC ambiguity codes
return unless ($sequence =~ /^[ATCGURYKMSWBVDHN]+$/);
my $normalized_seq = $sequence;
#DNA contains thymine (T) but not uracil (U);
# RNA contains U but not T (with some extremely
# rare exceptions). Hence, if the sequence
# contains both U and T it's more likely to be an
# error than a real molecule so should return nothing.
return if $sequence =~ /T/ && $sequence =~ /U/;
#Complement, using standard IUPAC codes #Reverse
$sequence =~ tr/ATUCGRYKMBVHD/TAAGCYRMKVBDH/; $sequence = reverse($sequence);
#Reverse return $sequence, structured_answer => {
$sequence = reverse($sequence); data => {
title => $sequence,
return $sequence, subtitle => "Nucleotide reverse complement: $normalized_seq"
structured_answer => { },
input => [$normalized_seq], templates => {
operation => 'Nucleotide reverse complement', group => 'text'
result => $sequence }
}; };
}; };

95
t/ReverseComplement.t Normal file → Executable file
View File

@ -9,82 +9,47 @@ use DDG::Test::Goodie;
zci answer_type => 'reverse_complement'; zci answer_type => 'reverse_complement';
zci is_cached => 1; zci is_cached => 1;
my @aaaacccggt = ( my @aaaacccggt = ("ACCGGGTTTT",'AAAACCCGGT');
"ACCGGGTTTT",
structured_answer => { sub build_test {
input => ['AAAACCCGGT'], my ($answer, $input) = @_;
operation => 'Nucleotide reverse complement', return test_zci($answer, structured_answer => {
result => 'ACCGGGTTTT' data => {
}); title => $answer,
subtitle => "Nucleotide reverse complement: $input"
},
templates => {
group => 'text'
}
})
}
ddg_goodie_test( ddg_goodie_test(
[qw( DDG::Goodie::ReverseComplement)], [qw( DDG::Goodie::ReverseComplement)],
#Basic DNA reverse complements, various trigger combinations #Basic DNA reverse complements, various trigger combinations
'AAAACCCGGT reverse complement' => test_zci(@aaaacccggt), 'AAAACCCGGT reverse complement' => build_test(@aaaacccggt),
'reverse complement of TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT' => test_zci( 'reverse complement of TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT' => build_test("ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA", 'TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT'),
"ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA", 'AAAACCCGGT revcomp' => build_test(@aaaacccggt),
structured_answer => { 'revcomp AAAACCCGGT' => build_test(@aaaacccggt),
input => ['TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT'], 'revcomp of AAAACCCGGT' => build_test(@aaaacccggt),
operation => 'Nucleotide reverse complement', 'DNA revcomp of sequence AAAACCCGGT' => build_test(@aaaacccggt),
result => 'ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA' 'reverse complement of RNA sequence AAAACCCGGU' => build_test("ACCGGGTTTT", 'AAAACCCGGU'),
}
),
'AAAACCCGGT revcomp' => test_zci(@aaaacccggt),
'revcomp AAAACCCGGT' => test_zci(@aaaacccggt),
'revcomp of AAAACCCGGT' => test_zci(@aaaacccggt),
'DNA revcomp of sequence AAAACCCGGT' => test_zci(@aaaacccggt),
'reverse complement of RNA sequence AAAACCCGGU' => test_zci(
"ACCGGGTTTT",
structured_answer => {
input => ['AAAACCCGGU'],
operation => 'Nucleotide reverse complement',
result => 'ACCGGGTTTT'
}
),
#RNA reverse complement with acceptable spacing characters #RNA reverse complement with acceptable spacing characters
'reverse complement uca gac gga' => test_zci( 'reverse complement uca gac gga' => build_test("TCCGTCTGA", 'UCAGACGGA'),
"TCCGTCTGA", 'reverse complement of nucleotide sequence uca-gac-gga' => build_test("TCCGTCTGA", 'UCAGACGGA'),
structured_answer => {
input => ['UCAGACGGA'],
operation => 'Nucleotide reverse complement',
result => 'TCCGTCTGA',
}
),
'reverse complement of nucleotide sequence uca-gac-gga' => test_zci(
"TCCGTCTGA",
structured_answer => {
input => ['UCAGACGGA'],
operation => 'Nucleotide reverse complement',
result => 'TCCGTCTGA',
}
),
#With ambiguous bases (both DNA and RNA) #With ambiguous bases (both DNA and RNA)
'reverse complement TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA DNA sequence' 'reverse complement TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA DNA sequence'
=> test_zci( => build_test(
"TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA", "TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA",
structured_answer => { 'TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA'
input => [ ),
'TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA'
],
operation => 'Nucleotide reverse complement',
result =>
'TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA'
},
),
'reverse complement CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV' 'reverse complement CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV'
=> test_zci( => build_test(
"BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG", "BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG",
structured_answer => { 'CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV',
input => [
'CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV'
],
operation => 'Nucleotide reverse complement',
result =>
'BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG'
},
), ),
#Mix of DNA and RNA bases (should return empty, as it is more likely that this #Mix of DNA and RNA bases (should return empty, as it is more likely that this