ReverseComplement: Switching to text template

master
Rob Emery 2016-05-18 22:20:30 +01:00
parent e71eb3c5d4
commit 3c152febc2
2 changed files with 65 additions and 98 deletions

View File

@ -12,39 +12,41 @@ zci is_cached => 1;
# Handler for the query remainder: strips filler words, validates that what is
# left is a nucleotide sequence, and answers with its reverse complement.
# Returns nothing (no answer) for empty, invalid or mixed T/U input.
# NOTE(review): the statements below appear twice because this listing seems to
# interleave the pre-change and post-change bodies of the handler; the first
# copy returns the input/operation/result structure, the second returns the
# data/templates (text template) structure -- confirm against the repository.
handle remainder => sub {
my $sequence = $_;
#Remove filler words if supplied ("of", "sequence", "DNA"/"RNA", "nucleotide")
$sequence =~ s/\bof\b//gi;
$sequence =~ s/\bsequence\b//gi;
$sequence =~ s/\b[DR]NA\b//gi;
$sequence =~ s/\bnucleotide\b//gi;
#Remove whitespace and dashes and make uppercase
$sequence =~ s/\s|-//g;
$sequence = uc($sequence);
#Return nothing if the sequence is empty or contains characters
# other than DNA/RNA bases or standard IUPAC ambiguity codes
return unless ($sequence =~ /^[ATCGURYKMSWBVDHN]+$/);
#Keep the cleaned-up input for display before it is complemented in place
my $normalized_seq = $sequence;
#DNA contains thymine (T) but not uracil (U);
# RNA contains U but not T (with some extremely
# rare exceptions). Hence, if the sequence
# contains both U and T it's more likely to be an
# error than a real molecule so should return nothing.
return if $sequence =~ /T/ && $sequence =~ /U/;
#Complement, using standard IUPAC codes.
# S, W and N are not in the map and pass through unchanged; both T and U
# map to A and A maps to T, so the result never contains U.
$sequence =~ tr/ATUCGRYKMBVHD/TAAGCYRMKVBDH/;
#Reverse
$sequence = reverse($sequence);
#Plain-text answer followed by the structured answer
return $sequence,
structured_answer => {
input => [$normalized_seq],
operation => 'Nucleotide reverse complement',
result => $sequence
#NOTE(review): presumably the post-change body of the handler starts here
my $sequence = $_;
#Remove filler words if supplied ("of", "sequence", "DNA"/"RNA", "nucleotide")
$sequence =~ s/\bof\b//gi;
$sequence =~ s/\bsequence\b//gi;
$sequence =~ s/\b[DR]NA\b//gi;
$sequence =~ s/\bnucleotide\b//gi;
#Remove whitespace and dashes and make uppercase
$sequence =~ s/\s|-//g;
$sequence = uc($sequence);
#Return nothing if the sequence is empty or contains characters
# other than DNA/RNA bases or standard IUPAC ambiguity codes
return unless ($sequence =~ /^[ATCGURYKMSWBVDHN]+$/);
#Keep the cleaned-up input for display before it is complemented in place
my $normalized_seq = $sequence;
#DNA contains thymine (T) but not uracil (U);
# RNA contains U but not T (with some extremely
# rare exceptions). Hence, if the sequence
# contains both U and T it's more likely to be an
# error than a real molecule so should return nothing.
return if $sequence =~ /T/ && $sequence =~ /U/;
#Complement, using standard IUPAC codes.
# S, W and N are not in the map and pass through unchanged; both T and U
# map to A and A maps to T, so the result never contains U.
$sequence =~ tr/ATUCGRYKMBVHD/TAAGCYRMKVBDH/;
#Reverse
$sequence = reverse($sequence);
#Plain-text answer followed by the structured answer: the result is the
# title and the cleaned-up input is echoed in the subtitle
return $sequence, structured_answer => {
data => {
title => $sequence,
subtitle => "Nucleotide reverse complement: $normalized_seq"
},
templates => {
group => 'text'
}
};
};

95
t/ReverseComplement.t Normal file → Executable file
View File

@ -9,82 +9,47 @@ use DDG::Test::Goodie;
# NOTE(review): presumably these set the answer type and caching flag that
# every expected result below is checked against -- confirm in DDG::Test::Goodie.
zci answer_type => 'reverse_complement';
zci is_cached => 1;
# Shared fixture for the AAAACCCGGT queries.
# NOTE(review): it is declared twice because this listing seems to show the
# pre-change and post-change declarations back to back. This first form is the
# full argument list handed straight to test_zci.
my @aaaacccggt = (
"ACCGGGTTTT",
structured_answer => {
input => ['AAAACCCGGT'],
operation => 'Nucleotide reverse complement',
result => 'ACCGGGTTTT'
});
# Second form: just (expected answer, normalized input), the two positional
# arguments build_test unpacks.
my @aaaacccggt = ("ACCGGGTTTT",'AAAACCCGGT');
# Builds the expected result for one query via test_zci.
#
# Arguments (positional):
#   1. the expected plain-text answer, also used as the structured title
#   2. the normalized input sequence, echoed in the structured subtitle
#
# Returns whatever test_zci returns for that answer and structured answer.
sub build_test {
    my ($expected_answer, $normalized_input) = @_;

    # Payload shown by the 'text' template group.
    my %text_data = (
        title    => $expected_answer,
        subtitle => "Nucleotide reverse complement: $normalized_input",
    );

    my %structured = (
        data      => \%text_data,
        templates => { group => 'text' },
    );

    return test_zci($expected_answer, structured_answer => \%structured);
}
ddg_goodie_test(
[qw( DDG::Goodie::ReverseComplement)],
#Basic DNA reverse complements, various trigger combinations
'AAAACCCGGT reverse complement' => test_zci(@aaaacccggt),
'reverse complement of TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT' => test_zci(
"ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA",
structured_answer => {
input => ['TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT'],
operation => 'Nucleotide reverse complement',
result => 'ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA'
}
),
'AAAACCCGGT revcomp' => test_zci(@aaaacccggt),
'revcomp AAAACCCGGT' => test_zci(@aaaacccggt),
'revcomp of AAAACCCGGT' => test_zci(@aaaacccggt),
'DNA revcomp of sequence AAAACCCGGT' => test_zci(@aaaacccggt),
'reverse complement of RNA sequence AAAACCCGGU' => test_zci(
"ACCGGGTTTT",
structured_answer => {
input => ['AAAACCCGGU'],
operation => 'Nucleotide reverse complement',
result => 'ACCGGGTTTT'
}
),
'AAAACCCGGT reverse complement' => build_test(@aaaacccggt),
'reverse complement of TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT' => build_test("ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA", 'TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT'),
'AAAACCCGGT revcomp' => build_test(@aaaacccggt),
'revcomp AAAACCCGGT' => build_test(@aaaacccggt),
'revcomp of AAAACCCGGT' => build_test(@aaaacccggt),
'DNA revcomp of sequence AAAACCCGGT' => build_test(@aaaacccggt),
'reverse complement of RNA sequence AAAACCCGGU' => build_test("ACCGGGTTTT", 'AAAACCCGGU'),
#RNA reverse complement with acceptable spacing characters
'reverse complement uca gac gga' => test_zci(
"TCCGTCTGA",
structured_answer => {
input => ['UCAGACGGA'],
operation => 'Nucleotide reverse complement',
result => 'TCCGTCTGA',
}
),
'reverse complement of nucleotide sequence uca-gac-gga' => test_zci(
"TCCGTCTGA",
structured_answer => {
input => ['UCAGACGGA'],
operation => 'Nucleotide reverse complement',
result => 'TCCGTCTGA',
}
),
'reverse complement uca gac gga' => build_test("TCCGTCTGA", 'UCAGACGGA'),
'reverse complement of nucleotide sequence uca-gac-gga' => build_test("TCCGTCTGA", 'UCAGACGGA'),
#With ambiguous bases (both DNA and RNA)
'reverse complement TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA DNA sequence'
=> test_zci(
"TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA",
structured_answer => {
input => [
'TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA'
],
operation => 'Nucleotide reverse complement',
result =>
'TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA'
},
),
=> build_test(
"TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA",
'TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA'
),
'reverse complement CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV'
=> test_zci(
"BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG",
structured_answer => {
input => [
'CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV'
],
operation => 'Nucleotide reverse complement',
result =>
'BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG'
},
=> build_test(
"BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG",
'CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV',
),
#Mix of DNA and RNA bases (should return empty, as it is more likely that this