ReverseComplement: Switching to text template
parent
e71eb3c5d4
commit
3c152febc2
|
@ -12,39 +12,41 @@ zci is_cached => 1;
|
||||||
|
|
||||||
handle remainder => sub {
|
handle remainder => sub {
|
||||||
|
|
||||||
my $sequence = $_;
|
my $sequence = $_;
|
||||||
|
|
||||||
|
#Remove extra words if supplied
|
||||||
|
$sequence =~ s/\bof\b//gi;
|
||||||
|
$sequence =~ s/\bsequence\b//gi;
|
||||||
|
$sequence =~ s/\b[DR]NA\b//gi;
|
||||||
|
$sequence =~ s/\bnucleotide\b//gi;
|
||||||
|
#Remove whitespace and dashes and make uppercase
|
||||||
|
$sequence =~ s/\s|-//g;
|
||||||
|
$sequence = uc($sequence);
|
||||||
|
#Return nothing if sequence does not contain characters or contains characters
|
||||||
|
# other than DNA/RNA bases or standard IUPAC ambiguity codes
|
||||||
|
return unless ($sequence =~ /^[ATCGURYKMSWBVDHN]+$/);
|
||||||
|
my $normalized_seq = $sequence;
|
||||||
|
#DNA contains thymine (T) but not uracil (U);
|
||||||
|
# RNA contains U but not T (with some extremely
|
||||||
|
# rare exceptions). Hence, if the sequence
|
||||||
|
# contains both U and T it's more likely to be an
|
||||||
|
# error than a real molecule so should return nothing.
|
||||||
|
return if $sequence =~ /T/ && $sequence =~ /U/;
|
||||||
|
|
||||||
#Remove extra words if supplied
|
#Complement, using standard IUPAC codes
|
||||||
$sequence =~ s/\bof\b//gi;
|
$sequence =~ tr/ATUCGRYKMBVHD/TAAGCYRMKVBDH/;
|
||||||
$sequence =~ s/\bsequence\b//gi;
|
|
||||||
$sequence =~ s/\b[DR]NA\b//gi;
|
|
||||||
$sequence =~ s/\bnucleotide\b//gi;
|
|
||||||
#Remove whitespace and dashes and make uppercase
|
|
||||||
$sequence =~ s/\s|-//g;
|
|
||||||
$sequence = uc($sequence);
|
|
||||||
#Return nothing if sequence does not contain characters or contains characters
|
|
||||||
# other than DNA/RNA bases or standard IUPAC ambiguity codes
|
|
||||||
return unless ($sequence =~ /^[ATCGURYKMSWBVDHN]+$/);
|
|
||||||
my $normalized_seq = $sequence;
|
|
||||||
#DNA contains thymine (T) but not uracil (U);
|
|
||||||
# RNA contains U but not T (with some extremely
|
|
||||||
# rare exceptions). Hence, if the sequence
|
|
||||||
# contains both U and T it's more likely to be an
|
|
||||||
# error than a real molecule so should return nothing.
|
|
||||||
return if $sequence =~ /T/ && $sequence =~ /U/;
|
|
||||||
|
|
||||||
#Complement, using standard IUPAC codes
|
#Reverse
|
||||||
$sequence =~ tr/ATUCGRYKMBVHD/TAAGCYRMKVBDH/;
|
$sequence = reverse($sequence);
|
||||||
|
|
||||||
#Reverse
|
return $sequence, structured_answer => {
|
||||||
$sequence = reverse($sequence);
|
data => {
|
||||||
|
title => $sequence,
|
||||||
return $sequence,
|
subtitle => "Nucleotide reverse complement: $normalized_seq"
|
||||||
structured_answer => {
|
},
|
||||||
input => [$normalized_seq],
|
templates => {
|
||||||
operation => 'Nucleotide reverse complement',
|
group => 'text'
|
||||||
result => $sequence
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -9,82 +9,47 @@ use DDG::Test::Goodie;
|
||||||
zci answer_type => 'reverse_complement';
|
zci answer_type => 'reverse_complement';
|
||||||
zci is_cached => 1;
|
zci is_cached => 1;
|
||||||
|
|
||||||
my @aaaacccggt = (
|
my @aaaacccggt = ("ACCGGGTTTT",'AAAACCCGGT');
|
||||||
"ACCGGGTTTT",
|
|
||||||
structured_answer => {
|
sub build_test {
|
||||||
input => ['AAAACCCGGT'],
|
my ($answer, $input) = @_;
|
||||||
operation => 'Nucleotide reverse complement',
|
return test_zci($answer, structured_answer => {
|
||||||
result => 'ACCGGGTTTT'
|
data => {
|
||||||
});
|
title => $answer,
|
||||||
|
subtitle => "Nucleotide reverse complement: $input"
|
||||||
|
},
|
||||||
|
templates => {
|
||||||
|
group => 'text'
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
ddg_goodie_test(
|
ddg_goodie_test(
|
||||||
[qw( DDG::Goodie::ReverseComplement)],
|
[qw( DDG::Goodie::ReverseComplement)],
|
||||||
|
|
||||||
#Basic DNA reverse complements, various trigger combinations
|
#Basic DNA reverse complements, various trigger combinations
|
||||||
'AAAACCCGGT reverse complement' => test_zci(@aaaacccggt),
|
'AAAACCCGGT reverse complement' => build_test(@aaaacccggt),
|
||||||
'reverse complement of TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT' => test_zci(
|
'reverse complement of TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT' => build_test("ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA", 'TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT'),
|
||||||
"ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA",
|
'AAAACCCGGT revcomp' => build_test(@aaaacccggt),
|
||||||
structured_answer => {
|
'revcomp AAAACCCGGT' => build_test(@aaaacccggt),
|
||||||
input => ['TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT'],
|
'revcomp of AAAACCCGGT' => build_test(@aaaacccggt),
|
||||||
operation => 'Nucleotide reverse complement',
|
'DNA revcomp of sequence AAAACCCGGT' => build_test(@aaaacccggt),
|
||||||
result => 'ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA'
|
'reverse complement of RNA sequence AAAACCCGGU' => build_test("ACCGGGTTTT", 'AAAACCCGGU'),
|
||||||
}
|
|
||||||
),
|
|
||||||
'AAAACCCGGT revcomp' => test_zci(@aaaacccggt),
|
|
||||||
'revcomp AAAACCCGGT' => test_zci(@aaaacccggt),
|
|
||||||
'revcomp of AAAACCCGGT' => test_zci(@aaaacccggt),
|
|
||||||
'DNA revcomp of sequence AAAACCCGGT' => test_zci(@aaaacccggt),
|
|
||||||
'reverse complement of RNA sequence AAAACCCGGU' => test_zci(
|
|
||||||
"ACCGGGTTTT",
|
|
||||||
structured_answer => {
|
|
||||||
input => ['AAAACCCGGU'],
|
|
||||||
operation => 'Nucleotide reverse complement',
|
|
||||||
result => 'ACCGGGTTTT'
|
|
||||||
}
|
|
||||||
),
|
|
||||||
|
|
||||||
#RNA reverse complement with acceptable spacing characters
|
#RNA reverse complement with acceptable spacing characters
|
||||||
'reverse complement uca gac gga' => test_zci(
|
'reverse complement uca gac gga' => build_test("TCCGTCTGA", 'UCAGACGGA'),
|
||||||
"TCCGTCTGA",
|
'reverse complement of nucleotide sequence uca-gac-gga' => build_test("TCCGTCTGA", 'UCAGACGGA'),
|
||||||
structured_answer => {
|
|
||||||
input => ['UCAGACGGA'],
|
|
||||||
operation => 'Nucleotide reverse complement',
|
|
||||||
result => 'TCCGTCTGA',
|
|
||||||
}
|
|
||||||
),
|
|
||||||
'reverse complement of nucleotide sequence uca-gac-gga' => test_zci(
|
|
||||||
"TCCGTCTGA",
|
|
||||||
structured_answer => {
|
|
||||||
input => ['UCAGACGGA'],
|
|
||||||
operation => 'Nucleotide reverse complement',
|
|
||||||
result => 'TCCGTCTGA',
|
|
||||||
}
|
|
||||||
),
|
|
||||||
|
|
||||||
#With ambiguous bases (both DNA and RNA)
|
#With ambiguous bases (both DNA and RNA)
|
||||||
'reverse complement TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA DNA sequence'
|
'reverse complement TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA DNA sequence'
|
||||||
=> test_zci(
|
=> build_test(
|
||||||
"TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA",
|
"TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA",
|
||||||
structured_answer => {
|
'TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA'
|
||||||
input => [
|
),
|
||||||
'TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA'
|
|
||||||
],
|
|
||||||
operation => 'Nucleotide reverse complement',
|
|
||||||
result =>
|
|
||||||
'TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA'
|
|
||||||
},
|
|
||||||
),
|
|
||||||
'reverse complement CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV'
|
'reverse complement CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV'
|
||||||
=> test_zci(
|
=> build_test(
|
||||||
"BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG",
|
"BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG",
|
||||||
structured_answer => {
|
'CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV',
|
||||||
input => [
|
|
||||||
'CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV'
|
|
||||||
],
|
|
||||||
operation => 'Nucleotide reverse complement',
|
|
||||||
result =>
|
|
||||||
'BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG'
|
|
||||||
},
|
|
||||||
),
|
),
|
||||||
|
|
||||||
#Mix of DNA and RNA bases (should return empty, as it is more likely that this
|
#Mix of DNA and RNA bases (should return empty, as it is more likely that this
|
||||||
|
|
Loading…
Reference in New Issue