ReverseComplement: Switching to text template
parent
e71eb3c5d4
commit
3c152febc2
|
@ -12,39 +12,41 @@ zci is_cached => 1;
|
|||
|
||||
handle remainder => sub {
|
||||
|
||||
my $sequence = $_;
|
||||
|
||||
|
||||
#Remove extra words if supplied
|
||||
$sequence =~ s/\bof\b//gi;
|
||||
$sequence =~ s/\bsequence\b//gi;
|
||||
$sequence =~ s/\b[DR]NA\b//gi;
|
||||
$sequence =~ s/\bnucleotide\b//gi;
|
||||
#Remove whitespace and dashes and make uppercase
|
||||
$sequence =~ s/\s|-//g;
|
||||
$sequence = uc($sequence);
|
||||
#Return nothing if sequence does not contain any characters or contains characters
|
||||
# other than DNA/RNA bases or standard IUPAC ambiguity codes
|
||||
return unless ($sequence =~ /^[ATCGURYKMSWBVDHN]+$/);
|
||||
my $normalized_seq = $sequence;
|
||||
#DNA contains thymine (T) but not uracil (U);
|
||||
# RNA contains U but not T (with some extremely
|
||||
# rare exceptions). Hence, if the sequence
|
||||
# contains both U and T it's more likely to be an
|
||||
# error than a real molecule so should return nothing.
|
||||
return if $sequence =~ /T/ && $sequence =~ /U/;
|
||||
|
||||
#Complement, using standard IUPAC codes
|
||||
$sequence =~ tr/ATUCGRYKMBVHD/TAAGCYRMKVBDH/;
|
||||
|
||||
#Reverse
|
||||
$sequence = reverse($sequence);
|
||||
|
||||
return $sequence,
|
||||
structured_answer => {
|
||||
input => [$normalized_seq],
|
||||
operation => 'Nucleotide reverse complement',
|
||||
result => $sequence
|
||||
my $sequence = $_;
|
||||
|
||||
#Remove extra words if supplied
|
||||
$sequence =~ s/\bof\b//gi;
|
||||
$sequence =~ s/\bsequence\b//gi;
|
||||
$sequence =~ s/\b[DR]NA\b//gi;
|
||||
$sequence =~ s/\bnucleotide\b//gi;
|
||||
#Remove whitespace and dashes and make uppercase
|
||||
$sequence =~ s/\s|-//g;
|
||||
$sequence = uc($sequence);
|
||||
#Return nothing if sequence does not contain any characters or contains characters
|
||||
# other than DNA/RNA bases or standard IUPAC ambiguity codes
|
||||
return unless ($sequence =~ /^[ATCGURYKMSWBVDHN]+$/);
|
||||
my $normalized_seq = $sequence;
|
||||
#DNA contains thymine (T) but not uracil (U);
|
||||
# RNA contains U but not T (with some extremely
|
||||
# rare exceptions). Hence, if the sequence
|
||||
# contains both U and T it's more likely to be an
|
||||
# error than a real molecule so should return nothing.
|
||||
return if $sequence =~ /T/ && $sequence =~ /U/;
|
||||
|
||||
#Complement, using standard IUPAC codes
|
||||
$sequence =~ tr/ATUCGRYKMBVHD/TAAGCYRMKVBDH/;
|
||||
|
||||
#Reverse
|
||||
$sequence = reverse($sequence);
|
||||
|
||||
return $sequence, structured_answer => {
|
||||
data => {
|
||||
title => $sequence,
|
||||
subtitle => "Nucleotide reverse complement: $normalized_seq"
|
||||
},
|
||||
templates => {
|
||||
group => 'text'
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
|
|
@ -9,82 +9,47 @@ use DDG::Test::Goodie;
|
|||
zci answer_type => 'reverse_complement';
|
||||
zci is_cached => 1;
|
||||
|
||||
my @aaaacccggt = (
|
||||
"ACCGGGTTTT",
|
||||
structured_answer => {
|
||||
input => ['AAAACCCGGT'],
|
||||
operation => 'Nucleotide reverse complement',
|
||||
result => 'ACCGGGTTTT'
|
||||
});
|
||||
my @aaaacccggt = ("ACCGGGTTTT",'AAAACCCGGT');
|
||||
|
||||
# Build the expected result for one reverse-complement test case.
#   $answer - expected reverse complement; passed to test_zci as the plain
#             answer and also used as the structured answer's title.
#   $input  - normalized input sequence, interpolated into the subtitle
#             after the "Nucleotide reverse complement: " prefix.
# Returns whatever test_zci returns for that answer and a structured answer
# using the 'text' template group.
sub build_test {
|
||||
my ($answer, $input) = @_;
|
||||
return test_zci($answer, structured_answer => {
|
||||
data => {
|
||||
title => $answer,
|
||||
subtitle => "Nucleotide reverse complement: $input"
|
||||
},
|
||||
templates => {
|
||||
group => 'text'
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
ddg_goodie_test(
|
||||
[qw( DDG::Goodie::ReverseComplement)],
|
||||
|
||||
#Basic DNA reverse complements, various trigger combinations
|
||||
'AAAACCCGGT reverse complement' => test_zci(@aaaacccggt),
|
||||
'reverse complement of TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT' => test_zci(
|
||||
"ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA",
|
||||
structured_answer => {
|
||||
input => ['TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT'],
|
||||
operation => 'Nucleotide reverse complement',
|
||||
result => 'ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA'
|
||||
}
|
||||
),
|
||||
'AAAACCCGGT revcomp' => test_zci(@aaaacccggt),
|
||||
'revcomp AAAACCCGGT' => test_zci(@aaaacccggt),
|
||||
'revcomp of AAAACCCGGT' => test_zci(@aaaacccggt),
|
||||
'DNA revcomp of sequence AAAACCCGGT' => test_zci(@aaaacccggt),
|
||||
'reverse complement of RNA sequence AAAACCCGGU' => test_zci(
|
||||
"ACCGGGTTTT",
|
||||
structured_answer => {
|
||||
input => ['AAAACCCGGU'],
|
||||
operation => 'Nucleotide reverse complement',
|
||||
result => 'ACCGGGTTTT'
|
||||
}
|
||||
),
|
||||
'AAAACCCGGT reverse complement' => build_test(@aaaacccggt),
|
||||
'reverse complement of TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT' => build_test("ACGCCGCCAGCGTTCGTCCTGAGCCATGATCAAA", 'TTTGATCATGGCTCAGGACGAACGCTGGCGGCGT'),
|
||||
'AAAACCCGGT revcomp' => build_test(@aaaacccggt),
|
||||
'revcomp AAAACCCGGT' => build_test(@aaaacccggt),
|
||||
'revcomp of AAAACCCGGT' => build_test(@aaaacccggt),
|
||||
'DNA revcomp of sequence AAAACCCGGT' => build_test(@aaaacccggt),
|
||||
'reverse complement of RNA sequence AAAACCCGGU' => build_test("ACCGGGTTTT", 'AAAACCCGGU'),
|
||||
|
||||
#RNA reverse complement with acceptable spacing characters
|
||||
'reverse complement uca gac gga' => test_zci(
|
||||
"TCCGTCTGA",
|
||||
structured_answer => {
|
||||
input => ['UCAGACGGA'],
|
||||
operation => 'Nucleotide reverse complement',
|
||||
result => 'TCCGTCTGA',
|
||||
}
|
||||
),
|
||||
'reverse complement of nucleotide sequence uca-gac-gga' => test_zci(
|
||||
"TCCGTCTGA",
|
||||
structured_answer => {
|
||||
input => ['UCAGACGGA'],
|
||||
operation => 'Nucleotide reverse complement',
|
||||
result => 'TCCGTCTGA',
|
||||
}
|
||||
),
|
||||
'reverse complement uca gac gga' => build_test("TCCGTCTGA", 'UCAGACGGA'),
|
||||
'reverse complement of nucleotide sequence uca-gac-gga' => build_test("TCCGTCTGA", 'UCAGACGGA'),
|
||||
|
||||
#With ambiguous bases (both DNA and RNA)
|
||||
'reverse complement TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA DNA sequence'
|
||||
=> test_zci(
|
||||
"TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA",
|
||||
structured_answer => {
|
||||
input => [
|
||||
'TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA'
|
||||
],
|
||||
operation => 'Nucleotide reverse complement',
|
||||
result =>
|
||||
'TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA'
|
||||
},
|
||||
),
|
||||
=> build_test(
|
||||
"TGAGTTTCAACCTTGSGATCGTACTCBAGGMGGTATGCTTATCAGGTTGCCTTCGKACTGAGTGATTACTCACBCYCACCAAGCATACATCGTTTATGGYGTGGACTACCAGGGTATKTAATCCHWWTTTGA",
|
||||
'TCAAAWWDGGATTAMATACCCTGGTAGTCCACRCCATAAACGATGTATGCTTGGTGRGVGTGAGTAATCACTCAGTMCGAAGGCAACCTGATAAGCATACCKCCTVGAGTACGATCSCAAGGTTGAAACTCA'
|
||||
),
|
||||
'reverse complement CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV'
|
||||
=> test_zci(
|
||||
"BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG",
|
||||
structured_answer => {
|
||||
input => [
|
||||
'CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV'
|
||||
],
|
||||
operation => 'Nucleotide reverse complement',
|
||||
result =>
|
||||
'BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG'
|
||||
},
|
||||
=> build_test(
|
||||
"BCGTWWCCCGGACGKACAAAATCGCDCCTCTCGTCCCTCTTGACYAGCGHGTTTACGAGGCTAAGCCCCTTCATCCCGCACGCGYCGTCGCRTGGTTCAGACTTGCYGTCCAMTTGACCAATATTCCTCACTGCTGCCTCCCGTAGGAGTCTGGACCGRTGTCTCAGTTCCAGTGTGGCTGTTCGTCMCTCTCAGACCAGCTACCGASTCGTCGGCTTGGMTAG",
|
||||
'CUAKCCAAGCCGACGASUCGGUAGCUGGUCUGAGAGKGACGAACAGCCACACUGGAACUGAGACAYCGGUCCAGACUCCUACGGGAGGCAGCAGUGAGGAAUAUUGGUCAAKUGGACRGCAAGUCUGAACCAYGCGACGRCGCGUGCGGGAUGAAGGGGCUUAGCCUCGUAAACDCGCURGUCAAGAGGGACGAGAGGHGCGAUUUUGUMCGUCCGGGWWACGV',
|
||||
),
|
||||
|
||||
#Mix of DNA and RNA bases (should return empty, as it is more likely that this
|
||||
|
|
Loading…
Reference in New Issue