2014-06-03 19:22:39 -07:00
|
|
|
package DDG::Goodie::ReverseComplement;
|
|
|
|
# ABSTRACT: Give the DNA reverse complement of a DNA or RNA sequence.
|
|
|
|
|
2015-02-22 12:09:29 -08:00
|
|
|
use strict;
|
2014-06-03 19:22:39 -07:00
|
|
|
use DDG::Goodie;
|
2014-06-07 19:26:48 -07:00
|
|
|
use feature 'state';
|
2014-06-03 19:22:39 -07:00
|
|
|
|
2014-07-01 03:13:42 -07:00
|
|
|
# Fire when either phrase appears anywhere in the query.
triggers(any => 'reverse complement', 'revcomp');

# Instant-answer settings: the answer type label and the is_cached flag.
zci(answer_type => 'reverse_complement');
zci(is_cached   => 1);

# Descriptive metadata for this Goodie.
name('Reverse Complement');
description('Give the DNA reverse complement of a DNA or RNA sequence');
primary_example_queries('revcomp AAAACCCGGT');
category('transformations');
topics('science');
code_url('https://github.com/duckduckgo/zeroclickinfo-goodies/blob/master/lib/DDG/Goodie/ReverseComplement.pm');

# Author credit.
attribution(github => ['http://github.com/wilkox', 'David Wilkins']);
|
2014-06-03 19:22:39 -07:00
|
|
|
|
|
|
|
# Build the reverse complement of the nucleotide sequence found in the
# remainder of the query.
#
# Input:   $_ holds the remainder string.
# Returns: nothing (bare return) when the cleaned-up text is not a plausible
#          sequence; otherwise the reverse-complemented string followed by a
#          structured_answer hashref carrying the normalized input, the
#          operation label and the result.
handle remainder => sub {
    my $query = $_;

    # Strip filler words that may accompany the sequence, one pattern at a
    # time and case-insensitively.
    my @filler_words = (
        qr/\bof\b/i,
        qr/\bsequence\b/i,
        qr/\b[DR]NA\b/i,
        qr/\bnucleotide\b/i,
    );
    for my $filler (@filler_words) {
        $query =~ s/$filler//g;
    }

    # Squeeze out whitespace and dashes, then fold everything to uppercase.
    $query =~ s/\s|-//g;
    my $normalized_seq = uc $query;

    # Give no answer if nothing is left, or if anything other than DNA/RNA
    # bases and the standard IUPAC ambiguity codes remains.
    return unless $normalized_seq =~ /^[ATCGURYKMSWBVDHN]+$/;

    # DNA carries thymine (T) and RNA carries uracil (U); apart from
    # extremely rare exceptions a molecule never has both, so a sequence
    # mixing the two is far more likely a typo than real data. Stay silent.
    my $has_thymine = $normalized_seq =~ /T/;
    my $has_uracil  = $normalized_seq =~ /U/;
    return if $has_thymine && $has_uracil;

    # Reverse first, then complement with the standard IUPAC pairings.
    # The order of the two steps does not matter because the translation
    # acts on each character independently. Codes missing from the table
    # (S, W, N) are left unchanged by tr.
    my $revcomp = reverse $normalized_seq;
    $revcomp =~ tr/ATUCGRYKMBVHD/TAAGCYRMKVBDH/;

    my %structured = (
        input     => [$normalized_seq],
        operation => 'Nucleotide reverse complement',
        result    => $revcomp,
    );

    return ($revcomp, structured_answer => \%structured);
};
|
|
|
|
|
|
|
|
1;
|