diff --git a/lib/DDG/Goodie/Cusip.pm b/lib/DDG/Goodie/Cusip.pm index b2b42f52d..319437044 100644 --- a/lib/DDG/Goodie/Cusip.pm +++ b/lib/DDG/Goodie/Cusip.pm @@ -2,7 +2,6 @@ package DDG::Goodie::Cusip; # ABSTRACT: Validate a CUSIP ID's check digit. use DDG::Goodie; -use POSIX; # metadata name "CUSIP check"; @@ -13,82 +12,93 @@ topics "economy_and_finance"; code_url "https://github.com/tommytommytommy/zeroclickinfo-goodies/lib/DDG/Goodie/Cusip.pm"; attribution github => ["https://github.com/tommytommytommy", 'tommytommytommy']; -triggers start => +triggers startend => "cusip"; zci answer_type => "cusip"; handle remainder => sub { - # magic number to identify the length of the CUSIP ID + # magic number to identify the length of the CUSIP ID my $CUSIPLENGTH = 9; # strip beginning and end whitespace from remainder s/^\s+|\s+$//g; + + # capitalize all letters in the CUSIP + $_ = uc; + + # check that the remainder is the correct length and + # only contains alphanumeric chars and *, @, and # + return if not m/^[A-Z0-9\*\@\#]{$CUSIPLENGTH}$/; + + # split the CUSIP ID (without check digit) into an array of characters + my @cusipIdChars = split(//, $_); + my $inputCheckDigit = pop @cusipIdChars; + + # aggregate checksum value + my $checksum = 0; + + # index variable to track current CUSIP char + my $cusipIndex = 0; + + # calculate the checksum for the CUSIP ID + foreach (@cusipIdChars) { + + # this variable stores the integer equivalent of the CUSIP character + my $currentCusipCharValue = 0; + + # map the current CUSIP character into its integer value + # based on the pseudo algorithm provided by + # https://en.wikipedia.org/wiki/CUSIP#Check_digit_pseudocode + if (m/[0-9]/) { + $currentCusipCharValue = ord($_) - ord('0'); + } elsif (m/[A-Z]/) { + $currentCusipCharValue = ord($_) - ord('A') + 10; + } elsif ($_ eq '*') { + $currentCusipCharValue = 36; + } elsif ($_ eq '@') { + $currentCusipCharValue = 37; + } elsif ($_ eq '#') { + $currentCusipCharValue = 38; + } else { + $currentCusipCharValue = 0; + } + + # double the CUSIP value for every other character starting with the second + if (($cusipIndex + 1) % 2 == 0) { + $currentCusipCharValue *= 2; + } + + # the pseudocode in Wikipedia does not explicitly state that truncating + # the division result is necessary, but empirical testing + # with 037833100 for AAPL and 38259P706 and 38259P508 for GOOG show + # that the truncation is necessary + $checksum += int($currentCusipCharValue / 10) + $currentCusipCharValue % 10; - # check that the remainder is the correct length - return unless m/^(.{$CUSIPLENGTH})$/; - - # check that the remainder only contains alphanumeric chars and *, @, and # - return if not m/^[a-zA-Z0-9\*\@\#]+$/; - - # capitalize all letters in the CUSIP - tr/a-z/A-Z/; - - # aggregate checksum value - my $checksum = 0; - - # iteration index - my $cusipIndex; - - # temporary variables for use within the for loop to store - # the current CUSIP character and its equivalent integer value - my $currentCusipChar; - my $currentCusipCharValue; - - # calculate the checksum for the CUSIP - for ($cusipIndex = 0; $cusipIndex < $CUSIPLENGTH - 1; $cusipIndex++) { - - # extract the current CUSIP character - $currentCusipChar = substr $_, $cusipIndex, 1; - - # map the current CUSIP character into its integer value - # based on the pseudo algorithm provided by - # https://en.wikipedia.org/wiki/CUSIP#Check_digit_pseudocode - $currentCusipCharValue = 0; - for ($currentCusipChar) { - if (/[0-9]/) { - $currentCusipCharValue = ord($currentCusipChar) - ord('0'); - } elsif (/[A-Z]/) { - $currentCusipCharValue = ord($currentCusipChar) - ord('A') + 10; - } elsif ($currentCusipChar eq '*') { - $currentCusipCharValue = 36; - } elsif ($currentCusipChar eq '@') { - $currentCusipCharValue = 37; - } elsif ($currentCusipChar eq '#') { - $currentCusipCharValue = 38; - } else { - $currentCusipCharValue = 0; - } - } - - # double the CUSIP value for every other character starting with the second - if (($cusipIndex + 1) % 2 == 0) { - $currentCusipCharValue *= 2; - } - - # the pseudocode in Wikipedia does not explicitly state that floor() - # is required, but empirical testing with 037833100 for AAPL and - # 38259P706 and 38259P508 for GOOG shows that floor() is necessary - $checksum += floor($currentCusipCharValue / 10) + $currentCusipCharValue % 10; + # increment the character position counter + $cusipIndex++; } - # convert the checksum into a single check digit - my $checkDigit = (10 - ($checksum % 10)) % 10; + # convert the checksum into a single check digit + my $calculatedCheckDigit = chr((10 - ($checksum % 10)) % 10 + ord('0')); - # return the validity of the CUSIP - return "$_ has a valid CUSIP check digit." if $checkDigit eq substr($_, -1); - return "$_ does NOT have a valid CUSIP check digit."; + # store answer-specific strings + my ($article, $result); + + # return the validity of the CUSIP + if ($calculatedCheckDigit eq $inputCheckDigit) { + $article = "a"; + $result = "valid"; + } else { + $article = "an"; + $result = "invalid"; + } + + # create and output results + my $output = html_enc($_)." has $article $result CUSIP check digit."; + my $htmlOutput = "
".html_enc($_)." has $article $result CUSIP check digit.
"; + return $output, html => $htmlOutput; }; 1; diff --git a/share/goodie/cusip/cusip.css b/share/goodie/cusip/cusip.css new file mode 100644 index 000000000..ec3bfff90 --- /dev/null +++ b/share/goodie/cusip/cusip.css @@ -0,0 +1,6 @@ +.zci--cusip { + font-size: 1.5em; + font-weight: 300; + padding-top: .25em; + padding-bottom: .25em; +} \ No newline at end of file diff --git a/t/Cusip.t b/t/Cusip.t index d7d8b2d07..f1f3902d6 100644 --- a/t/Cusip.t +++ b/t/Cusip.t @@ -43,54 +43,55 @@ ddg_goodie_test( # triggers that SHOULD load the IA # typical well-formed queries for AAPL and Southwest - 'cusip 037833100' => test_zci("037833100 has a valid CUSIP check digit."), - 'cusip 844741108' => test_zci("844741108 has a valid CUSIP check digit."), + 'cusip 037833100' => test_zci("037833100 has a valid CUSIP check digit.", html => qr/.*/), + 'cusip 844741108' => test_zci("844741108 has a valid CUSIP check digit.", html => qr/.*/), + '037833100 cusip' => test_zci("037833100 has a valid CUSIP check digit.", html => qr/.*/), # starting white space should be stripped - 'cusip 037833100' => test_zci("037833100 has a valid CUSIP check digit."), + 'cusip 037833100' => test_zci("037833100 has a valid CUSIP check digit.", html => qr/.*/), # ending white space should be stripped - 'cusip 037833100 ' => test_zci("037833100 has a valid CUSIP check digit."), + 'cusip 037833100 ' => test_zci("037833100 has a valid CUSIP check digit.", html => qr/.*/), # starting and ending white space should be stripped - 'cusip 037833100 ' => test_zci("037833100 has a valid CUSIP check digit."), + 'cusip 037833100 ' => test_zci("037833100 has a valid CUSIP check digit.", html => qr/.*/), # same AAPL queries with an incorrect check digit - 'cusip 03783310A' => test_zci("03783310A does NOT have a valid CUSIP check digit."), - 'cusip 03783310A' => test_zci("03783310A does NOT have a valid CUSIP check digit."), - 'cusip 03783310A ' => test_zci("03783310A does NOT have a valid CUSIP check digit."), - 'cusip 03783310A ' => test_zci("03783310A does NOT have a valid CUSIP check digit."), + 'cusip 03783310A' => test_zci("03783310A has an invalid CUSIP check digit.", html => qr/.*/), + 'cusip 03783310A' => test_zci("03783310A has an invalid CUSIP check digit.", html => qr/.*/), + 'cusip 03783310A ' => test_zci("03783310A has an invalid CUSIP check digit.", html => qr/.*/), + 'cusip 03783310A ' => test_zci("03783310A has an invalid CUSIP check digit.", html => qr/.*/), # check CUSIP IDs with capital letters (these are for GOOG and Blackberry) - 'cusip 38259P706' => test_zci("38259P706 has a valid CUSIP check digit."), - 'cusip 38259P508' => test_zci("38259P508 has a valid CUSIP check digit."), - 'cusip 09228F103' => test_zci("09228F103 has a valid CUSIP check digit."), + 'cusip 38259P706' => test_zci("38259P706 has a valid CUSIP check digit.", html => qr/.*/), + 'cusip 38259P508' => test_zci("38259P508 has a valid CUSIP check digit.", html => qr/.*/), + 'cusip 09228F103' => test_zci("09228F103 has a valid CUSIP check digit.", html => qr/.*/), # check the same CUSIP IDs with lower case letters - 'cusip 38259p706' => test_zci("38259P706 has a valid CUSIP check digit."), - 'cusip 38259p508' => test_zci("38259P508 has a valid CUSIP check digit."), - 'cusip 09228f103' => test_zci("09228F103 has a valid CUSIP check digit."), + 'cusip 38259p706' => test_zci("38259P706 has a valid CUSIP check digit.", html => qr/.*/), + 'cusip 38259p508' => test_zci("38259P508 has a valid CUSIP check digit.", html => qr/.*/), + 'cusip 09228f103' => test_zci("09228F103 has a valid CUSIP check digit.", html => qr/.*/), # check CUSIP IDs with '*', '#', and '@' # these CUSIP ID check digits were calculated by hand # if possible, these tests should be replaced with verified CUSIP IDs - 'cusip 037833*00' => test_zci("037833*00 does NOT have a valid CUSIP check digit."), - 'cusip 037833*02' => test_zci("037833*02 has a valid CUSIP check digit."), - 'cusip 0378331#0' => test_zci("0378331#0 does NOT have a valid CUSIP check digit."), - 'cusip 0378331#7' => test_zci("0378331#7 has a valid CUSIP check digit."), - 'cusip 037833@00' => test_zci("037833\@00 does NOT have a valid CUSIP check digit."), - 'cusip 037833@01' => test_zci("037833\@01 has a valid CUSIP check digit."), + 'cusip 037833*00' => test_zci("037833*00 has an invalid CUSIP check digit.", html => qr/.*/), + 'cusip 037833*02' => test_zci("037833*02 has a valid CUSIP check digit.", html => qr/.*/), + 'cusip 0378331#0' => test_zci("0378331#0 has an invalid CUSIP check digit.", html => qr/.*/), + 'cusip 0378331#7' => test_zci("0378331#7 has a valid CUSIP check digit.", html => qr/.*/), + 'cusip 037833@00' => test_zci("037833\@00 has an invalid CUSIP check digit.", html => qr/.*/), + 'cusip 037833@01' => test_zci("037833\@01 has a valid CUSIP check digit.", html => qr/.*/), # CUSIP IDs ending in '*', '#', and '@' should not break the IA # even though they are always invalid IDs - 'cusip 03783310*' => test_zci("03783310* does NOT have a valid CUSIP check digit."), - 'cusip 03783310#' => test_zci("03783310# does NOT have a valid CUSIP check digit."), - 'cusip 03783310@' => test_zci("03783310\@ does NOT have a valid CUSIP check digit."), + 'cusip 03783310*' => test_zci("03783310* has an invalid CUSIP check digit.", html => qr/.*/), + 'cusip 03783310#' => test_zci("03783310# has an invalid CUSIP check digit.", html => qr/.*/), + 'cusip 03783310@' => test_zci("03783310\@ has an invalid CUSIP check digit.", html => qr/.*/), # Odd CUSIP IDs should not break the IA - 'cusip ********8' => test_zci("********8 has a valid CUSIP check digit."), - 'cusip ########9' => test_zci("########9 does NOT have a valid CUSIP check digit."), - 'cusip @#*@#*@#*' => test_zci("\@#*\@#*\@#* does NOT have a valid CUSIP check digit."), + 'cusip ********8' => test_zci("********8 has a valid CUSIP check digit.", html => qr/.*/), + 'cusip ########9' => test_zci("########9 has an invalid CUSIP check digit.", html => qr/.*/), + 'cusip @#*@#*@#*' => test_zci("\@#*\@#*\@#* has an invalid CUSIP check digit.", html => qr/.*/), ); done_testing;