Merge pull request #3303 from eflyjason/pinyin

New Chinese to Pinyin Goodies
master
Daniel Davis 2016-07-07 17:59:52 +09:00 committed by GitHub
commit 7bc3c9bc3f
2 changed files with 219 additions and 0 deletions

View File

@ -0,0 +1,145 @@
package DDG::Goodie::ChineseToPinyin;
# ABSTRACT: Get Pinyin of a Chinese string.
use strict;
# This file contains literal Han characters and tone-marked vowels.
use utf8;
use DDG::Goodie;
# Supplies han2pinyin(), which the query handler uses for the actual lookup.
use Lingua::Han::PinYin;
# Answer metadata for this Goodie.
zci answer_type => 'chinese_to_pinyin';
zci is_cached => 1;
# Trigger keywords; "startend" presumably means the keyword may open or close
# the query -- confirm against the DuckDuckGo Goodie trigger documentation.
triggers startend => 'pinyin', '拼音';
# Adapted from https://github.com/lilydjwg/winterpy/blob/master/pylib/pinyintone.py
# Maps a final consonant followed by a tone digit to the digit followed by the
# consonant ("n1" => "1n", "ng3" => "3ng", "r4" => "4r", ...), so the digit
# ends up directly behind the vowel.
my %mapConstTone2ToneConst = map {
    my $final = $_;
    map { ( "$final$_" => "$_$final" ) } 1 .. 4;
} qw(n ng r);
# Maps vowel+vowel+tone to vowel+tone+vowel ("ai1" => "a1i", "ou4" => "o4u", ...)
# for the diphthongs whose first vowel carries the tone mark.
my %mapVowelVowelTone2VowelToneVowel = map {
    my ( $lead, $trail ) = split //, $_;
    map { ( "$lead$trail$_" => "$lead$_$trail" ) } 1 .. 4;
} qw(ai ao ei ou);
# Maps a vowel followed by its tone digit to the tone-marked letter.
# "v" is the ASCII stand-in for "ü"; one row per vowel, tones 1-4 left to right.
my %mapVowelTone2Unicode = (
    'a1' => 'ā', 'a2' => 'á', 'a3' => 'ǎ', 'a4' => 'à',
    'e1' => 'ē', 'e2' => 'é', 'e3' => 'ě', 'e4' => 'è',
    'i1' => 'ī', 'i2' => 'í', 'i3' => 'ǐ', 'i4' => 'ì',
    'o1' => 'ō', 'o2' => 'ó', 'o3' => 'ǒ', 'o4' => 'ò',
    'u1' => 'ū', 'u2' => 'ú', 'u3' => 'ǔ', 'u4' => 'ù',
    # First tone is ǖ (U+01D6); it used to be a second copy of fourth-tone ǜ.
    'v1' => 'ǖ', 'v2' => 'ǘ', 'v3' => 'ǚ', 'v4' => 'ǜ',
);

# Matches any tone-marked vowel. Built from the map above so the two cannot
# drift apart (the hand-typed class listed ǜ twice and had no ǖ).
my $toneMarkRe = do {
    my $marks = join '', values %mapVowelTone2Unicode;
    qr/[$marks]/;
};

# MAIN
# Query handler. $_ holds the lower-cased query with the trigger word removed.
# Returns nothing (no answer) when the query is empty, already contains tone
# marks, or has neither Han characters nor tone digits; otherwise returns the
# plain-text answer followed by the structured answer.
handle remainder_lc => sub {
    # Nothing to convert.
    return if /^\s*$/;
    # The input is already tone-marked pinyin.
    return if /$toneMarkRe/;
    # Need Han characters or tone digits (e.g. 測試 or "ce4 shi4").
    return unless /[\p{Han}0-9]/;

    # Work on a named copy instead of overwriting $_.
    my $query = trim($_);

    # Put a space after every Han character so the syllables come out separated...
    ( my $spaced = $query ) =~ s/(\p{Han})/$1 /g;
    # ...but not between a Han character and following punctuation ("hao3 ," etc.).
    $spaced =~ s/(\p{Han}) (\p{P})/$1$2/g;

    my $h2p    = Lingua::Han::PinYin->new( tone => 1 );
    my $result = ConvertTone( $h2p->han2pinyin($spaced) );

    # Digits-only input that produced no tone mark (e.g. "yes3") is not pinyin.
    return if ( $query !~ /\p{Han}/ and $result !~ /$toneMarkRe/ );

    return "Pinyin of $query is \"$result\"",
        structured_answer => {
            data => {
                title    => "$result",
                subtitle => "Pinyin of $query"
            },
            templates => {
                group => 'text'
            }
        };
};
# Convert numbered pinyin to tone-marked pinyin, e.g. "ni3 hao3" -> "nǐ hǎo".
#
# Takes one string and returns a new, whitespace-trimmed string. Any "v"/"V"
# left after tone conversion (a syllable written without a tone digit) becomes
# "ü"/"Ü".
sub ConvertTone {
    my ($numbered) = @_;
    my $new = trim($numbered);

    # The passes must run in this order: the first two move the tone digit so
    # it sits directly behind the vowel that takes the mark, and the last one
    # replaces that vowel+digit pair with the accented letter.
    for my $map (
        \%mapConstTone2ToneConst,
        \%mapVowelVowelTone2VowelToneVowel,
        \%mapVowelTone2Unicode,
      )
    {
        for my $key ( keys %{$map} ) {
            $new =~ s/\Q$key\E/$map->{$key}/g;
        }
    }

    $new =~ s/v/ü/g;
    $new =~ s/V/Ü/g;

    return $new;
}
# Return a copy of the first argument with leading and trailing whitespace removed.
sub trim {
    my ($text) = @_;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}
1;

74
t/ChineseToPinyin.t Normal file
View File

@ -0,0 +1,74 @@
#!/usr/bin/env perl
use utf8;
use strict;
use warnings;
use Test::More;
use Test::Deep;
use DDG::Test::Goodie;
zci answer_type => "chinese_to_pinyin";
zci is_cached   => 1;

# Build the expected result for a query that should produce an answer.
#   $input  - the query remainder as it appears in the answer text
#   $pinyin - the tone-marked pinyin expected for it
sub expected_answer {
    my ( $input, $pinyin ) = @_;
    return test_zci(
        qq{Pinyin of $input is "$pinyin"},
        structured_answer => {
            data => {
                title    => $pinyin,
                subtitle => "Pinyin of $input",
            },
            templates => {
                group => "text",
            }
        }
    );
}

ddg_goodie_test(
    [qw( DDG::Goodie::ChineseToPinyin )],

    # Han characters are converted to tone-marked pinyin.
    'pinyin 你好' => expected_answer( '你好', 'nǐ hǎo' ),
    'pinyin 女生' => expected_answer( '女生', 'nǚ shēng' ),

    # Numbered pinyin is converted too; the query is lower-cased first.
    'pinyin lai2 zi4 zhong1 guo2' =>
        expected_answer( 'lai2 zi4 zhong1 guo2', 'lái zì zhōng guó' ),
    'PINYIN PENG2 YOU3' => expected_answer( 'peng2 you3', 'péng yǒu' ),

    # Queries that contain a trigger word but must not produce an answer.
    'pinyin '             => undef,
    'pinyin yes3'         => undef,
    'pinyin how are you?' => undef,
    'pinyin zhōng guó'    => undef,
);
done_testing;