commit
7bc3c9bc3f
|
@ -0,0 +1,145 @@
|
|||
package DDG::Goodie::ChineseToPinyin;
|
||||
# ABSTRACT: Get Pinyin of a Chinese string.
|
||||
|
||||
use strict;
|
||||
use utf8;
|
||||
use DDG::Goodie;
|
||||
use Lingua::Han::PinYin;
|
||||
|
||||
# Instant-answer metadata for this Goodie.
zci answer_type => 'chinese_to_pinyin';
zci is_cached   => 1;

# Trigger words, in English and in Chinese.
triggers startend => ('pinyin', '拼音');
|
||||
|
||||
|
||||
# FROM https://github.com/lilydjwg/winterpy/blob/master/pylib/pinyintone.py
|
||||
|
||||
# Map (final) consonant+tone to tone+consonant, e.g. 'ng1' => '1ng'.
# Moving the tone digit in front of a trailing n / ng / r puts it directly
# after the vowel that will carry the tone mark.
my %mapConstTone2ToneConst;
for my $final (qw(n ng r)) {
    for my $tone (1 .. 4) {
        $mapConstTone2ToneConst{"$final$tone"} = "$tone$final";
    }
}
|
||||
|
||||
# Map vowel+vowel+tone to vowel+tone+vowel, e.g. 'ao3' => 'a3o'.
# In the finals ai, ao, ei and ou the tone belongs to the first vowel, so the
# tone digit is moved to sit right behind it.
my %mapVowelVowelTone2VowelToneVowel;
for my $pair (qw(ai ao ei ou)) {
    my ($first, $second) = split //, $pair;
    for my $tone (1 .. 4) {
        $mapVowelVowelTone2VowelToneVowel{"$pair$tone"} = "$first$tone$second";
    }
}
|
||||
|
||||
# Map a vowel followed by a tone number (1-4) to the matching tone-marked
# Unicode letter. 'v' is the ASCII stand-in for 'ü'.
my %mapVowelTone2Unicode = (
    'a1' => 'ā', 'a2' => 'á', 'a3' => 'ǎ', 'a4' => 'à',
    'e1' => 'ē', 'e2' => 'é', 'e3' => 'ě', 'e4' => 'è',
    'i1' => 'ī', 'i2' => 'í', 'i3' => 'ǐ', 'i4' => 'ì',
    'o1' => 'ō', 'o2' => 'ó', 'o3' => 'ǒ', 'o4' => 'ò',
    'u1' => 'ū', 'u2' => 'ú', 'u3' => 'ǔ', 'u4' => 'ù',
    # First tone of ü is ǖ (U+01D6); it must differ from the fourth tone ǜ (U+01DC).
    'v1' => 'ǖ', 'v2' => 'ǘ', 'v3' => 'ǚ', 'v4' => 'ǜ',
);
|
||||
|
||||
|
||||
# MAIN
|
||||
|
||||
# Turn the lower-cased query remainder (in $_) into tone-marked Pinyin.
#
# The remainder may be Chinese text (e.g. 你好) or numbered Pinyin
# (e.g. ni3 hao3). Returns the plain-text answer followed by the
# structured_answer pair, or nothing when the query should not be answered.
handle remainder_lc => sub {
    # Tone-marked vowels: the four tones of a, e, i, o, u and ü.
    my $toned_vowel = qr/[āáǎàēéěèīíǐìōóǒòūúǔùǖǘǚǜ]/;

    # return if the content is empty
    return if /^\s*$/;

    # return if it already has a special pinyin character
    return if /$toned_vowel/;

    # return unless the content has a Chinese character or a number (e.g. 測試/ce4 shi4)
    return unless /[\p{Han}0-9]/;

    # Work on a named copy rather than reassigning $_.
    my $query = trim($_);

    # add a space after every Chinese character in order to separate pinyin
    my $spacedChineseString = $query =~ s/(\p{Han})/$1 /gr;
    # remove the space before any punctuation to prevent something like "hao3 !"
    $spacedChineseString =~ s/(\p{Han}) (\p{P})/$1$2/g;

    my $h2p    = Lingua::Han::PinYin->new(tone => 1);
    my $result = ConvertTone($h2p->han2pinyin($spacedChineseString));

    # Non-Chinese input that produced no tone mark was not numbered Pinyin
    # after all (e.g. "yes3"), so give no answer.
    return if ($query !~ /\p{Han}/ && $result !~ /$toned_vowel/);

    return qq{Pinyin of $query is "$result"},
        structured_answer => {
            data => {
                title    => "$result",
                subtitle => "Pinyin of $query",
            },
            templates => {
                group => 'text',
            },
        };
};
|
||||
|
||||
|
||||
# Convert numbered Pinyin to tone-marked Pinyin, e.g. "ni3 hao3" -> "nǐ hǎo".
#
# Takes one string and returns a trimmed, converted copy. The conversion is
# three rewriting passes, applied in this order:
#   1. %mapConstTone2ToneConst            moves the tone digit in front of a final n/ng/r
#   2. %mapVowelVowelTone2VowelToneVowel  moves the tone digit behind the first vowel of ai/ao/ei/ou
#   3. %mapVowelTone2Unicode              replaces vowel+digit with the tone-marked letter
# Any 'v' / 'V' still left afterwards is written as 'ü' / 'Ü'.
sub ConvertTone {
    my ($numbered) = @_;
    my $pinyin = trim($numbered);

    for my $table (
        \%mapConstTone2ToneConst,
        \%mapVowelVowelTone2VowelToneVowel,
        \%mapVowelTone2Unicode,
    ) {
        # Sorted keys keep the result independent of Perl's randomized hash
        # order; \Q...\E makes sure a key is always matched literally.
        for my $key (sort keys %{$table}) {
            $pinyin =~ s/\Q$key\E/$table->{$key}/g;
        }
    }

    $pinyin =~ s/v/ü/g;
    $pinyin =~ s/V/Ü/g;

    return $pinyin;
}
|
||||
|
||||
# Return a copy of the first argument with leading and trailing whitespace
# removed; whitespace inside the string is left alone.
sub trim {
    my ($text) = @_;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}
|
||||
|
||||
1;
|
|
@ -0,0 +1,74 @@
|
|||
#!/usr/bin/env perl
|
||||
|
||||
use utf8;
|
||||
use strict;
|
||||
use warnings;
|
||||
use Test::More;
|
||||
use Test::Deep;
|
||||
use DDG::Test::Goodie;
|
||||
|
||||
zci answer_type => "chinese_to_pinyin";
zci is_cached   => 1;

# Expected return list for a query whose (lower-cased) remainder is $input
# and whose tone-marked Pinyin is $pinyin: the plain-text answer followed by
# the structured answer.
sub build_structured_answer {
    my ($input, $pinyin) = @_;

    return qq{Pinyin of $input is "$pinyin"},
        structured_answer => {
            data => {
                title    => $pinyin,
                subtitle => "Pinyin of $input",
            },
            templates => {
                group => "text",
            },
        };
}

# Wrap the expected answer in the test_zci comparison object.
sub build_test { return test_zci(build_structured_answer(@_)) }

ddg_goodie_test(
    [qw( DDG::Goodie::ChineseToPinyin )],

    # Chinese characters in, tone-marked Pinyin out.
    'pinyin 你好' => build_test('你好', 'nǐ hǎo'),
    'pinyin 女生' => build_test('女生', 'nǚ shēng'),

    # Numbered Pinyin in, tone-marked Pinyin out; the query is lower-cased first.
    'pinyin lai2 zi4 zhong1 guo2' => build_test('lai2 zi4 zhong1 guo2', 'lái zì zhōng guó'),
    'PINYIN PENG2 YOU3'           => build_test('peng2 you3', 'péng yǒu'),

    # Queries that contain a trigger word but must not be answered.
    'pinyin '             => undef,
    'pinyin yes3'         => undef,
    'pinyin how are you?' => undef,
    'pinyin zhōng guó'    => undef,
);

done_testing;
|
Loading…
Reference in New Issue