Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# -*- coding: utf-8 -*-
import os
import io
import re
import codecs
from pypinyin.phonetic_symbol import phonetic_symbol
from pypinyin.pinyin_dict import pinyin_dict
from pypinyin.style.tone import ToneConverter
# Directory that holds this script, with symlinks resolved.
ROOT = os.path.dirname(os.path.realpath(__file__))
tone_converter = ToneConverter()

# Lookup table: lower-cased result of ``tone_converter.to_tone3`` for a
# reading -> the reading itself as stored in ``pinyin_dict``.  When several
# readings yield the same key, the one visited last wins.
tone3_2_tone_dict = {}
for readings in pinyin_dict.values():
    # Each dictionary value is a comma-separated list of readings.
    for reading in readings.split(','):
        reading = reading.strip()
        if not reading:
            continue
        tone3_key = tone_converter.to_tone3(reading).strip().lower()
        if tone3_key:
            tone3_2_tone_dict[tone3_key] = reading
# Takes a pinyin string, normalises it (strip + lower-case) and special-cases
# inputs that carry the digit 5.
# NOTE(review): only the beginning of this function is visible in this
# excerpt; the body of the final `if` is cut off, so what is returned for
# input containing '5' (and for every other input) cannot be documented
# from here.
def tone3_to_tone1(tone3):
tone3 = tone3.strip().lower()
# Erhua (r-coloring) suffix: a bare 'r5' is returned as 'er'.
if tone3 == 'r5':
return 'er'
# Neutral tone
if '5' in tone3:
def test_pinyin_finals():
    """Words whose syllables consist of a final only (no initial)."""
    hans = '嗷嗷'
    toned = [['\xe1o'], ['\xe1o']]

    # With no explicit style the tone-marked spelling is expected, and a
    # trailing run of non-Chinese text is expected back as one chunk.
    assert pinyin(hans) == toned
    assert pinyin(hans + 'abc') == toned + [['abc']]

    # Styles passed as the second positional argument.
    positional_cases = (
        (NORMAL, [['ao'], ['ao']]),
        (TONE, toned),
        (TONE2, [['a2o'], ['a2o']]),
        (TONE3, [['ao2'], ['ao2']]),
        (INITIALS, [[''], ['']]),
        (FIRST_LETTER, [['a'], ['a']]),
        (BOPOMOFO, [['ㄠˊ'], ['ㄠˊ']]),
        (BOPOMOFO_FIRST, [['ㄠ'], ['ㄠ']]),
        (CYRILLIC, [['ао2'], ['ао2']]),
        (CYRILLIC_FIRST, [['а'], ['а']]),
    )
    for style, expected in positional_cases:
        assert pinyin(hans, style) == expected

    # Heteronym mode: a single reading for each character of ``hans``,
    # several readings for '啊'.
    assert pinyin(hans, heteronym=True) == toned
    assert pinyin('啊', heteronym=True) == [['a', 'è', 'ā', 'á', 'ǎ', 'à']]

    # Finals-only styles, passed through the ``style`` keyword.
    keyword_cases = (
        (FINALS, [['ao'], ['ao']]),
        (FINALS_TONE, toned),
        (FINALS_TONE2, [['a2o'], ['a2o']]),
        (FINALS_TONE3, [['ao2'], ['ao2']]),
    )
    for style, expected in keyword_cases:
        assert pinyin(hans, style=style) == expected
def test_uei(hans, kwargs, result):
    """Check ``lazy_pinyin`` and ``pinyin`` against the same expectation.

    ``lazy_pinyin`` must return ``result`` itself, while ``pinyin`` must
    return ``result`` wrapped in one more list.
    """
    flat_output = lazy_pinyin(hans, **kwargs)
    assert flat_output == result

    nested_output = pinyin(hans, **kwargs)
    assert nested_output == [result]
def test_pinyin_initials():
    """Words whose syllables have both an initial and a final.

    Each style up to and including FINALS is checked twice: once with the
    default arguments and once with ``strict=False``.  For this word the
    expected output is the same in both modes.  Every distinct assertion
    appears exactly once.
    """
    hans = '中心'

    # Default style: tone marks.
    assert pinyin(hans) == [['zh\u014dng'], ['x\u012bn']]
    assert pinyin(hans, strict=False) == [['zh\u014dng'], ['x\u012bn']]

    # NORMAL: no tone information.
    assert pinyin(hans, NORMAL) == [['zhong'], ['xin']]
    assert pinyin(hans, NORMAL, strict=False) == [['zhong'], ['xin']]

    # TONE: tone mark written on a letter of the final.
    assert pinyin(hans, TONE) == [['zh\u014dng'], ['x\u012bn']]
    assert pinyin(hans, TONE, strict=False) == [['zh\u014dng'], ['x\u012bn']]

    # TONE2: tone written as a digit [1-4] right after the letter that
    # carries it.
    assert pinyin(hans, TONE2) == [['zho1ng'], ['xi1n']]
    assert pinyin(hans, TONE2, strict=False) == [['zho1ng'], ['xi1n']]

    # TONE3: tone written as a digit [1-4] at the end of each syllable.
    assert pinyin(hans, TONE3) == [['zhong1'], ['xin1']]
    assert pinyin(hans, TONE3, strict=False) == [['zhong1'], ['xin1']]

    # INITIALS: only the initial of each syllable.
    assert pinyin(hans, INITIALS) == [['zh'], ['x']]
    assert pinyin(hans, INITIALS, strict=False) == [['zh'], ['x']]

    # FIRST_LETTER: only the first letter of each syllable.
    assert pinyin(hans, FIRST_LETTER) == [['z'], ['x']]
    assert pinyin(hans, FIRST_LETTER, strict=False) == [['z'], ['x']]

    # BOPOMOFO: zhuyin spelling.
    assert pinyin(hans, BOPOMOFO) == [['ㄓㄨㄥ'], ['ㄒㄧㄣ']]
    assert pinyin(hans, BOPOMOFO, strict=False) == [['ㄓㄨㄥ'], ['ㄒㄧㄣ']]

    # BOPOMOFO_FIRST: only the first zhuyin symbol.
    assert pinyin(hans, BOPOMOFO_FIRST) == [['ㄓ'], ['ㄒ']]
    assert pinyin(hans, BOPOMOFO_FIRST, strict=False) == [['ㄓ'], ['ㄒ']]

    # CYRILLIC: Cyrillic spelling with a trailing tone digit.
    assert pinyin(hans, CYRILLIC) == [['чжун1'], ['синь1']]
    assert pinyin(hans, CYRILLIC, strict=False) == [['чжун1'], ['синь1']]

    # CYRILLIC_FIRST: only the first Cyrillic letter.
    assert pinyin(hans, CYRILLIC_FIRST) == [['ч'], ['с']]
    assert pinyin(hans, CYRILLIC_FIRST, strict=False) == [['ч'], ['с']]

    # Heteronym mode: every known reading of each character.
    assert pinyin(hans, heteronym=True) == [['zh\u014dng', 'zh\xf2ng'],
                                            ['x\u012bn']]
    assert pinyin(hans, heteronym=True, strict=False) == \
        [['zh\u014dng', 'zh\xf2ng'], ['x\u012bn']]

    # FINALS: only the final of each syllable, without tone.
    assert pinyin(hans, style=FINALS) == [['ong'], ['in']]
    assert pinyin(hans, style=FINALS, strict=False) == [['ong'], ['in']]

    # FINALS_TONE: only the final, with the tone mark on it.
    assert pinyin(hans, style=FINALS_TONE) == [['\u014dng'], ['\u012bn']]
def test_custom_pinyin_dict_tone2():
    """Register custom readings given in tone2 notation and read them back."""
    han = '桔'
    custom_readings = {ord(han): 'ce4,si4'}
    load_single_dict(custom_readings, style='tone2')

    # The first custom reading is the one expected from both entry points.
    assert lazy_pinyin(han, style=TONE2) == ['ce4']
    assert pinyin(han) == [['cè']]
def test_36():
    """``lazy_pinyin`` on a nine-character sentence yields one toneless
    syllable per character."""
    sentence = '两年前七斤喝醉了酒'
    expected = [
        'liang', 'nian', 'qian',
        'qi', 'jin', 'he',
        'zui', 'le', 'jiu',
    ]
    actual = lazy_pinyin(sentence)
    assert actual == expected
def test_neutral_tone_with_5_many_cases(input, expected_old, expected_new):
    """Compare TONE2 output of the module-level ``lazy_pinyin`` with that of
    ``my_pinyin.lazy_pinyin`` for the same input.

    ``expected_old`` is what the module-level function must return and
    ``expected_new`` what ``my_pinyin`` must return.
    """
    # NOTE(review): ``my_pinyin`` is defined outside this excerpt; judging by
    # the test name it presumably marks the neutral tone with 5 -- confirm.
    stock_output = lazy_pinyin(input, style=Style.TONE2)
    assert stock_output == expected_old

    custom_output = my_pinyin.lazy_pinyin(input, style=Style.TONE2)
    assert custom_output == expected_new
['嘸', Style.TONE2, ['m1', 'm2']],
['誒', Style.TONE, ['ê̄', 'ế', 'ê̌', 'ề']],
['誒', Style.TONE2, ['ê1', 'ê2', 'ê3', 'ê4']],
])
def test_m_e(han, style, expect):
    """Every reading in ``expect`` must be among the heteronym readings
    returned for the single character ``han`` in the given ``style``."""
    readings = pinyin(han, style=style, heteronym=True)
    # One input character -> exactly one list of readings.
    assert len(readings) == 1
    # Extra readings are allowed; missing ones are not.
    assert set(expect) <= set(readings[0])