How to use the pypinyin.pinyin_dict.pinyin_dict dictionary in pypinyin

To help you get started, we’ve selected a few pypinyin examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

mozillazg / phrase-pinyin-data / parse_latest_cc_cedict.py View on Github

# -*- coding: utf-8 -*-

import os
import io
import re
import codecs
from pypinyin.phonetic_symbol import phonetic_symbol
from pypinyin.pinyin_dict import pinyin_dict
from pypinyin.style.tone import ToneConverter

ROOT = os.path.dirname(os.path.realpath(__file__))


# Reverse lookup table: lower-cased result of ToneConverter.to_tone3() ->
# the original pinyin string it was derived from. When several pinyin
# strings map to the same key, the one processed last wins.
tone_converter = ToneConverter()
tone3_2_tone_dict = {}
for k, v in pinyin_dict.items():
    # Each value holds comma-separated entries; handle them one by one.
    for part in v.split(','):
        part = part.strip()
        if not part:
            continue
        tone3 = tone_converter.to_tone3(part).strip().lower()
        if not tone3:
            continue
        tone3_2_tone_dict[tone3] = part


def tone3_to_tone1(tone3):
    tone3 = tone3.strip().lower()
    # 儿化
    if tone3 == 'r5':
        return 'er'
    # 轻声
    if '5' in tone3:

mozillazg / python-pinyin / tests / test_env.py View on Github

def test_no_copy(cleanup):
    """Test that the dict copy can be disabled via PYPINYIN_NO_DICT_COPY."""
    import pypinyin.core  # noqa

    # First check: the dict exposed by pypinyin.core must be a different
    # object from the raw pypinyin.pinyin_dict.pinyin_dict.
    # NOTE(review): presumably the `cleanup` fixture guarantees the
    # environment variable is unset at this point -- confirm.
    assert pypinyin.core.PINYIN_DICT is not pypinyin.pinyin_dict.pinyin_dict

    # Set the variable and reload pypinyin.constants so its module-level
    # code runs again; the constant must now be the very same object as
    # the raw dict (identity, not just equality).
    os.environ['PYPINYIN_NO_DICT_COPY'] = 'true'
    reload(pypinyin.constants)
    assert pypinyin.constants.PINYIN_DICT is pypinyin.pinyin_dict.pinyin_dict

mozillazg / python-pinyin / pypinyin / pinyin.py View on Github

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re

import jieba

from . import phrases_dict, phonetic_symbol, pinyin_dict


# Phrase pinyin table
PHRASES_DICT = phrases_dict.phrases_dict
# Pinyin table
PINYIN_DICT = pinyin_dict.pinyin_dict
# List of initials
INITIALS = "zh,ch,sh,b,p,m,f,d,t,n,l,g,k,h,j,q,x,r,z,c,s,yu,y,w".split(",")
# List of finals
FINALS = (
    "ang,eng,ing,ong,an,en,in,un,er,ai,ei,ui,ao,ou,iu,ie,ve,a,o,e,i,u,v"
).split(",")

PINYIN_STYLE = {
    # Plain style, without tone marks
    'NORMAL': 0,
    # Standard style, tone mark on the first letter of the final
    'TONE': 1,
    # Tone placed after the pinyin, written as a digit 1~4
    'TONE2': 2,
    # Only the initial part
    'INITIALS': 3,
    # Only the first letter
    'FIRST_LETTER': 4,
}
# Characters that carry tone marks
PHONETIC_SYMBOL = phonetic_symbol.phonetic_symbol

mozillazg / python-pinyin / tools / get_words_from_zdic_by_bh.py View on Github

url_base = 'http://www.zdic.net/z/jbs/zbh/bs/?jzbh=%s|%s'
    cookies = requests.get('http://www.zdic.net/z/jbs/zbh/').cookies.get_dict()
    word_list = []
    timer = 5

    for m in range(1, 66):  # 总笔画数
        sleep(timer)
        for page_num in xrange(1, 10000):  # 页数
            url = url_base % (m, page_num)
            logger.debug(url)
            html = get_one_page(url, cookies=cookies, headers=headers)
            words = parse_words(html)
            if not words:
                break
            for word in words:
                if word not in pinyin_dict:
                    logger.debug(repr(word))
                    word_list.append(word)
            sleep(timer)

    with io.open('words.txt', 'w', encoding='utf8') as f:
        for word in word_list:
            try:
                f.write(word)
            except Exception as e:
                logger.debug(e + '\n' + repr(word))

mozillazg / python-pinyin / pypinyin / constants.py View on Github

import re

from enum import IntEnum, unique

from pypinyin import pinyin_dict
from pypinyin.compat import SUPPORT_UCS4

# Phrase pinyin table. The phrases module is only imported when the
# PYPINYIN_NO_PHRASES environment variable is unset or empty; otherwise the
# table starts out as an empty dict.
if not os.environ.get('PYPINYIN_NO_PHRASES'):
    from pypinyin import phrases_dict
    PHRASES_DICT = phrases_dict.phrases_dict
else:
    PHRASES_DICT = {}

# Single-character pinyin table
PINYIN_DICT = pinyin_dict.pinyin_dict

# The PYPINYIN_NO_DICT_COPY environment variable disables the copy below
# (for the case where no custom pinyin data is loaded) to reduce memory use.
if not os.environ.get('PYPINYIN_NO_DICT_COPY'):
    PINYIN_DICT = PINYIN_DICT.copy()
    PHRASES_DICT = PHRASES_DICT.copy()

# Regular expression matching a character followed by the digit that marks
# its tone
RE_TONE2 = re.compile(r'([aeoiuvnm])([1-4])$')

# 有拼音的汉字
if SUPPORT_UCS4:
    RE_HANS = re.compile(
        r'^(?:['
        r'\u3007'                  # 〇
        r'\u3400-\u4dbf'           # CJK扩展A:[3400-4DBF]
        r'\u4e00-\u9fff'           # CJK基本:[4E00-9FFF]

mozillazg / python-pinyin / tools / get_words_from_zdic_by_unicode.py View on Github

def get_words(unicode_range, url_base, headers, cookies):
    """Yield get_word() results for code points missing from pinyin_dict.

    ``unicode_range`` is a pair of hexadecimal strings giving the first and
    last code point (both inclusive). Code points already in ``pinyin_dict``
    are skipped. ``url_base``, ``headers`` and ``cookies`` are passed
    through to ``get_word`` unchanged.

    Throttling: sleeps 1 second after every yielded item, and 120 seconds
    once more than 900 items have been fetched since the last long pause.
    """
    first = int(unicode_range[0], 16)
    last = int(unicode_range[1], 16)
    fetched = 0
    for code_point in xrange(first, last + 1):
        if code_point in pinyin_dict:
            continue
        if fetched > 900:
            # Long pause, then start counting a fresh batch.
            fetched = 0
            sleep(120)
        fetched += 1

        yield get_word(code_point, url_base, headers, cookies)
        sleep(1)

How to use the pypinyin.pinyin_dict.pinyin_dict dictionary in pypinyin

To help you get started, we’ve selected a few pypinyin examples, based on popular ways it is used in public projects.

pypinyin

Package Health Score

Popular pypinyin functions

Similar packages