资讯专栏INFORMATION COLUMN

PHP 汉字转拼音

AnthonyHan / 3442人阅读

摘要:拼音字符转换图将中文编码成拼音要转换为拼音的字符串返回格式每个字的首字母全拼音字符串字母中文转换到拼音每次处理一个字符待处理字符双字节拼音结果为拼音首字母测试文字结果为全拼音测试文字结果为第一个拼音测试文字

class Pinyin {
    /**
     * 拼音字符转换图
     * @var array
     */
    private $data = array(
        "a"=>-20319,"ai"=>-20317,"an"=>-20304,"ang"=>-20295,"ao"=>-20292,
        "ba"=>-20283,"bai"=>-20265,"ban"=>-20257,"bang"=>-20242,"bao"=>-20230,"bei"=>-20051,"ben"=>-20036,"beng"=>-20032,"bi"=>-20026,"bian"=>-20002,"biao"=>-19990,"bie"=>-19986,"bin"=>-19982,"bing"=>-19976,"bo"=>-19805,"bu"=>-19784,
        "ca"=>-19775,"cai"=>-19774,"can"=>-19763,"cang"=>-19756,"cao"=>-19751,"ce"=>-19746,"ceng"=>-19741,"cha"=>-19739,"chai"=>-19728,"chan"=>-19725,"chang"=>-19715,"chao"=>-19540,"che"=>-19531,"chen"=>-19525,"cheng"=>-19515,"chi"=>-19500,"chong"=>-19484,"chou"=>-19479,"chu"=>-19467,"chuai"=>-19289,"chuan"=>-19288,"chuang"=>-19281,"chui"=>-19275,"chun"=>-19270,"chuo"=>-19263,"ci"=>-19261,"cong"=>-19249,"cou"=>-19243,"cu"=>-19242,"cuan"=>-19238,"cui"=>-19235,"cun"=>-19227,"cuo"=>-19224,
        "da"=>-19218,"dai"=>-19212,"dan"=>-19038,"dang"=>-19023,"dao"=>-19018,"de"=>-19006,"deng"=>-19003,"di"=>-18996,"dian"=>-18977,"diao"=>-18961,"die"=>-18952,"ding"=>-18783,"diu"=>-18774,"dong"=>-18773,"dou"=>-18763,"du"=>-18756,"duan"=>-18741,"dui"=>-18735,"dun"=>-18731,"duo"=>-18722,
        "e"=>-18710,"en"=>-18697,"er"=>-18696,
        "fa"=>-18526,"fan"=>-18518,"fang"=>-18501,"fei"=>-18490,"fen"=>-18478,"feng"=>-18463,"fo"=>-18448,"fou"=>-18447,"fu"=>-18446,
        "ga"=>-18239,"gai"=>-18237,"gan"=>-18231,"gang"=>-18220,"gao"=>-18211,"ge"=>-18201,"gei"=>-18184,"gen"=>-18183,"geng"=>-18181,"gong"=>-18012,"gou"=>-17997,"gu"=>-17988,"gua"=>-17970,"guai"=>-17964,"guan"=>-17961,"guang"=>-17950,"gui"=>-17947,"gun"=>-17931,"guo"=>-17928,
        "ha"=>-17922,"hai"=>-17759,"han"=>-17752,"hang"=>-17733,"hao"=>-17730,"he"=>-17721,"hei"=>-17703,"hen"=>-17701,"heng"=>-17697,"hong"=>-17692,"hou"=>-17683,"hu"=>-17676,"hua"=>-17496,"huai"=>-17487,"huan"=>-17482,"huang"=>-17468,"hui"=>-17454,"hun"=>-17433,"huo"=>-17427,
        "ji"=>-17417,"jia"=>-17202,"jian"=>-17185,"jiang"=>-16983,"jiao"=>-16970,"jie"=>-16942,"jin"=>-16915,"jing"=>-16733,"jiong"=>-16708,"jiu"=>-16706,"ju"=>-16689,"juan"=>-16664,"jue"=>-16657,"jun"=>-16647,
        "ka"=>-16474,"kai"=>-16470,"kan"=>-16465,"kang"=>-16459,"kao"=>-16452,"ke"=>-16448,"ken"=>-16433,"keng"=>-16429,"kong"=>-16427,"kou"=>-16423,"ku"=>-16419,"kua"=>-16412,"kuai"=>-16407,"kuan"=>-16403,"kuang"=>-16401,"kui"=>-16393,"kun"=>-16220,"kuo"=>-16216,
        "la"=>-16212,"lai"=>-16205,"lan"=>-16202,"lang"=>-16187,"lao"=>-16180,"le"=>-16171,"lei"=>-16169,"leng"=>-16158,"li"=>-16155,"lia"=>-15959,"lian"=>-15958,"liang"=>-15944,"liao"=>-15933,"lie"=>-15920,"lin"=>-15915,"ling"=>-15903,"liu"=>-15889,"long"=>-15878,"lou"=>-15707,"lu"=>-15701,"lv"=>-15681,"luan"=>-15667,"lue"=>-15661,"lun"=>-15659,"luo"=>-15652,
        "ma"=>-15640,"mai"=>-15631,"man"=>-15625,"mang"=>-15454,"mao"=>-15448,"me"=>-15436,"mei"=>-15435,"men"=>-15419,"meng"=>-15416,"mi"=>-15408,"mian"=>-15394,"miao"=>-15385,"mie"=>-15377,"min"=>-15375,"ming"=>-15369,"miu"=>-15363,"mo"=>-15362,"mou"=>-15183,"mu"=>-15180,
        "na"=>-15165,"nai"=>-15158,"nan"=>-15153,"nang"=>-15150,"nao"=>-15149,"ne"=>-15144,"nei"=>-15143,"nen"=>-15141,"neng"=>-15140,"ni"=>-15139,"nian"=>-15128,"niang"=>-15121,"niao"=>-15119,"nie"=>-15117,"nin"=>-15110,"ning"=>-15109,"niu"=>-14941,"nong"=>-14937,"nu"=>-14933,"nv"=>-14930,"nuan"=>-14929,"nue"=>-14928,"nuo"=>-14926,
        "o"=>-14922,"ou"=>-14921,
        "pa"=>-14914,"pai"=>-14908,"pan"=>-14902,"pang"=>-14894,"pao"=>-14889,"pei"=>-14882,"pen"=>-14873,"peng"=>-14871,"pi"=>-14857,"pian"=>-14678,"piao"=>-14674,"pie"=>-14670,"pin"=>-14668,"ping"=>-14663,"po"=>-14654,"pu"=>-14645,
        "qi"=>-14630,"qia"=>-14594,"qian"=>-14429,"qiang"=>-14407,"qiao"=>-14399,"qie"=>-14384,"qin"=>-14379,"qing"=>-14368,"qiong"=>-14355,"qiu"=>-14353,"qu"=>-14345,"quan"=>-14170,"que"=>-14159,"qun"=>-14151,
        "ran"=>-14149,"rang"=>-14145,"rao"=>-14140,"re"=>-14137,"ren"=>-14135,"reng"=>-14125,"ri"=>-14123,"rong"=>-14122,"rou"=>-14112,"ru"=>-14109,"ruan"=>-14099,"rui"=>-14097,"run"=>-14094,"ruo"=>-14092,
        "sa"=>-14090,"sai"=>-14087,"san"=>-14083,"sang"=>-13917,"sao"=>-13914,"se"=>-13910,"sen"=>-13907,"seng"=>-13906,"sha"=>-13905,"shai"=>-13896,"shan"=>-13894,"shang"=>-13878,"shao"=>-13870,"she"=>-13859,"shen"=>-13847,"sheng"=>-13831,"shi"=>-13658,"shou"=>-13611,"shu"=>-13601,"shua"=>-13406,"shuai"=>-13404,"shuan"=>-13400,"shuang"=>-13398,"shui"=>-13395,"shun"=>-13391,"shuo"=>-13387,"si"=>-13383,"song"=>-13367,"sou"=>-13359,"su"=>-13356,"suan"=>-13343,"sui"=>-13340,"sun"=>-13329,"suo"=>-13326,
        "ta"=>-13318,"tai"=>-13147,"tan"=>-13138,"tang"=>-13120,"tao"=>-13107,"te"=>-13096,"teng"=>-13095,"ti"=>-13091,"tian"=>-13076,"tiao"=>-13068,"tie"=>-13063,"ting"=>-13060,"tong"=>-12888,"tou"=>-12875,"tu"=>-12871,"tuan"=>-12860,"tui"=>-12858,"tun"=>-12852,"tuo"=>-12849,
        "wa"=>-12838,"wai"=>-12831,"wan"=>-12829,"wang"=>-12812,"wei"=>-12802,"wen"=>-12607,"weng"=>-12597,"wo"=>-12594,"wu"=>-12585,
        "xi"=>-12556,"xia"=>-12359,"xian"=>-12346,"xiang"=>-12320,"xiao"=>-12300,"xie"=>-12120,"xin"=>-12099,"xing"=>-12089,"xiong"=>-12074,"xiu"=>-12067,"xu"=>-12058,"xuan"=>-12039,"xue"=>-11867,"xun"=>-11861,
        "ya"=>-11847,"yan"=>-11831,"yang"=>-11798,"yao"=>-11781,"ye"=>-11604,"yi"=>-11589,"yin"=>-11536,"ying"=>-11358,"yo"=>-11340,"yong"=>-11339,"you"=>-11324,"yu"=>-11303,"yuan"=>-11097,"yue"=>-11077,"yun"=>-11067,
        "za"=>-11055,"zai"=>-11052,"zan"=>-11045,"zang"=>-11041,"zao"=>-11038,"ze"=>-11024,"zei"=>-11020,"zen"=>-11019,"zeng"=>-11018,"zha"=>-11014,"zhai"=>-10838,"zhan"=>-10832,"zhang"=>-10815,"zhao"=>-10800,"zhe"=>-10790,"zhen"=>-10780,"zheng"=>-10764,"zhi"=>-10587,"zhong"=>-10544,"zhou"=>-10533,"zhu"=>-10519,"zhua"=>-10331,"zhuai"=>-10329,"zhuan"=>-10328,"zhuang"=>-10322,"zhui"=>-10315,"zhun"=>-10309,"zhuo"=>-10307,"zi"=>-10296,"zong"=>-10281,"zou"=>-10274,"zu"=>-10270,"zuan"=>-10262,"zui"=>-10260,"zun"=>-10256,"zuo"=>-10254
    );

    /**
     * 将中文编码成拼音
     * @param string $str 要转换为拼音的字符串
     * @param string $RetFormat 返回格式 [first:每个字的首字母|all:全拼音|one:字符串字母]
     * @return string
     */
    public function get_pinyin($str, $RetFormat="first")
    {
        $GBK = iconv("UTF-8", "GBK", $str);
        $UTF8 = iconv("GBK", "UTF-8", $GBK);
        if($UTF8 != $str) $GBK = $str;
        $Buf = array();
        for ($i=0, $iLoop=strlen($GBK); $i<$iLoop; $i++) {
            $Chr = ord($GBK{$i});
            if ($Chr>160) {
                $Chr = ($Chr<<8) + ord($GBK{++$i}) - 65536;
            }
            if ("first" == $RetFormat || "one" == $RetFormat) {
                $Buf[] = substr($this->zh2py($Chr),0,1);
            }else{
                $Buf[] = $this->zh2py($Chr);
            }

        }
        if ("first" === $RetFormat) {
            return implode("", $Buf);
        }elseif("one" == $RetFormat) {
            return $Buf[0];
        }else{
            return implode(" ", $Buf);
        }
    }

    /**
     * 中文转换到拼音(每次处理一个字符)
     * @param number $iWORD 待处理字符双字节
     * @return string 拼音
     */
    private function zh2py($iWORD) 
    {
        if($iWORD>0 && $iWORD<160 ) {
            return chr($iWORD);
        } elseif ($iWORD<-20319||$iWORD>-10247) {
            return "";
        } else {
            foreach ($this->data as $py => $code) {
                if($code > $iWORD) break;
                $result = $py;
            }
            return $result;
        }
    }
}

$Pinyin = new Pinyin();
//结果为拼音首字母
echo $Pinyin->get_pinyin("测试文字"), "
"; //结果为全拼音 echo $Pinyin->get_pinyin("测试文字", "all"), "
"; //结果为第一个拼音 echo $Pinyin->get_pinyin("测试文字", "one"), "
";

文章版权归作者所有,未经允许请勿转载,若此文章存在违规行为,您可以联系管理员删除。

转载请注明本文地址:https://www.ucloud.cn/yun/21316.html

相关文章

  • PHP汉字拼音

    摘要:最简单最准确的中文转拼音的类项目地址支持获取拼音以及拼音的缩写即首字母支持等编码能准确匹配千多个常用汉字单个汉字,一句话,中英文混合都完美支持例子版早上好获取拼音早上好获取拼音缩写例子版早上好获取拼音早上好获取拼音缩写 最简单、最准确的PHP中文转拼音的类 github项目地址:https://github.com/jifei/Pinyin 支持获取拼音以及拼音的缩写即首字母 ...

    tulayang 评论0 收藏0
  • PHP汉字拼音(支持生僻字)

    摘要:基于语言的汉语转拼音的类兼容编码,无须特殊处理对中文默认返回拼音首字母缩写,其它字符不作处理直接返回。汉字拼音对照数组基础数据

    Kerr1Gan 评论0 收藏0
  • 汉字拼音

    摘要:多拼音格式输出支持支持多种拼音输出格式带音标不带音标数字表示音标以及拼音首字母输出格式常见多音字识别支持常见多音字的识别,其中包括词组成语地名等简繁体中文转换经对比以上三个类库,个人认为更加适合。 1、目前在百度或谷歌上搜索java汉字转拼音,主要有以下三种: 1)piny4j:搜索结果介绍文章最多的,然而此类库对生僻字支持不好,转换时会报空指针(java.lang.NullPoint...

    frank_fun 评论0 收藏0
  • 汉字拼音 支持20870个汉字,包含完整的多音字

    摘要:如果转进制,则位进制只能表示个汉字,这个太少了。假设我们用个进制的字符,则可以表示,表示个汉字绰绰有余,但这时候需要多个字符。 项目地址:https://github.com/xinglie/pi... 压缩说明 为了便于在网络上的传输,本人想办法写了一个压缩算法,目前这个版本是压缩好的,带自解压功能以下说明压缩思路: 理想状态下是使用Stirng.prototype.localCom...

    cikenerd 评论0 收藏0

发表评论

0条评论

最新活动
阅读需要支付1元查看
<