利用PHP實現詞法分析器與自定義語言
之前項目有一個需求,業務人員使用中文編寫一些自定義公式,然后需要我們后臺執行并將結果返回到界面上,于是就基于有限狀態機寫了這個詞法分析器,比較簡單,希望能夠拋磚引玉。
一、分析需求
輸入中文公式,返回結(jié)果,比如:
現有薪資=10000;
個稅起點=3000;
當前年份=2021;
如果(當前年份=2022){
個稅起點=5000;
}
返回 (現有薪資-個稅起點) * 0.2;
二、實現需求
最初的想法是使用字符串替換的方式,將中文關鍵字替換成 php 的關鍵字,然后調用 eval 執行,這樣確實也是可以的,但是總覺得不是很美麗,并且不能實現動態解析。就想著自己實現一個簡單的詞法分析,然后結合 ast 將詞法轉換成 php 代碼執行,豈不快哉。當前版本沒有用到抽象語法樹來生成代碼,全部使用字符串拼接。
<?php
/**
 * Class Lexer
 * @package Sett\OaLang
 *
 * Lexer for a small formula language written with Chinese (or ASCII)
 * identifiers. The source text is scanned one character at a time into a flat
 * list of tokens; convert() then concatenates those tokens into PHP source.
 *
 * Every token is an array of the form
 *   ["type" => string, "char" => string, "pos" => int]
 * where "pos" is a character offset into the input (see makeToken()).
 */
class Lexer
{
    // Keyword map: source-language word => PHP keyword, e.g. ["如果" => "if"].
    public $keywordList = [];
    // Single-character operators and punctuation recognised by the lexer.
    public $operatorList = [
        "+", "-", "*", "/", "=", ">", "<", "!", "(", ")", "{", "}", ",", ";"
    ];
    // Source text being scanned.
    private $input;
    // Character under the cursor, or $eof once the input is exhausted.
    private $currChar;
    // Character (not byte) offset of the cursor.
    private $currCharPos = 0;
    // End-of-input sentinel. A real character is always one character long,
    // so it can never be confused with this three-letter marker.
    private $eof = "eof";
    // Encoding used for every multibyte operation on $input.
    private $currEncode = "UTF-8";

    // Token types.
    public const VAR = "variable";
    public const STR = "string";
    public const KW  = "keyword";
    public const OPR = "operator";
    public const INT = "integer";
    public const NIL = "null";

    // Characters skipped between tokens: LF, CR, space, tab and the
    // full-width (ideographic) space that Chinese input methods produce.
    private const BLANK_CHARS = ["\n", "\r", " ", "\t", "\u{3000}"];

    /**
     * Lexer constructor.
     *
     * @param string $input source text to tokenize
     */
    public function __construct(string $input)
    {
        $this->input    = $input;
        $this->currChar = $this->charAt($this->currCharPos);
    }

    /**
     * Installs the keyword map used by both the lexer and convert().
     *
     * @param array $keywordList map of source-language word => PHP keyword
     */
    public function setKeywordList($keywordList)
    {
        $this->keywordList = $keywordList;
    }

    /**
     * Tokenizes the remaining input.
     *
     * A NIL token holding a single space is appended after every keyword so
     * that convert() emits "return $x" rather than "return$x".
     *
     * @return array list of tokens (the end-of-input token is not included)
     * @throws Exception when the input is empty or a string literal is not closed
     */
    public function parseInput()
    {
        if ($this->input == "") {
            throw new InvalidArgumentException("code can not be empty");
        }

        $tokens = [];
        while (true) {
            $token = $this->nextToken();
            if ($token["type"] === "eof") {
                break;
            }
            $tokens[] = $token;
            if ($token["type"] === static::KW) {
                $tokens[] = $this->makeToken(static::NIL, " ");
            }
        }

        return $tokens;
    }

    /**
     * Reads the next token and leaves the cursor on the character after it.
     *
     * @return array the token; its type is "eof" once the input is exhausted
     * @throws Exception when a string literal is not closed
     */
    public function nextToken()
    {
        $this->skipBlankChar();

        if ($this->currChar === $this->eof) {
            return $this->makeToken("eof", $this->eof);
        }
        // A run of Chinese characters is either a keyword or a variable.
        if ($this->isCnLetter()) {
            return $this->wordToken($this->matchUntilNextCharIsNotCn());
        }
        if ($this->isOperator()) {
            return $this->currToken(static::OPR, $this->currChar);
        }
        // Numeric literal: the whole run of digits, with an optional fraction.
        if ($this->isNumber()) {
            $start = $this->currCharPos;
            return $this->makeToken(static::INT, $this->matchNumber(), $start);
        }
        // Tested on the opening quote rather than on the literal's content so
        // that the empty literal "" is recognised as well.
        if ($this->currChar === "\"") {
            return $this->currToken(static::STR, $this->matchCompleteStr());
        }
        // A word starting with an ASCII letter.
        if ($this->isEnLetter()) {
            return $this->wordToken($this->matchVar());
        }

        // NOTE(review): any other character (".", "%", "_", ...) is still
        // emitted as a one-character variable token, as before; convert()
        // will produce invalid PHP for it.
        return $this->currToken(static::VAR, $this->currChar);
    }

    /**
     * Turns the generated tokens into PHP source by plain concatenation.
     *
     * @param array $tokens tokens as returned by parseInput()
     * @return string PHP source text
     */
    public function convert(array $tokens)
    {
        $tokens = array_values($tokens);
        $code   = "";

        foreach ($tokens as $index => $token) {
            switch ($token["type"]) {
                case static::KW:
                    $code .= $this->keywordList[$token["char"]] ?? "";
                    break;
                case static::VAR:
                    $code .= '$' . $token["char"];
                    break;
                case static::OPR:
                    $code .= $this->convertOperator($tokens, $index);
                    break;
                case static::STR:
                    // Escape \, " and $ so the content cannot terminate the
                    // literal or be interpolated when the output is evaluated.
                    $code .= '"' . addcslashes($token["char"], '"\\$') . '"';
                    break;
                default:
                    // INT, NIL and unknown types are copied through verbatim.
                    $code .= $token["char"];
            }
        }

        return $code;
    }

    /**
     * Maps an operator token to PHP. Only "+" needs care, because the source
     * language uses it for both addition and string concatenation:
     *  - next to a string literal it is concatenation ("."), padded with
     *    spaces when a number is on the other side so the dot cannot fuse
     *    with the digits;
     *  - otherwise, next to a numeric literal it stays "+" (previously
     *    "1+2" was emitted as the float literal "1.2");
     *  - in every other case it becomes "." as before. The operand types are
     *    unknown here, so variable + variable is always concatenation.
     *
     * @param array $tokens zero-indexed token list
     * @param int   $index  position of the operator token in $tokens
     * @return string PHP text for the operator
     */
    private function convertOperator(array $tokens, int $index): string
    {
        $char = $tokens[$index]["char"];
        if ($char !== "+") {
            return $char;
        }

        $prev = $tokens[$index - 1]["type"] ?? null;
        $next = $tokens[$index + 1]["type"] ?? null;
        $touchesString = $prev === static::STR || $next === static::STR;
        $touchesNumber = $prev === static::INT || $next === static::INT;

        if ($touchesNumber && !$touchesString) {
            return "+";
        }

        return $touchesNumber ? " . " : ".";
    }

    /**
     * Builds the token for a completed word. The cursor already sits on the
     * character after the word and is not moved, so nothing following a
     * keyword is swallowed.
     *
     * @param string $word the matched word
     * @return array KW token (pos = last character of the word) or VAR token
     */
    private function wordToken(string $word): array
    {
        if ($this->isKeyword($word)) {
            return $this->makeToken(static::KW, $word, $this->currCharPos - 1);
        }

        // Anything that is not a keyword is a variable.
        return $this->makeToken(static::VAR, $word);
    }

    /**
     * Consumes a run of Chinese and ASCII letters.
     *
     * @param string $input optional prefix for the result
     * @return string the prefix followed by the consumed letters
     */
    private function matchVar(string $input = ""): string
    {
        $word = $input ?: "";
        while ($this->isVar()) {
            $word .= $this->currChar;
            $this->nextChar();
        }

        return $word;
    }

    /**
     * @return bool whether the current character may appear in a plain word
     */
    private function isVar(): bool
    {
        return $this->isCnLetter() || $this->isEnLetter();
    }

    /**
     * Skips blank characters (see BLANK_CHARS).
     */
    private function skipBlankChar(): void
    {
        while (in_array($this->currChar, self::BLANK_CHARS, true)) {
            $this->nextChar();
        }
    }

    /**
     * Builds a token at the cursor position, then advances one character.
     *
     * @param string $type token type
     * @param string $word token text
     * @return array the token
     */
    private function currToken(string $type, string $word): array
    {
        $token = $this->makeToken($type, $word);
        $this->nextChar();

        return $token;
    }

    /**
     * @param string   $type token type
     * @param string   $char token text
     * @param int|null $pos  offset to record; defaults to the cursor position
     * @return array the token
     */
    private function makeToken(string $type, string $char, ?int $pos = null): array
    {
        return ["type" => $type, "char" => $char, "pos" => $pos ?? $this->currCharPos];
    }

    /**
     * @return bool whether the current character is an ASCII letter (a-z, A-Z)
     */
    private function isEnLetter(): bool
    {
        return preg_match('/\A[A-Za-z]\z/', $this->currChar) === 1;
    }

    /**
     * @return bool whether the current character lies in U+4E00..U+9FA5
     */
    private function isCnLetter(): bool
    {
        return preg_match('/^[\x{4e00}-\x{9fa5}]+$/u', $this->currChar) === 1;
    }

    /**
     * @return bool whether the current character is an ASCII digit
     */
    private function isNumber(): bool
    {
        return $this->isDigit($this->currChar);
    }

    /**
     * @param string $char a single character or the end-of-input sentinel
     * @return bool whether $char is one ASCII digit
     */
    private function isDigit(string $char): bool
    {
        return preg_match('/\A[0-9]\z/', $char) === 1;
    }

    /**
     * Consumes a numeric literal: digits, optionally followed by "." and more
     * digits. The dot is only taken when a digit follows it.
     *
     * @return string the literal text
     */
    private function matchNumber(): string
    {
        $literal = $this->matchDigits();
        if ($this->currChar === "." && $this->isDigit($this->charAt($this->currCharPos + 1))) {
            $this->nextChar();
            $literal .= "." . $this->matchDigits();
        }

        return $literal;
    }

    /**
     * @return string the run of digits starting at the cursor
     */
    private function matchDigits(): string
    {
        $digits = "";
        while ($this->isNumber()) {
            $digits .= $this->currChar;
            $this->nextChar();
        }

        return $digits;
    }

    /**
     * Consumes a double-quoted string literal. Must be called with the cursor
     * on the opening quote; returns with the cursor on the closing quote.
     *
     * @return string the text between the quotes (may be empty)
     * @throws RuntimeException when the input ends before the closing quote
     */
    private function matchCompleteStr(): string
    {
        $start   = $this->currCharPos;
        $content = "";

        $this->nextChar();
        while ($this->currChar !== "\"") {
            // Without this guard the loop would never end: the sentinel is
            // never equal to a quote.
            if ($this->currChar === $this->eof) {
                throw new RuntimeException("unterminated string literal starting at position " . $start);
            }
            $content .= $this->currChar;
            $this->nextChar();
        }

        return $content;
    }

    /**
     * @return bool whether the current character is an operator
     */
    private function isOperator(): bool
    {
        return in_array($this->currChar, $this->operatorList, true);
    }

    /**
     * @return string the run of Chinese characters starting at the cursor
     */
    private function matchUntilNextCharIsNotCn(): string
    {
        $word = "";
        while ($this->isCnLetter()) {
            $word .= $this->currChar;
            $this->nextChar();
        }

        return $word;
    }

    /**
     * Advances the cursor by one character.
     */
    private function nextChar(): void
    {
        $this->currCharPos += 1;
        $this->currChar     = $this->charAt($this->currCharPos);
    }

    /**
     * @param int $pos character offset into the input
     * @return string the character at $pos, or the end-of-input sentinel
     */
    private function charAt(int $pos): string
    {
        $char = mb_substr($this->input, $pos, 1, $this->currEncode);

        return $char === "" ? $this->eof : $char;
    }

    /**
     * @param string $input candidate word
     * @return bool whether the word has a non-empty entry in the keyword map
     */
    private function isKeyword(string $input): bool
    {
        return ($this->keywordList[$input] ?? "") !== "";
    }
}
三、如何使用
require __DIR__ . "/vendor/autoload.php";

// Sample program written in the custom language.
// Every statement ends with ";" -- including the last one inside a block,
// otherwise the generated PHP does not parse.
$code = <<<EOF
姓名="腕豪";
問候="你好啊";
地址=(1+2) * 3;
如果(地址 > 3){
    地址=1;
}否則{
    地址="艾歐尼亞";
}
說話 = ("我"+"愛")+"你";
返回 姓名+年齡;
EOF;

$lexer = new Lexer($code);

// Define your own keywords: source-language word => PHP keyword.
$kwMap = [
    "如果" => "if", "否則" => "else", "返回" => "return", "否則如果" => "elseif"
];
$lexer->setKeywordList($kwMap);

// The generated tokens.
$tokens = $lexer->parseInput();

// Turn the tokens into PHP. This is plain concatenation; building an AST with
// a PHP parser library and printing that instead would also work.
var_dump($lexer->convert($tokens));
生成詞
[
    {"type": "variable", "char": "姓名", "pos": 2},
    {"type": "operator", "char": "=", "pos": 2},
    {"type": "string", "char": "腕豪", "pos": 7},
    {"type": "operator", "char": ";", "pos": 8},
    {"type": "variable", "char": "問候", "pos": 13},
    {"type": "operator", "char": "=", "pos": 13},
    {"type": "string", "char": "你好啊", "pos": 17},
    {"type": "operator", "char": ";", "pos": 18},
    {"type": "variable", "char": "地址", "pos": 23},
    {"type": "operator", "char": "=", "pos": 23},
    {"type": "operator", "char": "(", "pos": 24},
    {"type": "integer", "char": "1", "pos": 25},
    {"type": "operator", "char": "+", "pos": 26},
    {"type": "integer", "char": "2", "pos": 27},
    {"type": "operator", "char": ")", "pos": 28},
    {"type": "operator", "char": "*", "pos": 30},
    {"type": "integer", "char": "3", "pos": 32},
    {"type": "operator", "char": ";", "pos": 33},
    {"type": "keyword", "char": "如果", "pos": 37},
    {"type": "null", "char": " ", "pos": 38},
    {"type": "operator", "char": "(", "pos": 38},
    {"type": "variable", "char": "地址", "pos": 41},
    {"type": "operator", "char": ">", "pos": 42},
    {"type": "integer", "char": "3", "pos": 44},
    {"type": "operator", "char": ")", "pos": 45},
    {"type": "operator", "char": "{", "pos": 46},
    {"type": "variable", "char": "地址", "pos": 55},
    {"type": "operator", "char": "=", "pos": 55},
    {"type": "integer", "char": "1", "pos": 56},
    {"type": "operator", "char": ";", "pos": 57},
    {"type": "operator", "char": "}", "pos": 60},
    {"type": "keyword", "char": "否則", "pos": 62},
    {"type": "null", "char": " ", "pos": 63},
    {"type": "operator", "char": "{", "pos": 63},
    {"type": "variable", "char": "地址", "pos": 72},
    {"type": "operator", "char": "=", "pos": 72},
    {"type": "string", "char": "艾歐尼亞", "pos": 78},
    {"type": "operator", "char": ";", "pos": 79},
    {"type": "operator", "char": "}", "pos": 82},
    {"type": "variable", "char": "說話", "pos": 87},
    {"type": "operator", "char": "=", "pos": 88},
    {"type": "operator", "char": "(", "pos": 90},
    {"type": "string", "char": "我", "pos": 93},
    {"type": "operator", "char": "+", "pos": 94},
    {"type": "string", "char": "愛", "pos": 97},
    {"type": "operator", "char": ")", "pos": 98},
    {"type": "operator", "char": "+", "pos": 99},
    {"type": "string", "char": "你", "pos": 102},
    {"type": "operator", "char": ";", "pos": 103},
    {"type": "keyword", "char": "返回", "pos": 107},
    {"type": "null", "char": " ", "pos": 108},
    {"type": "variable", "char": "姓名", "pos": 111},
    {"type": "operator", "char": "+", "pos": 111},
    {"type": "variable", "char": "年齡", "pos": 114},
    {"type": "operator", "char": ";", "pos": 114}
]
輸出:
$姓名="腕豪";$問候="你好啊";$地址=(1.2)*3;if ($地址>3){$地址=1;}else {$地址="艾歐尼亞";}$說話=("我"."愛")."你";return $姓名.$年齡;
能執行嗎?當然能。還存在一些小 bug,不想改了。
四、使用場景
什么,居然有人說沒什么用?oa 系統(tǒng)總有用到的時候。
到此這篇關于利用PHP實現詞法分析器與自定義語言的文章就介紹到這了,更多相關PHP詞法分析器內容請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章,希望大家以后多多支持腳本之家!
相關文章
《Head First 設計模式》代碼之PHP版(面向對象學習)
《Head First 設計模式》是本不錯的講解設計模式的書,不像F4寫的那么枯燥,應該算是比較容易理解的好書。 2010-06-06 PHP使用mysql_fetch_object從查詢結果中獲取對象集的方法
這篇文章主要介紹了PHP使用mysql_fetch_object從查詢結果中獲取對象集的方法,實例分析了php操作mysql_fetch_object查詢數據庫的技巧,非常具有實用價值,需要的朋友可以參考下 2015-03-03 用PHP實現小寫金額轉換大寫金額的代碼(精確到分)
數字金額轉換成中文大寫金額的函數 String Int $num 要轉換的小寫數字或小寫字符串 2012-01-01 php如何實現不借助IDE快速定位行數或者方法定義的文件和位置
這篇文章主要介紹了php如何實現不借助IDE快速定位行數或者方法定義的文件和位置的相關資料,需要的朋友可以參考下 2017-01-01 PHP導出MySQL數據到Excel文件(fputcsv)
經常會碰到需要從數據庫中導出數據到Excel文件,用一些開源的類庫,比如PHPExcel,確實比較容易實現,但對大量數據的支持很不好,很容易到達PHP內存使用上限。 2011-07-07