-- 模組:NumberToChinese
-- 外观
-- Export table returned at the end of the module.
local p = {}
-- Placeholder for the scientific-notation helper library; NumberToChinese
-- replaces it with Module:Complex_Number/Calculate the first time it is needed.
local calc_lib = {} -- used for handling scientific notation
local yesno = require('Module:Yesno')
--- Split a string into an array of one-byte strings.
-- @param s string to split (indexed by byte, not by UTF-8 character)
-- @return table array whose i-th entry is the i-th byte of `s` as a string
local function stringToTable(s)
	local chars = {}
	local total = #s
	local pos = 1
	while pos <= total do
		chars[pos] = s:sub(pos, pos) -- one entry per byte
		pos = pos + 1
	end
	return chars
end
-- Modes for dropping a leading "one": drop it only in front of "ten",
-- never drop it, or drop every leading "one".
local ClearTenOne, NoClearOne, ClearAllOne = 0, 1, 2
-- Numeral style: ordinary (lower-case) or financial (capital) characters.
local Normal, Financial = 0, 1
-- Token IDs: 0-9 are the digits themselves. Over marks the end of a four-digit
-- group (the ten-thousand boundary); Ten/Hundred/Thousand are the in-group
-- units; LargeStart is the first ID of the ten-thousand-based unit names.
local Over, Ten, Hundred, Thousand, LargeStart = 10, 11, 12, 13, 14
local Data = { -- Chinese numeral data
standard = {{'〇', '一', '二', '三', '四', '五', '六', '七', '八', '九'}, {'零', '壹', '貳', '參', '肆', '伍', '陸', '柒', '捌', '玖'}},
decimal = {{'', '十', '百', '千'}, {'', '拾', '佰', '仟'}},
large = {'', '萬', '億', '兆', '京', '垓', '秭', '穰', '溝', '澗', '正', '載', '極', '恆河沙', '阿僧祇', '那由他', '不可思議', '無量', '大數'},
NotANumber = "這不是一個數字", Infinity = "無窮大", NumberingZero = '〇',
Positive = '正', Negative = '負', Point = '點', Dash = '之'
}
-- Number of entries in Data.large (including the empty name of the units group).
local largeSize = #(Data.large)
--- Read the conversion parameters from a frame / argument table.
-- Arguments are fetched through Module:Arguments.
-- @param frame frame object (or args table) passed to the exported function
-- @return number     value of `num`, `number` or the first positional argument (default 0)
-- @return numberType Normal (0) or Financial (1); taken from `b`, `daiji`,
--                    `大寫`, `大写` or `numberType`. Numeric values are clamped
--                    to an integer in [Normal, Financial]; non-numeric values
--                    go through Module:Yesno.
-- @return clearOne   leading-one mode from `ten` or `clearOne`
--                    (default ClearTenOne, capped at ClearAllOne)
local function argsToVariable(frame)
	local args = require('Module:Arguments').getArgs(frame)
	local number = args.num or args.number or args[1] or 0

	local numberType = args.b or args.daiji or args["大寫"] or args["大写"] or args.numberType or Normal
	numberType = tonumber(numberType) or (yesno(numberType) and Financial or Normal)
	-- numberType is later used as an index into Data.standard / Data.decimal,
	-- so it has to be exactly Normal or Financial. A negative, fractional or
	-- NaN value would otherwise index a nil row and raise a script error.
	if not (numberType >= Normal) then -- also true for NaN
		numberType = Normal
	elseif numberType > Financial then
		numberType = Financial
	else
		numberType = math.floor(numberType)
	end

	local clearOne = args.ten or args.clearOne or ClearTenOne
	clearOne = tonumber(clearOne) or ClearTenOne
	if clearOne > ClearAllOne then
		clearOne = ClearAllOne
	end

	return number, numberType, clearOne
end
--- Translate one token ID into its Chinese text.
-- IDs below Over are digits, IDs from Over up to LargeStart are the in-group
-- units (ten, hundred, thousand), and IDs from LargeStart upward are the
-- ten-thousand-based unit names.
-- @param id         numeric token ID
-- @param numberType Normal or Financial; selects the character set for digits and in-group units
-- @return string Chinese text for the token
local function IDToChinese(id, numberType)
	local styleRow = numberType + 1 -- Lua arrays are 1-based
	if id < Over then
		-- the ID is the digit itself
		return Data.standard[styleRow][id + 1]
	end
	if id < LargeStart then
		-- unit inside a four-digit group
		return Data.decimal[styleRow][id - Over + 1]
	end
	local largeIndex = id - LargeStart
	if id < LargeStart + largeSize then
		-- named ten-thousand-based unit
		return Data.large[largeIndex + 1]
	end
	-- beyond the table: compose a name from the existing units
	return p.LargeName(largeIndex)
end
--- Convert one four-digit group into a list of token IDs.
-- Only the first four characters of `number` are read. The result always ends
-- with Over; an all-zero group yields {0, Over}.
-- @param number string (or value convertible with `..`) holding the group digits
-- @return table array of token IDs
local function LessThan10000ToID(number)
	local ids = {0} -- leading sentinel 0 simplifies the thousands-digit check
	local digits = stringToTable(number .. '')
	for place = 1, 4 do
		local digit = digits[place]
		if digit == '0' then
			-- a run of zeros collapses into a single 0 token
			if ids[#ids] ~= 0 then
				table.insert(ids, 0)
			end
		else
			-- ordinary case: the digit followed by the unit of its position
			table.insert(ids, tonumber(digit))
			table.insert(ids, Thousand - place + 1)
		end
	end
	if digits[1] ~= '0' then
		-- thousands digit present: the sentinel is not needed
		table.remove(ids, 1)
	end
	-- Terminate with Over: overwrite the trailing token, or append when only
	-- the sentinel is left (the 0000 case).
	local slot = #ids
	if slot <= 1 then
		slot = slot + 1
	end
	ids[slot] = Over
	return ids
end
--- Convert the integer part of a number (a digit string) to Chinese.
-- @param number     string of digits
-- @param numberType Normal or Financial
-- @param clearOne   ClearTenOne, NoClearOne or ClearAllOne: whether a leading "one" is dropped
-- @return string Chinese text of the integer part
local function FrontNumberToChinese(number, numberType, clearOne)
	-- Pad to a multiple of four digits: prepend four zeros, then cut the surplus.
	-- Any all-zero leading group is skipped by the loop below.
	number = '0000' .. number
	number = number:sub(#number % 4 + 1)
	local groupCount = #number / 4

	local ids = {0} -- sentinel 0 simplifies handling of the highest group
	for group = 1, groupCount do
		local first = group * 4 - 3
		-- The slice is one character longer than a group; LessThan10000ToID
		-- reads only its first four characters.
		local data = LessThan10000ToID(number:sub(first, first + 4))
		if #data == 2 then
			-- all-zero group ({0, Over}): emit one 0 unless one is already pending
			if ids[#ids] ~= 0 then
				table.insert(ids, 0)
			end
		else
			if ids[#ids] == 0 and data[1] == 0 then
				-- avoid a doubled zero at the junction of two groups
				table.remove(data, 1)
			end
			for _, token in ipairs(data) do
				table.insert(ids, token)
			end
			-- replace the trailing Over with the unit name of this group
			ids[#ids] = LargeStart + groupCount - group
		end
	end

	if #ids == 1 then
		-- only the sentinel is left: the value is zero
		return IDToChinese(0, numberType)
	end
	table.remove(ids, 1) -- drop the sentinel
	table.remove(ids, #ids) -- drop the last token: the empty units-group name or a trailing 0

	local dropLeadingOne = false
	if ids[1] == 1 then
		if clearOne == ClearTenOne then
			dropLeadingOne = (ids[2] == Ten) -- only a leading "one ten" loses its "one"
		elseif clearOne == ClearAllOne then
			dropLeadingOne = (#ids > 1) -- any leading "one", but never a lone "one"
		end
	end
	if dropLeadingOne then
		table.remove(ids, 1)
	end

	local chinese = ''
	for _, token in ipairs(ids) do
		chinese = chinese .. IDToChinese(token, numberType)
	end
	return chinese
end
--- Convert the fractional part of a number (a digit string) to Chinese,
-- digit by digit, ignoring trailing zeros.
-- @param number     string of digits after the decimal point
-- @param numberType Normal or Financial
-- @return string Chinese digits; empty when the part is empty or all zeros
local function BackNumberToChinese(number, numberType)
	local significant = #number
	local digits = stringToTable(number .. '')
	-- shrink the length past any trailing zeros
	while significant > 0 and digits[significant] == '0' do
		significant = significant - 1
	end
	local chinese = ''
	for pos = 1, significant do
		chinese = chinese .. IDToChinese(tonumber(digits[pos]), numberType)
	end
	return chinese
end
--- Convert a number (as quantity, with positional units) to Chinese text.
-- Accepts an optional leading '+', '-' or '−', the words 'inf' / '∞' / 'nan',
-- plain decimals and scientific notation. Anything else yields Data.NotANumber.
-- Side effect: sets the ordinary-style zero in Data.standard to the financial
-- zero character for the rest of the request.
-- @param number     value to convert (converted with tostring)
-- @param numberType Normal or Financial
-- @param clearOne   ClearTenOne, NoClearOne or ClearAllOne
-- @return string Chinese text
local function NumberToChinese(number, numberType, clearOne)
	number = tostring(number) -- work on a string
	-- For quantities both styles use the same zero character (arrays are 1-based).
	Data.standard[Normal + 1][0 + 1] = Data.standard[Financial + 1][0 + 1]
	local chinese = ''
	number = mw.ustring.gsub(mw.text.trim(number), "%s+", '') -- remove all whitespace
	local check_char = mw.ustring.sub(number, 1, 1) -- first character decides the sign
	if check_char == '+' then -- a plus sign is only spoken when written
		chinese = chinese .. Data.Positive
		number = mw.ustring.sub(number, 2, #number)
	elseif check_char == '-' or check_char == '−' then
		chinese = chinese .. Data.Negative
		-- non-ASCII characters may still be present here, hence mw.ustring
		number = mw.ustring.sub(number, 2, #number)
	end
	if number == '∞' or number:lower() == 'inf' then
		chinese = chinese .. Data.Infinity
		return chinese
	end
	if number:lower() == 'nan' then -- handled early, before any numeric validation
		chinese = ((chinese == '') and Data.Positive or chinese) .. Data.NotANumber
		return chinese
	end
	if number:find("[%d%.][Ee][%d%+%-]") then -- scientific notation present
		if type(calc_lib.scientific2number) ~= 'function' then
			calc_lib = require("Module:Complex_Number/Calculate") -- loaded on first use
		end
		number = calc_lib.scientific2number(number) -- expand to a plain decimal
	end
	-- an 'e' left after expansion means the scientific notation was malformed
	if number:find("[Ee]") then return Data.NotANumber end

	local frontNumber, backNumber = '', ''
	local point = number:find('%.')
	if point == nil then -- no decimal point: integer part only
		frontNumber = number
	else -- split into integer and fractional parts
		frontNumber = number:sub(1, point - 1)
		backNumber = number:sub(point + 1, #number)
	end
	-- Both parts must consist of decimal digits only (either may be empty).
	-- Checking with tonumber('0' .. frontNumber) is not sufficient: Lua 5.1
	-- tonumber accepts hexadecimal, so an input such as "x1F" would be read
	-- as 0x1F and converted into meaningless text.
	if not frontNumber:find('^%d*$') or not backNumber:find('^%d*$') then
		return Data.NotANumber
	end

	chinese = chinese .. FrontNumberToChinese(frontNumber, numberType, clearOne)
	local success, backChinese = pcall(BackNumberToChinese, backNumber, numberType)
	-- any failure while converting the fraction is reported as "not a number"
	if not success then return Data.NotANumber end
	if backChinese ~= '' then -- no fractional digits: omit the decimal point too
		chinese = chinese .. Data.Point .. backChinese
	end
	return chinese
end
--- Convert a number used as an identifier/numbering to Chinese.
-- Digits are read one by one with no positional units; every '.' becomes
-- Data.Point and every '-' becomes Data.Dash. Other characters are skipped.
-- Side effect: sets the ordinary-style zero in Data.standard to Data.NumberingZero.
-- @param number     value to convert (converted with tostring)
-- @param numberType Normal or Financial
-- @return string Chinese text
local function NumberToChineseNumbering(number, numberType)
	number = tostring(number)
	-- numbering uses its own zero character for the ordinary style (arrays are 1-based)
	Data.standard[Normal + 1][0 + 1] = Data.NumberingZero
	local chinese = ''
	for _, ch in ipairs(stringToTable(number .. '')) do
		if ch == '.' then
			chinese = chinese .. Data.Point
		elseif ch == '-' then
			chinese = chinese .. Data.Dash
		else
			local digit = tonumber(ch)
			if digit ~= nil then
				chinese = chinese .. IDToChinese(digit, numberType)
			end
		end
	end
	return chinese
end
--- Build the reverse lookup tables used to parse Chinese numerals.
-- Fills Data.number (character -> digit), Data.otherNumber (combined
-- characters such as 廿), Data.less1000 (in-group unit -> buffer slot),
-- Data.largeTable (large-unit name -> index) and Data.maxLarge (longest
-- large-unit name, in characters). Also appends two extra digit rows to
-- Data.standard.
local function loadInvChineseData()
	Data.number = {['叄'] = 3, ['叁'] = 3, ['䦉'] = 4, ['兩'] = 2, ['两'] = 2}
	Data.standard[3] = {'零', '壹', '贰', '参', '肆', '伍', '陆', '柒', '捌', '玖'}
	Data.standard[4] = {'洞', '么', '兩', '叄', '肆', '伍', '六', '拐', '八', '勾'}
	Data.otherNumber = {
		['廿'] = {addAt = 2, value = 2}, ['念'] = {addAt = 2, value = 2},
		['卅'] = {addAt = 2, value = 3},
		['卌'] = {addAt = 2, value = 4},
		['皕'] = {addAt = 3, value = 2},
	}
	local simplifiedLarge = {'', '万', '亿', '兆', '京', '垓', '秭', '穰', '沟', '涧', '正', '载', '极', '恒河沙', '阿僧祇', '那由他', '不可思议', '无量', '大数'}
	local largeNameSets = {Data.large, simplifiedLarge}
	Data.less1000 = {}
	Data.largeTable = {['$'] = 1, [Data.Point] = 1, ['点'] = 1}

	-- digit characters -> value
	for _, digitRow in ipairs(Data.standard) do
		for position, glyph in ipairs(digitRow) do
			Data.number[glyph] = position - 1
		end
	end
	for digit = 0, 9 do
		Data.number[tostring(digit)] = digit
	end

	-- in-group units -> slot inside a four-digit buffer
	for _, unitRow in ipairs(Data.decimal) do
		for slot, unitName in ipairs(unitRow) do
			Data.less1000[unitName] = slot
		end
	end

	-- large-unit names -> index, tracking the longest name
	Data.maxLarge = 0
	for _, nameSet in ipairs(largeNameSets) do
		for index, largeName in ipairs(nameSet) do
			Data.maxLarge = math.max(Data.maxLarge, mw.ustring.len(largeName))
			Data.largeTable[largeName] = index
		end
	end
end
--- Read the token that starts at character position `i` of `str`.
-- The token is the single character at `i`, unless a longer substring
-- starting there is a key of Data.largeTable; then that substring is the token.
-- @param str string being scanned
-- @param i   1-based character position
-- @return string the token, or 'null' when the position yields an empty string
-- @return number position of the token's last character
local function FindChineseToken(str, i)
	local token = mw.ustring.sub(str, i, i)
	local extra = 1
	while extra <= Data.maxLarge do -- try every candidate length found in the table
		local candidate = mw.ustring.sub(str, i, i + extra)
		if Data.largeTable[candidate] then
			-- multi-character unit name found: take it and stop searching
			token, i = candidate, i + extra
			break
		end
		extra = extra + 1
	end
	if token == '' then
		return 'null', i -- 'null' makes the "nothing here" case easy to test
	end
	return token, i
end
-- Parse the integer part of a Chinese numeral into four-digit groups.
-- Relies on the lookup tables built by loadInvChineseData.
-- Pass 1 scans input_str token by token and produces token_list, a sequence of
-- four-slot buffers (slot index = Data.less1000 value of the unit, slot 1 being
-- the ones digit) interleaved with large-unit ids from Data.largeTable.
-- Pass 2 folds token_list into `result`, where result[id] is the buffer that
-- precedes large-unit id `id` (consecutive large units are combined).
-- Returns: result, maxId (the largest id stored in result).
-- Raises an error (level 2) for unknown characters or invalid combinations.
local function LoadChineseToken(input_str)
local str = input_str..'$' -- append an end-of-input sentinel
local token_list = {}
local i, strlen, it, uit = 1, mw.ustring.len(str), '', ''
local buffer, buffer_flag = {0,0,0,0}, false -- one buffer per group of four digits
while i <= strlen do -- scan the numeral token by token
it, i = FindChineseToken(str, i)
uit = it
if not(Data.number[it] or Data.otherNumber[it] or Data.less1000[it] or Data.largeTable[it])then
if it~='null'then error(mw.ustring.format("未知的字元 '%s'", it), 2)end
end
if Data.less1000[it] then -- a bare in-group unit (ten, hundred, thousand) is read as having an omitted "one"
buffer[Data.less1000[it]] = 1
local check_error, err_i = FindChineseToken(str, i + 1)
if Data.less1000[check_error] then
error(mw.ustring.format("錯誤的用法 '%s'", it..check_error), 2)
end
buffer_flag = true
elseif Data.otherNumber[it] then
-- combined character: stores its value directly in the slot given by addAt
buffer[Data.otherNumber[it].addAt] = Data.otherNumber[it].value
local check_error, err_i = FindChineseToken(str, i + 1)
if Data.less1000[check_error] then
error(mw.ustring.format("錯誤的用法 '%s'", it..check_error), 2)
end
buffer_flag = true
elseif Data.number[it] then -- a digit was read
if Data.number[it] ~= 0 then
i = i + 1
uit, i = FindChineseToken(str, i) -- check whether the next token is a unit
if not(Data.number[uit] or Data.less1000[uit] or Data.largeTable[uit])then
if uit~='null'then error(mw.ustring.format("未知的字元 '%s'", uit), 2)end
end
local check_error, err_i = FindChineseToken(str, i + 1)
if Data.less1000[check_error] then
error(mw.ustring.format("錯誤的用法 '%s'", uit..check_error), 2)
end
if Data.less1000[uit] then -- digit + in-group unit: record the digit in that slot
buffer[Data.less1000[uit]] = Data.number[it]
buffer_flag = true
elseif Data.number[uit] then
error(mw.ustring.format("錯誤的用法 '%s'", it..uit), 2)
else -- digit + large unit: this group is complete, so store the buffer as a token and start a new one
buffer[1] = Data.number[it]
table.insert(token_list, buffer)
buffer = {0,0,0,0}
buffer_flag = false
end
else
local check_next, next_i = FindChineseToken(str, i + 1)
if (Data.largeTable[(check_next == '')and'null'or check_next]) and (not buffer_flag) then
buffer[1] = 1 -- form with an omitted "one"
buffer_flag = true
end
end
end
if Data.largeTable[(uit == '')and'null'or uit] then -- what was read is a large unit
if buffer_flag then -- is there a buffer that has not been stored yet?
table.insert(token_list, buffer) -- if so, store it as a token and start a new buffer
buffer = {0,0,0,0}
buffer_flag = false
end
table.insert(token_list, Data.largeTable[uit]) -- store the large-unit id in the token list
end
i = i + 1
end
if buffer_flag then -- an unstored buffer remains: there is a part below ten thousand
table.insert(token_list, buffer) -- store it as a token
end
if type(token_list[1]) ~= type({}) then -- a leading large-unit id is read as having an omitted "one"
table.insert(token_list, 1, {1,0,0,0})
end
local loop_count, maxId = #token_list, 0 -- prepare to organise the tokens
local result = {}
i, buffer_flag = 1, false
local last_it = {}
while i <= loop_count do
it = token_list[i]
if type(it) == type(0) then -- a large-unit id
it = it - 1
uit = token_list[i + 1] -- consecutive large-unit ids are added together (minus 1 because Lua arrays start at 1)
while type(uit) == type(0) and i <= loop_count do
it = it + uit - 1
i = i + 1
uit = token_list[i + 1]
end
it = it + 1 -- add 1 back to obtain an id again
if it > maxId then maxId = it end -- remember the largest id seen
result[it] = last_it -- store the parsed four-digit group under this id
buffer_flag = false
else
last_it = it
buffer_flag = true
end
i = i + 1
end
if buffer_flag then -- the remaining four-digit group below ten thousand
result[1] = last_it
if 1 > maxId then maxId = 1 end
end
return result, maxId
end
--{{#invoke:NumberToChinese|Chinese_To_Number}}
--- Convert a Chinese numeral to a string of ASCII digits.
-- @param frame frame object or args table (first positional argument is the
--              text), or a plain value that is converted with tostring
-- @return string optional sign followed by digits (with '.' for the decimal
--                point), 'nan' / 'inf' for the special words, or '' for
--                empty input
-- Raises an error (level 2) when the text contains unknown or misused words.
function p.Chinese_To_Number(frame)
	local input_str = ((type(frame)==type({})) and (frame.args or frame) or {tostring(frame) or ""})[1]
	-- A missing first argument leaves input_str nil, which mw.text.trim cannot
	-- handle. Normalise to a string so the empty-input path below applies.
	input_str = tostring(input_str or '')
	loadInvChineseData() -- build the lookup tables for Chinese numerals
	local str = mw.ustring.gsub(mw.text.trim(input_str), "%s", '')
	if str == '' then return '' end
	local number_pattern = Data.Point..'点%.'
	local large_pattern = ''
	local number, sign = '', ''
	local check_char = mw.ustring.sub(str, 1, 1) -- first character decides the sign
	if (check_char == Data.Positive or check_char == '+') and not (str == Data.Positive) then -- explicit positive sign
		sign = '+'
		str = mw.ustring.sub(str, 2, -1)
	elseif check_char == Data.Negative or check_char == '负' or check_char == '-' or check_char == '−' then
		sign = '-'
		str = mw.ustring.sub(str, 2, -1)
	elseif check_char == "零" then -- forms such as "零下三" (three below zero)
		local check_char = mw.ustring.sub(str..'$$', 2, 2)
		if check_char == "上" then
			sign = '+'
			str = mw.ustring.sub(str, 3, -1)
		elseif check_char == "下" then
			sign = '-'
			str = mw.ustring.sub(str, 3, -1)
		end
		if str == '' then return "0" end
	end
	if str == '' then return '' end -- nothing left after removing the sign
	if str == p.NotANumber() then return sign .. 'nan' end
	if mw.ustring.match(str,'[無无][穷窮限]') then return sign .. 'inf' end
	-- collect every known word into character sets used for validation
	for name,index in pairs(Data.number) do if type(name) == type("string") then number_pattern = number_pattern .. name end end
	for name,index in pairs(Data.less1000) do if type(name) == type("string") then large_pattern = large_pattern .. name end end
	for name,index in pairs(Data.largeTable) do if type(name) == type("string") then large_pattern = large_pattern .. name end end
	for name,index in pairs(Data.otherNumber) do if type(name) == type("string") then large_pattern = large_pattern .. name end end
	if mw.ustring.match(str, "^["..number_pattern.."]+$") then -- digits only: transliterate directly
		for i=1,mw.ustring.len(str) do
			local it = mw.ustring.sub(str,i,i)
			if it == '点' or it == Data.Point or it == '.' then
				number = number .. '.'
			else
				number = number .. (Data.number[it] or 0)
			end
		end
		return sign .. number
	end
	local find_invalid, find_invalid_end = mw.ustring.find(str, "[^"..number_pattern..large_pattern.."]+")
	if find_invalid then
		error(mw.ustring.format("輸入的內容含有無效的詞彙 '%s'", mw.ustring.sub(str, find_invalid, find_invalid_end)), 2)
	end
	local point = mw.ustring.find(str, "[点"..Data.Point.."%.]")
	local frontStr, backStr = '', ''
	if point == nil then -- no decimal point: integer part only
		frontStr = str
	else -- split into integer and fractional parts
		frontStr = mw.ustring.sub(str, 1, point - 1)
		backStr = mw.ustring.sub(str, point + 1, -1)
	end
	local tokens, token_len = LoadChineseToken(frontStr)
	local zero_flag = true -- true until the first non-zero digit has been emitted
	for i=token_len,1,-1 do -- from the highest four-digit group down
		for j = 4,1,-1 do -- from the thousands slot down to the ones slot
			local it = (tokens[i]or{})[j] or 0
			if zero_flag then
				if it ~= 0 then
					zero_flag = false
					number = number .. it
				end
			else
				number = number .. it
			end
		end
	end
	if backStr ~= '' then
		number = number .. '.'
		for i=1,mw.ustring.len(backStr) do
			local it = mw.ustring.sub(backStr,i,i)
			if Data.number[it] then
				number = number .. Data.number[it]
			end
		end
	end
	return sign .. number
end
--- Return the "not a number" message, so templates can compare against it.
-- @return string Data.NotANumber
function p.NotANumber()
	local message = Data.NotANumber
	return message
end
--- Return the text used for infinity.
-- @return string Data.Infinity
function p.Infinity()
	local message = Data.Infinity
	return message
end
--- Build the name of a ten-thousand-based unit from its index.
-- The index is split into a remainder, which selects a name from Data.large,
-- and a count of how many times the last name in Data.large is appended.
-- @param frame frame object or args table (first positional argument is the
--              index), or a plain value; a missing or non-numeric index is
--              treated as 1
-- @return string composed unit name
function p.LargeName(frame)
	local raw
	if type(frame) == type({}) then
		raw = (frame.args or frame)[1]
	else
		raw = tostring(frame)
	end
	-- Coerce in both branches: wikitext arguments arrive as strings and may be
	-- absent. The floor keeps the table lookups below on integer keys.
	local index = math.floor(tonumber(raw) or 1)

	local cycle = largeSize - 1
	local remainder = index % cycle
	local largecount = (index - remainder) / cycle
	local overlying = Data.large[remainder + 1]
	for _ = 1, largecount do
		overlying = overlying .. Data.large[largeSize]
	end
	return overlying
end
--{{#invoke:NumberToChinese|Number_to_Chinese}}
--- Template entry point: Chinese numeral with positional units.
-- @param frame frame object whose arguments are read by argsToVariable
-- @return string Chinese text
function p.Number_to_Chinese(frame)
	local number, numberType, clearOne = argsToVariable(frame)
	return NumberToChinese(number, numberType, clearOne)
end
--- Template entry point: Chinese numeral for numbering (digit by digit).
-- Has the same body as p.Number_to_Numbering.
-- @param frame frame object whose arguments are read by argsToVariable
-- @return string Chinese text
function p.Number_to____(frame)
	local number, numberType = argsToVariable(frame)
	return NumberToChineseNumbering(number, numberType)
end
--{{#invoke:NumberToChinese|Number_to_Numbering}}
--- Template entry point: Chinese numeral for numbering (digit by digit).
-- @param frame frame object whose arguments are read by argsToVariable
-- @return string Chinese text
function p.Number_to_Numbering(frame)
	local number, numberType = argsToVariable(frame)
	return NumberToChineseNumbering(number, numberType)
end
-- Entry point for other modules. It is a plain function call (no frame) so
-- that templates already using this module are not affected.
-- The name therefore follows [[Wikipedia:Lua代码风格#命名常规]].
--- Convert a number to a Chinese numeral with positional units.
-- @param number     value to convert (converted with tostring)
-- @param numberType Normal (0) or Financial (1); defaults to 0
-- @param clearOne   leading-one mode; defaults to 0
-- @return string Chinese text
function p._numberToChinese(number, numberType, clearOne)
	local text = tostring(number)
	local style = numberType or 0
	local oneMode = clearOne or 0
	return NumberToChinese(text, style, oneMode)
end
--- Entry point for other modules: Chinese numeral for numbering.
-- Digits are converted one by one, without positional units, exactly as the
-- template entry point p.Number_to_Numbering does.
-- @param number     value to convert (converted with tostring)
-- @param numberType Normal (0) or Financial (1); defaults to 0
-- @return string Chinese text
function p._numberToNumbering(number, numberType)
	-- Previously this called NumberToChinese, which returned the quantity form
	-- instead of the numbering form.
	return NumberToChineseNumbering(tostring(number), numberType or 0)
end
-- Export the module table.
return p