मॉड्यूल:ain-kana-conv
"इस मॉड्यूल हेतु प्रलेख मॉड्यूल:ain-kana-conv/doc पर बनाया जा सकता है"
-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- Romanized consonant letters (plus the apostrophe) that can open a syllable.
-- do_convert compares the character in front of each vowel against this list
-- to decide whether that character belongs to the vowel's syllable.
local CONSONANTS = {
"p", "t", "c", "k",
"m", "n", "s", "h",
"w", "r", "y", "'"
}
-- Vowel letters, plain and acute-accented, that do_convert treats as syllable
-- nuclei when it splits a word into syllables.
local VOWELS = {
"a", "i", "u", "e", "o",
"á", "í", "ú", "é", "ó",
}
-- Maps each acute-accented vowel to its unaccented counterpart.
-- convert_syllable uses it to strip the accent before the katakana lookup.
local ACCENT_CONVERSION_TABLE = {
["á"] = "a", ["í"] = "i", ["ú"] = "u", ["é"] = "e", ["ó"] = "o",
}
-- Maps a romanized onset+vowel unit (or a bare vowel) to its katakana.
-- Keys are lowercase; convert_syllable retries with a lowercased key when the
-- exact key is missing.
-- The "nn" entry is what convert_syllable emits for a coda "n" that is
-- followed by another character. The "tt" entry is not referenced by the code
-- visible in this file.
local CONVERSION_TABLE = {
[ "a"] = "ア", [ "i"] = "イ", [ "u"] = "ウ", [ "e"] = "エ", [ "o"] = "オ",
["'a"] = "ア", ["'i"] = "イ", ["'u"] = "ウ", ["'e"] = "エ", ["'o"] = "オ",
["ka"] = "カ", ["ki"] = "キ", ["ku"] = "ク", ["ke"] = "ケ", ["ko"] = "コ",
["sa"] = "サ", ["si"] = "シ", ["su"] = "ス", ["se"] = "セ", ["so"] = "ソ",
["ta"] = "タ", ["tu"] = "ト゚", ["te"] = "テ", ["to"] = "ト",
["ca"] = "チャ", ["ci"] = "チ", ["cu"] = "チュ", ["ce"] = "チェ", ["co"] = "チョ",
["na"] = "ナ", ["ni"] = "ニ", ["nu"] = "ヌ", ["ne"] = "ネ", ["no"] = "ノ",
["ha"] = "ハ", ["hi"] = "ヒ", ["hu"] = "フ", ["he"] = "ヘ", ["ho"] = "ホ",
["pa"] = "パ", ["pi"] = "ピ", ["pu"] = "プ", ["pe"] = "ペ", ["po"] = "ポ",
["ma"] = "マ", ["mi"] = "ミ", ["mu"] = "ム", ["me"] = "メ", ["mo"] = "モ",
["ya"] = "ヤ", ["yi"] = "イ", ["yu"] = "ユ", ["ye"] = "イェ", ["yo"] = "ヨ",
["ra"] = "ラ", ["ri"] = "リ", ["ru"] = "ル", ["re"] = "レ", ["ro"] = "ロ",
["wa"] = "ワ", ["wi"] = "ヰ", ["we"] = "ヱ", ["wo"] = "ヲ",
["nn"] = "ン", ["tt"] = "ッ"
}
-- Maps a syllable-final consonant to the kana appended after the syllable's
-- main kana. The result does not depend on the preceding vowel.
-- Capital "N" is a separate key that yields ン.
local CODA_CONS = {
["w"] = "ゥ", ["y"] = "ィ",
["m"] = "ㇺ", ["n"] = "ㇴ", ["N"] = "ン",
["s"] = "ㇱ",
["p"] = "ㇷ゚", ["t"] = "ㇳ", ["k"] = "ㇰ"
}
-- Coda kana for syllable-final "r" and "h", which vary with the preceding
-- vowel. Outer key: the coda consonant; inner key: the unaccented lowercase
-- vowel immediately before it.
local CODA_VARA = {
["r"] = {
["a"] = "ㇻ", ["i"] = "ㇼ", ["u"] = "ㇽ", ["e"] = "ㇾ", ["o"] = "ㇿ"
},
["h"] = {
["a"] = "ㇵ", ["i"] = "ㇶ", ["u"] = "ㇷ", ["e"] = "ㇸ", ["o"] = "ㇹ"
}
}
--- Report whether `item` occurs among the values of `items`.
-- Every entry of the table (array part and hash part) is compared with `==`.
-- Declared `local` so the helper does not leak into the global environment.
-- @param item  value to look for
-- @param items table whose values are searched
-- @return true if some value equals `item`, false otherwise
local function in_values(item, items)
	for _, value in pairs(items) do
		if value == item then
			return true
		end
	end
	return false
end
--- Report whether `item` is a key of `items`.
-- A key counts as present whenever it has a non-nil value, including `false`.
-- Uses a direct index instead of scanning every key, and is declared `local`
-- so the helper does not leak into the global environment.
-- @param item  key to look for (nil is never a key, so it yields false)
-- @param items plain table whose keys are checked
-- @return true if `items` has the key `item`, false otherwise
local function in_keys(item, items)
	return items[item] ~= nil
end
--- Convert one romanized syllable (optional onset, vowel, optional coda) into
-- katakana.
--
-- The final character is treated as a coda when it is a key of CODA_CONS or
-- CODA_VARA. What is left is looked up in CONVERSION_TABLE, first verbatim
-- and then lowercased. If the vowel carries an acute accent, the accent is
-- stripped for the lookup and the resulting kana is wrapped in an overline
-- <u> element.
--
-- @param syllable  string holding exactly one syllable
-- @param next_char character following the syllable in the word, or nil / ""
--                  when the syllable is word-final
-- @return the katakana string (possibly wrapped in overline markup)
-- @raise error when no katakana exists for the onset+vowel part, or when an
--        "r"/"h" coda is not preceded by a vowel
local function convert_syllable(syllable, next_char)
	local usub, ulen, ulower = mw.ustring.sub, mw.ustring.len, mw.ustring.lower

	local length = ulen(syllable)
	local last_char = usub(syllable, length, length)
	local remains = syllable
	local coda = ""

	if CODA_CONS[last_char] ~= nil then
		remains = usub(syllable, 1, length - 1)
		coda = CODA_CONS[last_char]
		-- ruunpe: ルウㇴペ or ルウンペ? Provisionally the latter: a coda "n"
		-- that is followed by another character is written with full-size ン.
		if last_char == "n" and next_char ~= nil and next_char ~= "" then
			coda = CONVERSION_TABLE["nn"]
		end
	elseif CODA_VARA[last_char] ~= nil then
		remains = usub(syllable, 1, length - 1)
		-- The "r"/"h" coda kana depends on the preceding vowel. Normalise that
		-- vowel (case and accent) first; otherwise e.g. "kár" finds no entry
		-- and the concatenation below fails on a nil coda.
		local vowel = ulower(usub(syllable, length - 1, length - 1))
		vowel = ACCENT_CONVERSION_TABLE[vowel] or vowel
		coda = CODA_VARA[last_char][vowel]
	end

	-- Strip an acute accent from the nucleus, remembering that it was there.
	local remains_length = ulen(remains)
	local nucleus = usub(remains, remains_length, remains_length)
	local plain_vowel = ACCENT_CONVERSION_TABLE[nucleus]
		or ACCENT_CONVERSION_TABLE[ulower(nucleus)]
	local accented_flag = plain_vowel ~= nil
	if accented_flag then
		remains = usub(remains, 1, remains_length - 1) .. plain_vowel
	end

	-- Exact key first, then case-insensitively (capitalised proper nouns).
	local kana = CONVERSION_TABLE[remains] or CONVERSION_TABLE[ulower(remains)]
	if kana == nil then
		error("cannot find katakana for CV pair: ‘" .. remains .. "’")
	end
	if coda == nil then
		error("cannot find coda katakana for syllable: ‘" .. syllable .. "’")
	end

	local converted = kana .. coda
	if accented_flag then
		converted = "<u style='text-decoration:overline;'>" .. converted .. "</u>"
	end
	return converted
end
--- Convert one romanized Ainu word into katakana.
--
-- Input that is not made up solely of letters, apostrophes and the ignored
-- separators "-", "=", "." (for example text that is already kana) is
-- returned unchanged. Otherwise the separators are removed, the word is split
-- into syllables around its vowels, and each syllable is passed to
-- convert_syllable.
--
-- @param temp string holding a single word (no spaces)
-- @return the katakana rendering, or `temp` itself when it is not romanized
-- @raise propagates errors from convert_syllable for syllables it cannot map
local function do_convert(temp)
	-- Things to keep extensible:
	--   * "N" as an explicit marker for ン
	--   * capitalised proper nouns such as Pawci-Kamuy
	--   * accent marks and other special notation: %u and %l cover them, while
	--     %a is unsuitable because it also matches hiragana and katakana
	local ignore_chars = "%-=."
	-- Anchored on both ends so that the WHOLE word must be romanized. Without
	-- the anchors, kana input that merely contains a separator (e.g. a hyphen)
	-- passed this check and then failed inside convert_syllable.
	local valid_pattern = "^[%u%l'" .. ignore_chars .. "]+$"
	-- TODO: hotne = ホッネ or ホㇳネ?
	-- TODO: wan e-tu = ワㇴ エト゚ or ワネト゚?
	if not mw.ustring.match(temp, valid_pattern) then
		return temp
	end

	-- Normalise: drop the separator characters. The word is deliberately NOT
	-- lower-cased here because capital "N" has its own coda kana.
	temp = mw.ustring.gsub(temp, "[" .. ignore_chars .. "]", "")
	local length = mw.ustring.len(temp)

	-- Pass 1: number the syllables. Each vowel starts a new syllable and pulls
	-- in the consonant directly before it (if any) as its onset.
	-- TODO: iyayiraykere = イヤィイラィケレ or イヤイラィケレ? Provisionally
	-- the latter.
	local group_ids = {}
	local syllable_count = 1
	local index = 1
	for char in mw.ustring.gmatch(temp, ".") do
		-- Compare case-insensitively so that vowel-initial proper nouns
		-- ("Aynu") get a nucleus too; VOWELS and CONSONANTS are lowercase.
		if in_values(mw.ustring.lower(char), VOWELS) then
			local char_before = mw.ustring.sub(temp, index - 1, index - 1)
			if in_values(mw.ustring.lower(char_before), CONSONANTS) then
				group_ids[index - 1] = syllable_count
			end
			group_ids[index] = syllable_count
			syllable_count = syllable_count + 1
		end
		index = index + 1
	end

	-- Pass 2: every still-unassigned character is a coda and joins the
	-- syllable of the character before it.
	for pos = 1, length do
		if group_ids[pos] == nil then
			group_ids[pos] = group_ids[pos - 1]
		end
	end

	-- Pass 3: cut the word wherever the syllable number changes and convert
	-- each piece, telling convert_syllable which character follows it.
	local pieces = {}
	local current_group_id = 1
	local head = 1
	for pos = 1, length do
		if group_ids[pos] ~= current_group_id then
			current_group_id = group_ids[pos]
			local content = mw.ustring.sub(temp, head, pos - 1)
			pieces[#pieces + 1] = convert_syllable(content, mw.ustring.sub(temp, pos, pos))
			head = pos
		end
	end
	-- The last syllable has no following character.
	pieces[#pieces + 1] = convert_syllable(mw.ustring.sub(temp, head, length))

	return table.concat(pieces)
end
-- local function valid_ainu_word(word)
-- -- TODO:
-- end
--- Debug entry point: converts a single word directly, with no frame or
-- template argument handling.
-- @param word string handed unchanged to do_convert
-- @return the value do_convert produces for `word`
export.debug = function(word)
	local converted = do_convert(word)
	return converted
end
--- Template entry point.
-- Reads the positional arguments of the parent frame, converts each one to
-- katakana word by word, and joins the results with "/". A missing or empty
-- argument is replaced by the title text of the current page; at least one
-- item is always produced.
-- @param frame frame object passed in by #invoke
-- @return string of converted items separated by "/"
function export.convert(frame)
	-- Parse the arguments: positional parameter 1 is a list that may have holes.
	local process = require("Module:parameters").process
	local args = process(frame:getParent().args, {
		[1] = {list = true, allow_holes = true}
	})
	local inputs = args[1]
	local last_index = math.max(inputs.maxindex, 1)

	local renderings = {}
	for index = 1, last_index do
		local source_text = inputs[index]
		if not source_text or source_text == "" then
			-- Nothing supplied for this slot: use the page title instead.
			source_text = mw.title.getCurrentTitle().text
		end

		-- Convert each space-separated word on its own, then re-join them.
		local words = {}
		for word in mw.text.gsplit(source_text, " ") do
			words[#words + 1] = do_convert(word)
		end
		renderings[#renderings + 1] = table.concat(words, " ")
	end

	return table.concat(renderings, "/")
end
return export