-- Module:bho-IPA2
-- Documentation for this module may be created at Module:bho-IPA2/doc
-- Table of public functions; it is returned at the end of the module.
local export = {}

-- Unicode-aware pattern helpers from the Scribunto ustring library.
local match = mw.ustring.match
local gsub = mw.ustring.gsub

-- Character inventories. Each string is a flat list of characters meant to
-- be spliced into a pattern character class.
local consonants = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषहड़ढ़"

-- NOTE: `vowel` and `vowel_sign` are declared a second time further down
-- this file; code located after that second declaration sees those values,
-- not the ones assigned here.
local vowel = "aिुृेोाीूैौॉॅॆॊऻऻॊॆॏ꣱’"
local vowel_sign = "अइउएओआईऊऋॠऎऒव़य़ॵॳॴऐऔऑऍ"

-- Long vowels (independent letters followed by dependent signs).
local lvowel = "आईऊएओाीूेो"

-- The two short dependent vowel signs, i and u.
local el = "िु"
-- Lookup table from Devanagari characters (optionally followed by a nukta)
-- to their IPA rendering. It is passed to gsub as the replacement table, so
-- every key must equal exactly what the lookup pattern captures.
local conv = {
	-- consonants
	['क'] = 'k', ['ख'] = 'kʰ', ['ग'] = 'ɡ', ['घ'] = 'ɡʱ', ['ङ'] = 'ŋ',
	['च'] = 'ʦ', ['छ'] = 'ʦʰ', ['ज'] = 'ʣ', ['झ'] = 'ʣʱ', ['ञ'] = 'ɲ',
	['ट'] = 'ʈ', ['ठ'] = 'ʈʰ', ['ड'] = 'ɖ', ['ढ'] = 'ɖʱ', ['ण'] = 'ɳ',
	['त'] = 't', ['थ'] = 'tʰ', ['द'] = 'd', ['ध'] = 'dʱ', ['न'] = 'n',
	['प'] = 'p', ['फ'] = 'pʰ', ['ब'] = 'b', ['भ'] = 'bʱ', ['म'] = 'm',
	['य'] = 'j', ['र'] = 'ɾ', ['ल'] = 'l', ['व'] = 'ʋ',
	['श'] = 's', ['ष'] = 's', ['स'] = 's', ['ह'] = 'ɦ',
	['क़'] = 'q', ['ख़'] = 'x', ['ग़'] = 'ɣ', ['ऴ'] = 'ɭ',
	['ळ'] = 'ɭ', ['ज़'] = 'z', ['श़'] = 'ʒ', ['झ़'] = 'ʒ',
	['ड़'] = 'ɽ', ['ढ़'] = 'ɽʱ', ['फ़'] = 'f', ['थ़'] = 'θ',
	['द़'] = 'ð', ['ऩ'] = 'n̪', ['ऱ'] = 'ɹ', ['ॽ'] = "ʔ", ['ॹ'] = 'ʒ',
	-- dependent vowel signs (matras)
	['ि'] = 'ɪ', ['ु'] = 'ʊ', ['े'] = 'eː', ['ॆ'] = 'e', ['ॊ'] = 'o', ['ो'] = 'oː', ['ा'] = 'aː', ['ी'] = 'iː', ['ू'] = 'uː', ['ृ'] = 'ri', ['ॄ'] = 'ri', ['ॢ'] = 'liɾi', ['ॣ'] = 'liɾi', ['ै'] = 'əɪ', ['ौ'] = 'əʊ', ['ॉ'] = 'ɔ', ['ॅ'] = 'æ', ['ꣿ'] = 'əɪ', ['ॏ'] = 'əʊ', ['ऺ'] = "ᵊ",
	-- independent vowel letters
	['अ'] = 'ə', ['इ'] = 'ɪ', ['उ'] = 'ʊ', ['ए'] = 'eː', ['ओ'] = 'oː', ['आ'] = 'aː', ['ई'] = 'iː', ['ऊ'] = 'uː', ['ऋ'] = 'ri', ['ॠ'] = 'ri', ['ऌ'] = 'liɾi', ['ॡ'] = 'liɾi', ['ऐ'] = 'əɪ', ['औ'] = 'əʊ', ['ऑ'] = 'ɔ', ['ॲ'] = 'æ', ['ऍ'] = 'æ', ['ऎ'] = 'e', ['ऒ'] = 'o', ['ꣾ'] = 'əĕ', ['ॵ'] = 'əŏ', ['व़'] = 'ŏ',['य़'] = 'ĕ', ['ॴ'] = 'a',
	-- nasalised vowel signs (vowel sign followed by anusvara or chandrabindu)
	-- NOTE(review): export.tr looks characters up with the pattern '.़?'
	-- (one code point plus an optional nukta); confirm whether these
	-- two-code-point keys can ever be matched by that lookup.
	['िं'] = 'ɪ̃', ['ुँ'] = 'ʊ̃', ['ें'] = 'ẽː', ['ॆं'] = 'ẽ', ['ॊं'] = 'õ', ['ों'] = 'õː', ['ाँ'] = 'ãː', ['ीं'] = 'ĩː', ['ूँ'] = 'ũː', ['ैं'] = 'ə̃ɪ̃', ['ौं'] = 'ə̃ʊ̃', ['ॉं'] = 'ɔ̃', ['ॅं'] = 'æ̃', ['ꣿं'] = 'ə̃ɪ̃', ['ॏं'] = 'ə̃ʊ̃',
	['꣱'] = "əː",
	-- chandrabindu
	['ँ'] = '̃',
	-- anusvara
	['ं'] = 'ṃ',
	-- visarga
	['ः'] = 'ʰ',
	-- virama
	['्'] = '',
	-- om
	['ॐ'] = 'oːm',
	-- avagraha
	['ऽ'] = 'əː',
	-- zero-width non-joiner (U+200C); written as UTF-8 byte escapes so the
	-- key stays visible and cannot be silently stripped by an editor
	['\226\128\140'] = '',
	-- zero-width joiner (U+200D); same reasoning as above
	['\226\128\141'] = 'ə',
	-- diphthong marker
	['ॱ'] = '̯',
	-- numerals
	['०'] = '0', ['१'] = '1', ['२'] = '2', ['३'] = '3', ['४'] = '4', ['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = '9',
	-- punctuation
	['।'] = '.', -- danda
	['॥'] = '.', -- double danda
	['+'] = '', -- compound separator
	-- abbreviation sign
	['॰'] = '.',
}
-- Maps a consonant to the nasal consonant listed for its group. It is
-- consulted by export.tr when it resolves an anusvara next to a consonant.
local nasal_assim = {}
do
	-- Each row: the nasal, followed by the consonants that select it.
	local groups = {
		{ "ङ", "क", "ख", "ग", "घ" },
		{ "ञ", "च", "छ", "ज", "झ" },
		{ "ण", "ट", "ठ", "ड", "ढ" },
		{ "म", "प", "फ", "ब", "भ", "म" },
		{ "न", "त", "थ", "द", "ध", "न" },
	}
	for _, row in ipairs(groups) do
		local nasal = row[1]
		for idx = 2, #row do
			nasal_assim[row[idx]] = nasal
		end
	end
end
-- Set of three-character sequences exempted from the cluster test in
-- export.tr. That test builds its key from a reversed word, so the entries
-- are spelled in the order they appear in the reversed string.
local perm_cl = {}
for _, cluster in ipairs({ "म्ल", "व्ल", "न्ल" }) do
	perm_cl[cluster] = true
end
-- Consonant inventories, used as character-class contents in patterns.
local all_cons = "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवसशषह"
local special_cons = "छकखगतसहयथडढठपदणधरषटलवब भडचनशम"

-- Vowel inventories (dependent signs plus a few markers, then independent
-- letters), likewise used as character-class contents.
local vowel = "aिुृेोाीूैौॉॅॆॊऻऻॊॆॏ꣱꣱’"
local vowel_sign = "अइउएओआईऊऋॠऎऒव़य़ॵॳॴऐऔऑऍ"

-- Pattern for a schwa standing between two consonants that are themselves
-- flanked by vowels. export.tr applies it to reversed words and replaces the
-- matched schwa with a superscript schwa.
local syncope_pattern
do
	local any_vowel = '[' .. vowel .. vowel_sign .. ']'
	local nukta_cons = '(़?[' .. all_cons .. '])'
	syncope_pattern = table.concat({
		'(', any_vowel, ')',
		nukta_cons,
		'ə',
		nukta_cons,
		'([ंँ]?', any_vowel, ')',
	})
end
--- Reverse a string one character at a time, as counted by mw.ustring.
-- @param text string to reverse
-- @return a new string with the characters of `text` in opposite order
local function rev_string(text)
	local total = mw.ustring.len(text)
	local reversed = {}
	-- Walk from the last character down to the first, appending each one.
	for pos = total, 1, -1 do
		reversed[#reversed + 1] = mw.ustring.sub(text, pos, pos)
	end
	return table.concat(reversed)
end
--- Convert Devanagari text to an IPA string.
--
-- Stages:
--   1. make the inherent vowel explicit by writing "ə" after every consonant
--      that is not followed by a vowel sign or virama;
--   2. for every word, work on the reversed word: decide whether the final
--      schwa stays, reduce medial schwas (syncope_pattern), resolve anusvara;
--   3. apply a few whole-text literal replacements;
--   4. map the remaining characters through `conv`;
--   5. run a cascade of clean-up substitutions on the IPA string.
--
-- @param text string to convert
-- @param lang not used by this function
-- @param sc not used by this function
-- @return the converted string, normalised with mw.ustring.toNFC
function export.tr(text, lang, sc)
	-- Stage 1: explicit inherent schwa.
	text = gsub(
		text,
		"([" .. all_cons .. "]़?)([" .. vowel .. "्]?)",
		function(c, d)
			return c .. (d == "" and "ə" or d)
		end
	)

	-- Stage 2: per-word processing. Each word is rewritten in place through a
	-- function replacement, so a word's result can never leak into another
	-- word that merely contains the same character run, and no word is
	-- skipped because an earlier substitution changed the surrounding text.
	text = gsub(text, "[ऀ-ॿə]+", function(word)
		-- Work on the reversed word so that "^" anchors at the word's end.
		word = rev_string(word)

		-- Final schwa: kept only after a virama cluster that is not listed
		-- in perm_cl, or after the sequence matched by 'य[ी]'; otherwise
		-- dropped.
		word = gsub(word, '^ə(़?)([' .. all_cons .. '])(.)(.?)', function(opt, first, second, third)
			return (((match(first, '[' .. special_cons .. ']') and match(second, '्') and not perm_cl[first..second..third])
				or match(first .. second, 'य[ी]'))
				and 'ə' or "") .. opt .. first .. second .. third end)

		-- Medial schwa reduction; repeated because matches can overlap.
		while match(word, syncope_pattern) do
			word = gsub(word, syncope_pattern, '%1%2ᵊ%3%4')
		end

		-- Anusvara. The word is reversed, so `succ` is the character that
		-- follows the anusvara in reading order and `prev` the one before it.
		word = gsub(
			word,
			"(.?)ं(.)",
			function(succ, prev)
				return succ ..
					(succ .. prev == "ə" and "्म" or
					(succ == "" and match(prev, "[" .. vowel .. "]") and "̃" or nasal_assim[succ] or "̃")) ..
					prev
			end
		)

		return rev_string(word)
	end)

	-- Stage 3: whole-text literal replacements, applied once after every word
	-- has been processed.
	text = gsub(text, "ज्ञ", "gj")
	text = gsub(text, "इऺ", "ɪ̆" )
	text = gsub(text, "उऺ", "ʊ̆" )
	text = gsub(text, "ॳ", "ᵊ" )
	text = gsub(text, "अ꣱", "əː")

	-- Stage 4: character-by-character lookup (a character plus optional nukta).
	text = gsub(text, '.़?', conv)
	text = gsub(text, "[<>]", "")
	text = gsub(text, "ॱ", "")

	-- Stage 5: clean-up.
	-- A long vowel followed by two further vowels before the end of the text
	-- (first rule) or before a space (second rule) gets "ˑ" in place of its
	-- length mark; the "ˑ" sequences are rewritten near the end of the function.
	text = gsub(text, "([aāäeâôoʌiuɪʊe̯eëəᵊ])ː([kɦgɕʑṅcjñṭḍṇɽtʈɖdnʦʣpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱ]?)([aāäeâôoʌiuɪʊɨʉe̯eëəᵊ](ː?))([kɦgṅcjñṭḍṇɽtdnʈɖpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?)([aāäeâôoʌiuɪʊɨʉe̯eëəᵊ](ː?))([kɦgṅcʈɖjñṭḍṇɽtdnpbmɽ̃yrlɳwvɾjwśṣsʰʱhɕʑqxʦʣġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪᵊʊ]?(̃?))$", "%1ˑ%2%3%4%5%6%7%8%9")
	text = gsub(text, "([aāäeâôoʌiuɪʊe̯eëəᵊ])ː([kɦgɕʑṅcjñṭḍʈɖṇɽtdnʦʣpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱ]?)([aāäeâôoʌiuɪʊɨʉe̯eëəᵊ](ː?))([kɦgṅcjñṭḍṇɽtdnʈɖpbmɽ̃yrlɳwvɾjwśṣshqxġzžḻṛṟfθðṉ]?[ʰʱ]?)([aāäeâôoʌiuɪʊɨʉe̯eëəᵊ](ː?))([kɦgṅcʈɖjñṭḍṇɽtdnpbmɽ̃yrlɳwvɾjwśṣshɕʑqxʦʣġzžḻṛṟfθðṉ]?[ʰʱɦh]?)([ɨʉɪᵊʊ]?(̃?)) ", "%1ˑ%2%3%4%5%6%7%8%9 ")
	text = gsub(text, 'ɦri', 'ri')
	-- Put the nasalisation mark before the length mark.
	text = gsub(text, 'ː̃', '̃ː')
	text = gsub(text, 'ː̃ː', '̃ː')
	text = gsub(text, 'ː̤ː', 'ː')
	-- NOTE(review): in this replacement string "%" is followed by a combining
	-- mark, not by a capture digit, so capture 1 is not re-inserted. Confirm
	-- the intended output before changing it; left exactly as found.
	text = gsub(text, "a([ɪʊ])̃", "a%̃1")
	text = gsub(text, "ʊʊ", "ʊ")
	-- Collapse reduced-schwa and schwa sequences.
	text = gsub(text, "([iuɪʊïüaô])(ː?)ᵊ", "%1%2")
	text = gsub(text, "əᵊ", "ə")
	text = gsub(text, "ᵊə", "ə")
	text = gsub(text, "əə", "ə")
	text = gsub(text, "ᵊ([ɪʊ])", "ə%1")
	-- NOTE(review): as written this substitution replaces a match with itself.
	text = gsub(text, "ə([ɪʊ])", "ə%1")
	text = gsub(text, "([ɪʊ])̯̯", "%1")
	text = gsub(text, "ɪɪ", "ɪ")
	-- Affricate placeholders to their final IPA spelling.
	text = gsub(text, "ʦ", "t͡ɕ")
	text = gsub(text, "ʣ", "d͡ʑ")
	-- NOTE(review): pattern and replacement look identical here; they may
	-- differ only in Unicode composition. The result is NFC-normalised below.
	text = gsub(text, "ĕ", "ĕ")
	text = gsub(text, "ŏ", "ŏ")
	text = gsub(text, 'ːː', 'ː')
	-- Resolve the "ˑ" marker: the marked vowel is written short.
	text = gsub(text, "aːˑ", "ə")
	text = gsub(text, "iːˑ", "ɪ")
	text = gsub(text, "uːˑ", "ʊ")
	text = gsub(text, "eːˑ", "e")
	text = gsub(text, "oːˑ", "o")
	text = gsub(text, "aˑ", "ə")
	text = gsub(text, "iˑ", "ɪ")
	text = gsub(text, "uˑ", "ʊ")
	text = gsub(text, "eˑ", "e")
	text = gsub(text, "oˑ", "o")
	text = gsub(text, "ə̃əː", "ə̃ː")
	return mw.ustring.toNFC(text)
end
return export