-- Módulo:generar-pron/ro
-- Apariencia
-- La documentación para este módulo puede ser creada en Módulo:generar-pron/ro/doc
-- RUMANO
-- Hecho por Tmagc, con algunos fragmentos de código extraídos de en.wikt
local export = {}
local unpack = unpack or table.unpack
local insert = table.insert
local concat = table.concat
local m_str = require("Módulo:String")
local u = m_str.char
local strfind = m_str.find
local strsubn = m_str.gsub
local strcount = m_str.count
local strsubb = m_str.gsubb
local strmatchit = m_str.gmatch
local strsubrep = m_str.gsub_rep
local strsplit = m_str.split
local strupper = m_str.upper
local strlower = m_str.lower
local strucfirst = m_str.ucfirst
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local strstrip = m_str.strip
local substr = m_str.sub
local strlen = m_str.len
local strexplode = m_str.explode_utf8
local strhtml = m_str.encode_html
--- Replace only the last `count` matches of `pattern` in `source_str`.
-- Matching and slicing go through `mw.ustring`, so positions are the
-- positions that library reports.
-- @param source_str string to search
-- @param pattern    ustring pattern to look for
-- @param replace    replacement handed to `mw.ustring.gsub` (string, table or function)
-- @param count      how many of the trailing matches to replace (must be a number)
-- @return `source_str` with the last `count` matches replaced; the input is
--         returned unchanged when it is empty, the pattern is empty or
--         `count <= 0`
local function repl_last(source_str, pattern, replace, count)
    if source_str == '' or pattern == '' or count <= 0 then
        return source_str
    end
    -- Ring buffer of size `count`: once full, each new match overwrites the
    -- oldest one, so at the end it holds exactly the last `count` matches.
    local last_n_matches = {}
    local cycle = 1
    local i = nil
    while true do
        local mstart, mend = mw.ustring.find(source_str, pattern, i)
        -- Stop when nothing matches. Also stop when the reported match starts
        -- before the requested position: that can only happen if the library
        -- clamped an out-of-range `init`, i.e. the end of the string has
        -- already been passed.
        if mstart == nil or (i ~= nil and mstart < i) then break end
        last_n_matches[cycle] = { mstart, mend }
        cycle = cycle + 1
        if cycle > count then cycle = 1 end
        if mend >= mstart then
            i = mend + 1
        else
            -- Zero-length match (mend == mstart - 1): step forward by one
            -- position, otherwise the search would restart at the same place
            -- forever.
            i = mstart + 1
        end
    end
    -- intentional; reverse sort: replacing from the rightmost match backwards
    -- keeps the recorded positions of the earlier matches valid even when the
    -- replacement changes the length of the string.
    table.sort(last_n_matches, function(a, b) return a[1] > b[1] end)
    local result = source_str
    for _, pair in ipairs(last_n_matches) do
        local mstart, mend = pair[1], pair[2]
        -- gsub is an operand of `..` here, so only its first result (the
        -- string) takes part in the concatenation.
        result = mw.ustring.sub(result, 1, mstart - 1)
            .. mw.ustring.gsub(mw.ustring.sub(result, mstart, mend), pattern, replace)
            .. mw.ustring.sub(result, mend + 1)
    end
    return result
end
-- CONVENTION: upper-case names hold patterns wrapped in square brackets
-- (ready-made character classes); lower-case names hold everything else
-- (single characters or the raw contents of a class).
local ag = u(0x0301) -- combining acute accent
local gr = u(0x0300) -- combining grave accent
local circ = u(0x0302) -- combining circumflex
local breve = u(0x306) -- combining breve
local coma = u(0x326) -- combining comma below
local ac_primario = u(0x02C8) -- IPA primary stress mark
local ac_secundario = u(0x02CC) -- IPA secondary stress mark
-- Stress diacritics written on the input (acute or grave).
local TILDE = "[" .. ag .. gr .. "]"
-- Diacritics that belong to the letter itself (ă, â, î, ș, ț) rather than marking stress.
local DIACR_NO_TILDE = "[" .. breve .. coma .. circ .. "]"
local acentos_ipa = ac_primario..ac_secundario
local ACENTOS_IPA = "[" .. acentos_ipa .. "]"
-- Letter inventories, lower and upper case.
local vocales = "aăâeiîouyAĂÂEIÎOUY"
local consonantes = "bcdfghjklmnpqrsștțvwxzBCDFGHJKLMNPQRSȘTȚVWXZ"
local vocales_salvo_i = "aăâeîouyAĂÂEÎOUY"
local VOCAL = "["..vocales.."]"
local VOCAL_SALVO_I = "["..vocales_salvo_i.."]"
local CONS = "["..consonantes.."]"
-- Placeholder code point inserted by separar_en_silabas as a provisional syllable divider.
local divsil = u(0xFFF0)
-- Second placeholder divider; in the code shown here it is only referenced through sepsil.
local divsil_fijo = u(0xFFF1)
-- Contents of a character class matching any syllable separator: "-", "." and the two placeholders.
local sepsil = "%-." .. divsil .. divsil_fijo
local SEPARADORES_SILABICOS = "[" .. sepsil .. "]"
local SALVO_SEPARADORES_SILABICOS = "[^" .. sepsil .. "]"
-- Word separators: "#" and the space.
local seppal = "# "
local separador_excepto_palabras = acentos_ipa .. sepsil
local separador = separador_excepto_palabras .. seppal
local SEPARADOR = "[" .. separador .. "]"
-- Punctuation that normalizar turns into " | " (fragment boundary).
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
-- Superset of PUNTUACION that also covers quotation marks; normalizar deletes whatever of it is left.
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹]"
-- Contents of the class of characters accepted by normalizar (used there as "[^" .. permitido .. "]").
local permitido = vocales..consonantes..ag..gr..separador.."·|%s" -- assumes punctuation has already been cleaned up
--- Prepare raw input text for syllabification and conversion.
-- Lower-cases the text, turns fragment-delimiting punctuation into " | ",
-- removes the remaining punctuation, turns hyphens into spaces, recomposes
-- the Romanian letters (ă, â, î, ș, ț) while leaving stress accents as
-- separate combining marks, rejects anything outside the allowed alphabet
-- and finally tidies bars and whitespace.
-- @param texto raw text
-- @return the cleaned-up text
-- Raises an error ("caracteres no permitidos en el texto") when a character
-- outside `permitido` is found.
local function normalizar(texto)
    -- base letter + letter-forming diacritic -> precomposed Romanian letter
    local recomposicion = {
        ["a" .. circ] = "â",
        ["A" .. circ] = "Â",
        ["a" .. breve] = "ă",
        ["A" .. breve] = "Ă",
        ["i" .. circ] = "î",
        ["I" .. circ] = "Î",
        ["s" .. coma] = "ș",
        ["S" .. coma] = "Ș",
        ["t" .. coma] = "ț",
        ["T" .. coma] = "Ț",
    }
    -- punctuation clean-up, applied in this order
    local limpieza = {
        { PUNTUACION, " | " },      -- fragment delimiters become IPA foot boundaries
        { PUNTUACION_EXTRA, "" },   -- whatever punctuation is left is dropped
        { "[%-‐]", " " },           -- hyphens become spaces
    }

    local limpio = strlower(texto)
    for _, paso in ipairs(limpieza) do
        limpio = strsubrep(limpio, paso[1], paso[2])
    end

    -- decompose, put the stress accent after the letter-forming diacritic,
    -- then glue letter + diacritic back into a single character
    limpio = strnfd(limpio)
    limpio = strsubrep(limpio, "(" .. TILDE .. ")(" .. DIACR_NO_TILDE .. ")", "%2%1")
    limpio = strsubn(limpio, "." .. DIACR_NO_TILDE, recomposicion)

    local hay_prohibidos = strfind(limpio, "[^" .. permitido .. "]")
    if hay_prohibidos then
        error("caracteres no permitidos en el texto")
    end

    -- finally, collapse doubled bars and surplus whitespace
    limpio = strsubrep(limpio, "%s*|%s*|%s*", " | ")
    limpio = strsubrep(limpio, "%s+", " ")
    limpio = strstrip(limpio, "[%s|]+")
    return limpio
end
--- Split a word into syllables.
-- Works by inserting the placeholder `divsil` between letters through a
-- fixed sequence of pattern substitutions (the order matters: later rules
-- repair what the earlier, coarser rules produced) and finally rewriting
-- every run of separators as a single ".".
-- @param word text to split (callers in this file pass the output of normalizar)
-- @return the text with "." between syllables; a word-final consonant + "i"
--         is additionally rewritten as consonant + "ʲ" (see below)
local function separar_en_silabas(word)
-- V C+ | C V : with two or more consonants between vowels, only the last consonant goes to the next syllable
word = strsubrep(word, "(" .. VOCAL .. TILDE .. "*" .. CONS .. "+)(" .. CONS .. VOCAL .. ")", "%1"..divsil.."%2")
-- V | C V : a single consonant between vowels opens the next syllable
word = strsubrep(word, "(" .. VOCAL .. TILDE .. "*" .. ")(" .. CONS .. VOCAL .. ")", "%1"..divsil.."%2")
-- Keep fricatives and stops together with a following l or r. Except for dl (d only joins r).
word = strsubn(word, "([pbfvkctg])"..divsil.."([lrɾ])", divsil.."%1%2")
-- NOTE(review): the "%" here escapes the divsil character; the neighbouring rules use divsil unescaped
word = strsubn(word, "d%"..divsil.."([rɾ])", divsil.."d%1")
-- swing, switch, etc.
word = strsubn(word, "s"..divsil.."w", divsil.."sw")
-- ll, rr and ch/gh before e or i are never split: the divider moves in front of them
word = strsubn(word, "l"..divsil.."l", divsil.."ll")
word = strsubn(word, "r"..divsil.."r", divsil.."rr")
word = strsubn(word, "c"..divsil.."h([ei])", divsil.."ch%1")
word = strsubn(word, "g"..divsil.."h([ei])", divsil.."gh%1")
-- hiatuses
-- a stressed vowel is separated from the vowel run that follows it, unless that run is exactly the same vowel
word = strsubn(word, "(" .. VOCAL .. ")(" .. TILDE .. ")(" .. VOCAL .. "+)", function(a,b,c) return a == c and a..b..c or a..b..divsil..c end)
-- vowel + i + vowel(s) (none of them i): the i opens the next syllable
word = strsubn(word, "(" .. VOCAL_SALVO_I .. ")([iI]" .. VOCAL_SALVO_I .. "+)", "%1"..divsil.."%2")
-- doubled a / e / o are split
word = strsubn(word, "([aeoAEO])%1", "%1"..divsil.."%1")
-- a or o followed by e are split
word = strsubn(word, "([aoAO])([eE])", "%1"..divsil.."%2")
-- lupi (a single syllable when final i is preceded by a consonant): the divider in
-- front of the final consonant(s) is removed and the i is replaced by "ʲ"
-- NOTE(review): this puts "ʲ" into the returned division as well, not only into the pronunciation — confirm that is intended
word = strsubn(word, divsil.."+("..CONS.."+)i$", "%1ʲ")
-- same rule when the word is followed by whitespace
word = strsubn(word, divsil.."+("..CONS.."+)i(%s)", "%1ʲ%2")
-- every run of separators (placeholders, "-", ".") becomes one "."
word = strsubn(word, SEPARADORES_SILABICOS.."+", ".")
-- presumably trims separators left at the edges of the text (strstrip comes from Módulo:String) — TODO confirm
word = strstrip(word, SEPARADORES_SILABICOS.."+")
return word
end
-- Character classes over the IPA output (as opposed to VOCAL / CONS, which
-- describe the orthographic input).
local V = "[aeiouəɨ]"                   -- IPA vowels
local C = "[bkdfɡhʒlmnprstvzʃw]"        -- IPA consonants
local X = "j[bkdfɡhʒlmnprstvzʃjwaəeiɨou]" -- "j" followed by any segment
local U = "j[aeəɨiou]"                  -- "j" followed by a vowel

-- Plain vowels: spelling -> IPA.
local mapeo_vocales = {
    ["a"]="a", ["e"]="e", ["i"]="i", ["o"]="o", ["u"]="u", ["ă"]="ə", ["â"]="ɨ", ["î"]="ɨ",
}

-- Single consonant letters: spelling -> IPA.
local mapeo_1 = {
    ["b"]="b", ["c"]="k", ["d"]="d",
    ["f"]="f", ["g"]="ɡ", ["h"]="h", ["j"]="ʒ",
    ["k"]="k", ["l"]="l", ["m"]="m", ["n"]="n",
    ["p"]="p", ["q"]="k", ["r"]="r", ["s"]="s", ["t"]="t",
    ["v"]="v", ["x"]="ks",
    ["ș"]="ʃ", ["ț"]="t͡s",
}

-- Kept apart from mapeo_1 because "y" yields "j", which is itself a key of
-- mapeo_1 ("j" -> "ʒ"); generar_pron applies this table after mapeo_1.
local mapeo_1b = {
    ["y"]="j",
    ["w"]="v"
}

-- c / g (and ch / gh) before the front vowels e, i.
local mapeo_2 = {
    ["ce"] = "t͡ʃe",
    ["ci"] = "t͡ʃi",
    ["che"] = "ke",
    ["chi"] = "ki",
    ["ge"] = "d͡ʒe",
    ["gi"] = "d͡ʒi",
    ["ghe"] = "ɡe",
    ["ghi"] = "ɡi",
}

-- Two-vowel sequences pronounced with a glide.
local diptongos = {
    ["ai"] = "aj", ["au"] = "aw", ["ei"] = "ej",
    ["eu"] = "ew", ["ii"] = "ij",
    ["oi"] = "oj", ["ou"] = "ow", ["ui"] = "uj",
    ["uu"] = "uw", ["ăi"] = "əj", ["ău"] = "əw",
    ["âi"] = "ɨj", ["âu"] = "ɨw",
    ["ia"] = "ja", ["ie"] = "je", ["io"] = "jo",
    ["iu"] = "ju", ["ue"] = "we",
    ["ua"] = "wa", ["uă"] = "wə", ["uâ"] = "wɨ",
}

-- Three-vowel sequences. generar_pron applies these before diptongos, so every
-- key here must be a genuine three-letter sequence: a two-letter key would
-- pre-empt the corresponding diphthong everywhere (the "ieu" entry used to be
-- keyed "eu", which turned every "eu" into "jew").
local triptongos = {
    ["eai"] = "eaj", ["eau"] = "eaw", ["eoa"] = "eoa",
    ["iai"] = "jaj", ["iau"] = "jaw", ["iei"] = "jej",
    ["ieu"] = "jew", ["ioi"] = "joj", ["iou"] = "jow",
    ["oai"] = "oaj", ["uai"] = "waj", ["uau"] = "waw",
    ["uăi"] = "wəj", ["ioa"] = "joa"
}

-- Final pass applied to the whole transcription: marks the first vowel of
-- ea / eo / oa as non-syllabic.
local fonetico = {
    ["ea"] = "e̯a", ["eo"] = "e̯o", ["oa"] = "o̯a"
}
--- Build the IPA transcription of a text.
-- The text is normalised, cut into fragments at "|" and into words at
-- whitespace; each word is syllabified, its written stress accent is turned
-- into the IPA primary stress mark, and the spelling-to-sound tables are
-- applied one after another.
-- @param texto text to transcribe
-- @return a list holding one list with the result of strhtml on the
--         transcription, i.e. { { <transcription> } }
local function generar_pron(texto)
    -- substitution tables, in the order in which they must be applied
    local etapas = { mapeo_2, mapeo_1, mapeo_1b, triptongos, diptongos, mapeo_vocales }

    -- patterns that do not change from word to word
    local pat_acento_interno = "^(.*)" .. SEPARADOR .. "(.-)" .. TILDE
    local pat_acento_inicial = "^(.-)" .. TILDE
    local pat_ks = "(" .. SEPARADORES_SILABICOS .. ")ks"
    local pat_ij = "(" .. SEPARADORES_SILABICOS .. ")ij"
    local pat_i_final = "(" .. C .. ")[ij]$"

    local entrada = normalizar(texto)
    entrada = strlower(entrada)

    local trozos = strsplit(entrada, "%s*|%s*")
    local trozos_ipa = {}
    for _, trozo in ipairs(trozos) do
        local lista_palabras = strsplit(trozo, "%s")
        local palabras_ipa = {}
        for _, palabra in ipairs(lista_palabras) do
            local w = separar_en_silabas(palabra)

            if strfind(w, TILDE) then
                -- the separator in front of the accented syllable becomes the stress mark ...
                w = strsubrep(w, pat_acento_interno, "%1" .. ac_primario .. "%2")
                -- ... and an accent with no separator before it puts the mark at the very start
                w = strsubn(w, pat_acento_inicial, ac_primario .. "%1")
            end

            for _, tabla in ipairs(etapas) do
                for grafia, sonido in pairs(tabla) do
                    w = strsubn(w, grafia, sonido)
                end
            end

            -- "ks" right after a separator: the k moves to the previous syllable
            w = strsubn(w, pat_ks, "k%1s")
            -- "ij" right after a separator becomes "ji"
            w = strsubn(w, pat_ij, "%1ji")
            -- word-final i / j after a consonant becomes "ʲ"
            w = strsubn(w, pat_i_final, "%1ʲ")

            palabras_ipa[#palabras_ipa + 1] = w
        end
        trozos_ipa[#trozos_ipa + 1] = concat(palabras_ipa, " ")
    end

    local fono = concat(trozos_ipa, " | ")
    -- "phonetic" pass --> TO REVIEW: is this really phonetic? It still looks phonological
    for grafia, sonido in pairs(fonetico) do
        fono = strsubn(fono, grafia, sonido)
    end
    return {{strhtml(fono)}}
end
--- Fill in the pronunciation arguments of an entry.
-- When neither `args["fone"]` nor `args["fono"]` carries a value, a
-- transcription is generated for each respelling in `args["ayuda"]` (or for
-- the page title when no respelling was given), and the derived data is
-- appended to `args`: syllable division ("d"), transcription ("fono"),
-- respelling shown ("fgraf"), rhyme ("rima"), syllable count ("ls") and
-- stress class ("ac").
-- @param tit  page title
-- @param args table of list-valued arguments; the keys "ayuda", "fone",
--             "fono", "d", "fgraf", "rima", "ls" and "ac" are all indexed as
--             tables here, so the caller must provide them
-- @return the same `args` table, modified in place
function export.procesar_pron_args(tit, args)
    -- Remember whether the respelling came from the caller: a title used as
    -- fallback is not listed in "fgraf".
    local vino_ayuda = true
    if not args["ayuda"][1] then
        vino_ayuda = false
        args["ayuda"][1] = tit
    end
    if #args["fone"] < 1 and #args["fono"] < 1 then
        -- Sets (value -> true) so that repeated results are reported once.
        local rimas, ls, ac = {}, {}, {}
        local tiene_espacios = strfind(tit, " ")
        local A = #args["ayuda"]
        local j = 1 -- index into the respellings
        local k = 1 -- number of pronunciations inserted (at most 9)
        while k <= 9 and j <= A do
            -- Division, length and stress class are only computed for single-word titles.
            if not tiene_espacios then
                local div = strsubn(separar_en_silabas(normalizar(args["ayuda"][j])), "%.", "-")
                insert(args["d"], div)
                local longsib = strcount(div, "%-") + 1
                ls[longsib] = true
                if longsib == 1 then
                    ac["monosílaba"] = true
                else
                    local t_idx = strfind(div, TILDE)
                    if t_idx then
                        -- The number of syllable breaks after the accent mark
                        -- tells how far from the end the stressed syllable is.
                        local resto = substr(div, t_idx+1)
                        local ssil = strcount(resto, SEPARADORES_SILABICOS)
                        if ssil == 0 then
                            ac["aguda"] = true
                        elseif ssil == 1 then
                            ac["llana"] = true
                        elseif ssil == 2 then
                            ac["esdrújula"] = true
                        else
                            -- NOTE(review): spelled "sobreesdrújula" (sobre + esdrújula);
                            -- confirm this is the label the consumer of args["ac"] expects.
                            ac["sobreesdrújula"] = true
                        end
                    end
                end
            end
            local fono = generar_pron(args["ayuda"][j])
            -- Rhyme: what follows the primary stress mark, starting at its first vowel.
            local rim = fono[1][1]
            rim = strsubn(rim, "^.*"..ac_primario.."(.-)$", "%1")
            rim = strsubn(rim, ".-".."("..V..".*"..")".."$", "%1")
            rimas[rim] = true
            for i,_ in ipairs(fono) do
                if vino_ayuda and args["ayuda"][j] then
                    insert(args["fgraf"], {args["ayuda"][j]})
                end
                insert(args["fono"], fono[i])
                k = k + 1
                if k > 9 then
                    break
                end
            end
            j = j + 1
        end
        for rim,_ in pairs(rimas) do
            insert(args["rima"], rim)
        end
        for lon,_ in pairs(ls) do
            insert(args["ls"], lon)
        end
        for ace,_ in pairs(ac) do
            insert(args["ac"], ace)
        end
    end
    return args
end
return export