-- Módulo:generar-pron/ro
--
-- La documentación para este módulo puede ser creada en Módulo:generar-pron/ro/doc
-- RUMANO
-- Hecho por Tmagc, con algunos fragmentos de código extraídos de en.wikt

local export = {}

local unpack = unpack or table.unpack
local insert = table.insert
local concat = table.concat

local m_str = require("Módulo:String")

local u = m_str.char
local strfind = m_str.find
local strsubn = m_str.gsub
local strcount = m_str.count
local strsubb = m_str.gsubb
local strmatchit = m_str.gmatch
local strsubrep = m_str.gsub_rep
local strsplit = m_str.split
local strupper = m_str.upper
local strlower = m_str.lower
local strucfirst = m_str.ucfirst
local strnfd = m_str.toNFD
local strnfc = m_str.toNFC
local strstrip = m_str.strip
local substr = m_str.sub
local strlen = m_str.len
local strexplode = m_str.explode_utf8
local strhtml = m_str.encode_html

--- Replaces only the last `count` matches of `pattern` in `source_str`.
-- Not called anywhere in the visible part of this module.
-- @param source_str text to search; returned unchanged when empty
-- @param pattern    pattern handed to mw.ustring.find / mw.ustring.gsub;
--                   an empty pattern returns `source_str` unchanged
-- @param replace    replacement handed to mw.ustring.gsub
-- @param count      how many trailing matches to replace; defaults to 1,
--                   values <= 0 return `source_str` unchanged
-- @return the text with the last `count` matches replaced
local function repl_last(source_str, pattern, replace, count)
	-- A missing count used to raise "attempt to compare nil with number".
	if count == nil then count = 1 end
	if source_str == '' or pattern == '' or count <= 0 then
		return source_str
	end

	-- Ring buffer of at most `count` {start, end} pairs: once it is full,
	-- every new match overwrites the oldest one.
	local last_n_matches = {}
	local cycle = 1
	local init = 1

	while true do
		local mstart, mend = mw.ustring.find(source_str, pattern, init)
		-- `mstart < init` guards against find implementations that clamp an
		-- out-of-range start position back to the end of the string, which
		-- would otherwise report the same empty match forever.
		if mstart == nil or mstart < init then break end
		last_n_matches[cycle] = { mstart, mend }
		cycle = cycle + 1
		if cycle > count then cycle = 1 end
		if mend < mstart then
			-- Empty match: step one position forward so the scan terminates.
			init = mstart + 1
		else
			init = mend + 1
		end
	end

	-- Rightmost match first, so replacing one match never shifts the
	-- positions recorded for the matches still to be processed.
	table.sort(last_n_matches, function(a, b) return a[1] > b[1] end)

	local result = source_str
	for _, pair in ipairs(last_n_matches) do
		local mstart, mend = pair[1], pair[2]
		-- Keep only the first return value of gsub (the rewritten text).
		local replaced = mw.ustring.gsub(mw.ustring.sub(result, mstart, mend), pattern, replace)
		result = mw.ustring.sub(result, 1, mstart - 1) .. replaced .. mw.ustring.sub(result, mend + 1)
	end

	return result
end

-- CONVENTION: upper-case names hold patterns already wrapped in square
-- brackets (ready-to-use character classes); lower-case names hold everything
-- else (raw character lists to be embedded in a class, single characters).
-- `u` is m_str.char; presumably it builds a character from a code point.
local ag = u(0x0301) -- combining acute accent (U+0301)
local gr = u(0x0300) -- combining grave accent (U+0300)
local circ = u(0x0302) -- combining circumflex (U+0302)
local breve = u(0x306) -- combining breve (U+0306)
local coma = u(0x326) -- combining comma below (U+0326)

-- IPA stress marks written into the output
local ac_primario = u(0x02C8) -- primary stress mark (U+02C8)
local ac_secundario = u(0x02CC) -- secondary stress mark (U+02CC)

-- Stress diacritics the user may type, versus diacritics that belong to a letter
local TILDE = "[" .. ag .. gr .. "]"
local DIACR_NO_TILDE = "[" .. breve .. coma .. circ .. "]"
local acentos_ipa = ac_primario..ac_secundario
local ACENTOS_IPA = "[" .. acentos_ipa .. "]"

-- Letters of the input spelling, in both cases
local vocales = "aăâeiîouyAĂÂEIÎOUY"
local consonantes = "bcdfghjklmnpqrsștțvwxzBCDFGHJKLMNPQRSȘTȚVWXZ"
local vocales_salvo_i = "aăâeîouyAĂÂEÎOUY" -- same as `vocales` without i/I
local VOCAL = "["..vocales.."]"
local VOCAL_SALVO_I = "["..vocales_salvo_i.."]"
local CONS = "["..consonantes.."]"
-- Code points U+FFF0 / U+FFF1 serve as internal syllable-break markers;
-- separar_en_silabas inserts `divsil` and finally rewrites every separator as ".".
local divsil = u(0xFFF0)
local divsil_fijo = u(0xFFF1)
-- Everything that counts as a syllable separator: "-", "." and both markers
local sepsil = "%-." .. divsil .. divsil_fijo
local SEPARADORES_SILABICOS = "[" .. sepsil .. "]"
local SALVO_SEPARADORES_SILABICOS = "[^" .. sepsil .. "]"
local seppal = "# " -- word separators: "#" and space
local separador_excepto_palabras = acentos_ipa .. sepsil
local separador = separador_excepto_palabras .. seppal
local SEPARADOR = "[" .. separador .. "]"

-- PUNTUACION is turned into " | " by normalizar; PUNTUACION_EXTRA (the same
-- set plus quotation marks) is then deleted outright.
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹]"

-- Characters normalizar accepts; anything else raises an error.
local permitido = vocales..consonantes..ag..gr..separador.."·|%s" -- assumes punctuation has already been cleaned out

--- Cleans up a raw spelling before syllabification and conversion.
-- Lower-cases the text, turns delimiting punctuation into " | ", drops the
-- remaining punctuation, turns hyphens into spaces, recomposes the Romanian
-- letters after NFD decomposition, and tidies bars and whitespace.
-- (strsubrep is m_str.gsub_rep; presumably it repeats the substitution until
-- the text stops changing -- confirm against Módulo:String.)
-- @param texto raw spelling
-- @return the normalized text
-- Raises an error when a character outside `permitido` remains.
local function normalizar(texto)
	-- Base letter + combining mark (as left by NFD) -> precomposed letter
	local precompuestas = {
		["a" .. circ] = "â",
		["A" .. circ] = "Â",
		["a" .. breve] = "ă",
		["A" .. breve] = "Ă",
		["i" .. circ] = "î",
		["I" .. circ] = "Î",
		["s" .. coma] = "ș",
		["S" .. coma] = "Ș",
		["t" .. coma] = "ț",
		["T" .. coma] = "Ț",
	}

	local s = strlower(texto)
	-- Whatever delimits fragments becomes the IPA foot boundary "|"
	s = strsubrep(s, PUNTUACION, " | ")
	-- Any punctuation still left is simply removed
	s = strsubrep(s, PUNTUACION_EXTRA, "")
	-- Hyphens become spaces, so hyphenated compounds are treated as separate words
	s = strsubrep(s, "[%-‐]", " ")

	s = strnfd(s)
	-- Put the letter's own diacritic before the stress mark, then recompose
	-- the letter so that only the stress mark stays as a combining character
	s = strsubrep(s, "(" .. TILDE .. ")(" .. DIACR_NO_TILDE .. ")", "%2%1")
	s = strsubn(s, "." .. DIACR_NO_TILDE, precompuestas)

	if strfind(s, "[^" .. permitido .. "]") then
		error("caracteres no permitidos en el texto")
	end

	-- Finally collapse doubled bars and runs of whitespace, and trim the ends
	s = strsubrep(s, "%s*|%s*|%s*", " | ")
	s = strsubrep(s, "%s+", " ")
	s = strstrip(s, "[%s|]+")

	return s
end

--- Splits a normalized word into syllables separated by ".".
-- The rules below are applied strictly in the listed order; each entry is
-- {substitution function, pattern, replacement}.
-- @param word normalized spelling (output of normalizar)
-- @return the word with "." between syllables and none at either end
local function separar_en_silabas(word)
	local nucleo = VOCAL .. TILDE .. "*" -- a vowel plus optional stress marks
	local corte = "%1" .. divsil .. "%2"

	local reglas = {
		-- Break before the last consonant that precedes a vowel
		{ strsubrep, "(" .. nucleo .. CONS .. "+)(" .. CONS .. VOCAL .. ")", corte },
		{ strsubrep, "(" .. nucleo .. ")(" .. CONS .. VOCAL .. ")", corte },
		-- Keep fricatives and stops together with a following l or r,
		-- except "dl" (d only joins r)
		{ strsubn, "([pbfvkctg])" .. divsil .. "([lrɾ])", divsil .. "%1%2" },
		{ strsubn, "d%" .. divsil .. "([rɾ])", divsil .. "d%1" },
		-- swing, switch, etc.
		{ strsubn, "s" .. divsil .. "w", divsil .. "sw" },
		{ strsubn, "l" .. divsil .. "l", divsil .. "ll" },
		{ strsubn, "r" .. divsil .. "r", divsil .. "rr" },
		{ strsubn, "c" .. divsil .. "h([ei])", divsil .. "ch%1" },
		{ strsubn, "g" .. divsil .. "h([ei])", divsil .. "gh%1" },
		-- Hiatus: a stressed vowel is split from the vowels that follow,
		-- unless what follows is exactly the same single vowel
		{ strsubn, "(" .. VOCAL .. ")(" .. TILDE .. ")(" .. VOCAL .. "+)",
			function(v1, marca, resto)
				if v1 == resto then
					return v1 .. marca .. resto
				end
				return v1 .. marca .. divsil .. resto
			end },
		{ strsubn, "(" .. VOCAL_SALVO_I .. ")([iI]" .. VOCAL_SALVO_I .. "+)", corte },
		{ strsubn, "([aeoAEO])%1", "%1" .. divsil .. "%1" },
		{ strsubn, "([aoAO])([eE])", corte },
		-- "lupi": a final i after consonants does not form its own syllable;
		-- the break is removed and the i is written as ʲ
		{ strsubn, divsil .. "+(" .. CONS .. "+)i$", "%1ʲ" },
		{ strsubn, divsil .. "+(" .. CONS .. "+)i(%s)", "%1ʲ%2" },
		-- Every run of separators becomes a single "."
		{ strsubn, SEPARADORES_SILABICOS .. "+", "." },
	}

	for _, regla in ipairs(reglas) do
		word = regla[1](word, regla[2], regla[3])
	end

	-- Drop separators left at the ends of the word
	word = strstrip(word, SEPARADORES_SILABICOS .. "+")

	return word
end

-- Character classes over the IPA output (not over the spelling).
-- V: vowels; procesar_pron_args uses it to find where the rhyme starts.
local V = "[aeiouəɨ]"
-- C: consonants; generar_pron uses it to palatalize a word-final i/j.
local C = "[bkdfɡhʒlmnprstvzʃw]"
-- X and U: "j" followed by any sound / by a vowel. Neither is referenced in
-- the visible part of this module.
local X = "j[bkdfɡhʒlmnprstvzʃjwaəeiɨou]"
local U = "j[aeəɨiou]"

-- Single vowel letter -> IPA. generar_pron applies it last, after the
-- triphthong and diphthong tables.
local mapeo_vocales = {
    ["a"]="a", ["e"]="e", ["i"]="i", ["o"]="o", ["u"]="u", ["ă"]="ə", ["â"]="ɨ", ["î"]="ɨ",
}

-- Single consonant letter -> IPA. generar_pron applies it after mapeo_2, so
-- the c/g that remain here are the ones not followed by e/i.
local mapeo_1 = {
	["b"]="b",	["c"]="k",	["d"]="d",
	["f"]="f",	["g"]="ɡ",	["h"]="h", ["j"]="ʒ",
	["k"]="k",	["l"]="l",	["m"]="m", ["n"]="n",
	["p"]="p",	["q"]="k",	["r"]="r",	["s"]="s",	["t"]="t",
	["v"]="v",	["x"]="ks",
	["ș"]="ʃ",  ["ț"]="t͡s",
}

-- Letters whose output ("j", "v") is itself a key of mapeo_1; generar_pron
-- applies this table after mapeo_1, so the result is not mapped again.
local mapeo_1b = {
    ["y"]="j",
    ["w"]="v"
}

-- c/g (and ch/gh) before e or i. generar_pron applies this table first.
local mapeo_2 = {
    ["ce"] = "t͡ʃe",
    ["ci"] = "t͡ʃi",
    ["che"] = "ke",
    ["chi"] = "ki",
    ["ge"] = "d͡ʒe",
    ["gi"] = "d͡ʒi",
    ["ghe"] = "ɡe",
    ["ghi"] = "ɡi",
}

-- Two-vowel spellings -> IPA, with the glide written as j or w.
-- generar_pron applies this table after `triptongos` and before
-- `mapeo_vocales`, iterating with pairs(), so the order between keys is
-- unspecified.
local diptongos = {
    ["ai"] = "aj", ["au"] = "aw", ["ei"] = "ej",
    ["eu"] = "ew", ["ii"] = "ij",
    ["oi"] = "oj", ["ou"] = "ow", ["ui"] = "uj",
    ["uu"] = "uw", ["ăi"] = "əj", ["ău"] = "əw",
    ["âi"] = "ɨj", ["âu"] = "ɨw",
    ["ia"] = "ja", ["ie"] = "je", ["io"] = "jo",
    ["iu"] = "ju", ["ue"] = "we",
    ["ua"] = "wa", ["uă"] = "wə", ["uâ"] = "wɨ",
}

-- Three-vowel spellings -> IPA. generar_pron applies this table before
-- `diptongos`, so every key here must be a full three-letter sequence.
-- The "jew" entry is keyed on "ieu": keyed on the two-letter "eu" it rewrote
-- every "eu" as "jew" before the diphthong table could map it to "ew", and
-- left "ieu" itself unhandled.
local triptongos = {
    ["eai"] = "eaj", ["eau"] = "eaw", ["eoa"] = "eoa",
    ["iai"] = "jaj", ["iau"] = "jaw", ["iei"] = "jej",
    ["ieu"] = "jew", ["ioi"] = "joj", ["iou"] = "jow",
    ["oai"] = "oaj", ["uai"] = "waj", ["uau"] = "waw",
    ["uăi"] = "wəj", ["ioa"] = "joa"
}

-- Final pass of generar_pron over the whole converted text: marks the first
-- vowel of ea / eo / oa with the combining inverted breve below
-- (non-syllabic).
local fonetico = {
    ["ea"] = "e̯a", ["eo"] = "e̯o", ["oa"] = "o̯a"
}

--- Converts a spelling into its IPA transcription.
-- The text is normalized, split into fragments at "|" and into words at
-- whitespace; each word is syllabified, its stress mark placed, and its
-- letters mapped through the conversion tables in a fixed table order.
-- @param texto spelling to transcribe
-- @return a list holding one list with the HTML-encoded transcription
local function generar_pron(texto)
	texto = strlower(normalizar(texto))

	-- Tables are applied in exactly this order; the keys of each table are
	-- visited with pairs(), so the order inside a table is unspecified.
	local tablas = { mapeo_2, mapeo_1, mapeo_1b, triptongos, diptongos, mapeo_vocales }

	local convertido = {}
	for _, fragmento in ipairs(strsplit(texto, "%s*|%s*")) do
		local palabras_convertidas = {}

		for _, palabra in ipairs(strsplit(fragmento, "%s")) do
			local p = separar_en_silabas(palabra)

			if strfind(p, TILDE) then
				-- Swap the separator that opens the stressed syllable for the
				-- primary stress mark and drop the written accent ...
				p = strsubrep(p, "^(.*)" .. SEPARADOR .. "(.-)" .. TILDE, "%1" .. ac_primario .. "%2")
				-- ... or, when no separator precedes the accent, put the
				-- stress mark at the very beginning of the word
				p = strsubn(p, "^(.-)" .. TILDE, ac_primario .. "%1")
			end

			for _, tabla in ipairs(tablas) do
				for grafia, sonido in pairs(tabla) do
					p = strsubn(p, grafia, sonido)
				end
			end

			-- "ks" right after a syllable break: the k closes the previous syllable
			p = strsubn(p, "(" .. SEPARADORES_SILABICOS .. ")ks", "k%1s")
			-- "ij" right after a syllable break is read as "ji"
			p = strsubn(p, "(" .. SEPARADORES_SILABICOS .. ")ij", "%1ji")
			-- A final i/j after a consonant only palatalizes that consonant
			p = strsubn(p, "(" .. C .. ")[ij]$", "%1ʲ")

			insert(palabras_convertidas, p)
		end

		insert(convertido, concat(palabras_convertidas, " "))
	end

	local fono = concat(convertido, " | ")

	-- "phonetic" pass --> TO REVIEW: is this really phonetic? It still looks
	-- phonological.
	for grafia, sonido in pairs(fonetico) do
		fono = strsubn(fono, grafia, sonido)
	end

	return {{strhtml(fono)}}
end

--- Fills in the pronunciation fields of `args` for an entry.
-- When neither args["fone"] nor args["fono"] holds anything, every spelling
-- in args["ayuda"] (or the title, when no spelling was given) is transcribed
-- and the results are appended to:
--   args["fono"]  transcriptions (at most 9 are inserted),
--   args["fgraf"] the spelling behind each transcription (only when the
--                 caller supplied args["ayuda"]),
--   args["rima"]  the distinct rhymes,
-- and, only when the title has no spaces:
--   args["d"]     hyphen-separated syllable division of each spelling,
--   args["ls"]    the distinct syllable counts,
--   args["ac"]    the distinct accent classes.
-- @param tit  page title
-- @param args argument table; the list fields named above are assumed to
--             exist already (the code indexes and appends to them directly)
-- @return the same `args` table, modified in place
function export.procesar_pron_args(tit, args)
	local vino_ayuda = true
	if not args["ayuda"][1] then
		-- No spelling supplied: transcribe the title itself
		vino_ayuda = false
		args["ayuda"][1] = tit
	end

	if #args["fone"] < 1 and #args["fono"] < 1 then
		-- Used as sets (value -> true) so repeated values are stored once
		local rimas, ls, ac = {}, {}, {}
		local tiene_espacios = strfind(tit, " ")
		local A = #args["ayuda"]
		local j = 1 -- index into args["ayuda"]
		local k = 1 -- number of pronunciations inserted (at most 9)
		while k <= 9 and j <= A do
			if not tiene_espacios then
				local div = strsubn(separar_en_silabas(normalizar(args["ayuda"][j])), "%.", "-")
				insert(args["d"], div)
				local longsib = strcount(div, "%-") + 1
				ls[longsib] = true
				if longsib == 1 then
					ac["monosílaba"] = true
				else
					local t_idx = strfind(div, TILDE)
					if t_idx then
						-- Classify by the number of syllable breaks that
						-- follow the written stress mark
						local resto = substr(div, t_idx + 1)
						local ssil = strcount(resto, SEPARADORES_SILABICOS)
						if ssil == 0 then
							ac["aguda"] = true
						elseif ssil == 1 then
							ac["llana"] = true
						elseif ssil == 2 then
							ac["esdrújula"] = true
						else
							-- Previously misspelled with three e's
							ac["sobreesdrújula"] = true
						end
					end
				end
			end

			local fono = generar_pron(args["ayuda"][j])

			-- Rhyme: what follows the primary stress mark, starting at the
			-- first vowel
			local rim = fono[1][1]
			rim = strsubn(rim, "^.*" .. ac_primario .. "(.-)$", "%1")
			rim = strsubn(rim, ".-" .. "(" .. V .. ".*" .. ")" .. "$", "%1")
			rimas[rim] = true

			for i, _ in ipairs(fono) do
				if vino_ayuda and args["ayuda"][j] then
					insert(args["fgraf"], {args["ayuda"][j]})
				end
				insert(args["fono"], fono[i])
				k = k + 1
				if k > 9 then
					break
				end
			end

			j = j + 1
		end

		for rim, _ in pairs(rimas) do
			insert(args["rima"], rim)
		end
		for lon, _ in pairs(ls) do
			insert(args["ls"], lon)
		end
		for ace, _ in pairs(ac) do
			insert(args["ac"], ace)
		end
	end

	return args
end

return export