Ir al contenido

Módulo:scripts/charAScript

De Wikcionario, el diccionario libre

La documentación para este módulo puede ser creada en Módulo:scripts/charAScript/doc

-- tomado de en.wiktionary.org/wiki/Module:scripts/charAScript

local subexport = {}

local cp = mw.ustring.codepoint
local floor = math.floor
local min = math.min
local split = mw.text.split

-- Copied from [[Module:Unicode data]].
-- Binary search for the range that contains `codepoint`.
-- `ranges` is a sequence of tables whose first two entries are the inclusive
-- lower and upper bounds; presumably sorted ascending and non-overlapping,
-- since the search relies on that. The element count is taken from
-- `ranges.length` when present, otherwise from [[Módulo:tabla]]'s `length`.
-- Returns the matching range and its index, or nil plus the last index that
-- was probed (nil as well when nothing was probed).
local function binaryRangeSearch(codepoint, ranges)
	local first = 1
	local last = ranges.length or require "Módulo:tabla".length(ranges)
	local probe
	while first <= last do
		probe = floor((first + last) / 2)
		local candidate = ranges[probe]
		local below = codepoint < candidate[1]
		if not below and codepoint <= candidate[2] then
			return candidate, probe
		end
		if below then
			-- Target lies before this range: keep the lower half.
			last = probe - 1
		else
			-- Target lies after this range: keep the upper half.
			first = probe + 1
		end
	end
	return nil, probe
end

-- Copied from [[Module:Unicode data]].
-- Sequential scan for the range that contains `codepoint`.
-- Each entry of `ranges` holds its inclusive lower and upper bound in
-- positions 1 and 2. Returns the matching entry, or nothing if none matches.
local function linearRangeSearch(codepoint, ranges)
	for _, span in ipairs(ranges) do
		if codepoint < span[1] then
			-- Give up at the first range starting beyond the target; this
			-- presumes `ranges` is ordered by lower bound.
			return
		end
		if codepoint <= span[2] then
			return span
		end
	end
end

-- Ordering predicate for table.sort: a range sorts before another when its
-- lower bound (entry 1) is strictly smaller.
local function compareRanges(range1, range2)
	local start1, start2 = range1[1], range2[1]
	return start1 < start2
end

-- Save previously used codepoint ranges in case another character is in the
-- same range.
local rangesCache = {}

-- Appends to `ret` the script codes carried by `range`, i.e. every sequence
-- entry after the two boundary values. Only the first code is appended unless
-- `all_scripts` is truthy.
local function appendRangeScripts(ret, range, all_scripts)
	for i, script in ipairs(range) do
		if i > 2 then
			table.insert(ret, script)
			if not all_scripts then
				break
			end
		end
	end
end

--[=[
	Takes a codepoint (number) or a single character (string) and finds the script code(s) (if any) that are appropriate for it based on the codepoint, using the data module [[Módulo:scripts/identificación]] (presumably the local copy of en.wiktionary's [[Module:scripts/recognition data]], which was generated from the patterns in [[Module:scripts/data]] using [[Module:User:Erutuon/script recognition]]).
	
	Lookup order: an exact entry in `individual` (a comma-separated list of codes); the cache of ranges matched by earlier calls; `blocks`, searched by floor(codepoint / 0x1000); and finally the range list stored in the data under that same index.
	
	By default, it returns only the first script code if there are multiple matches (i.e. the code we take to be the default). If `all_scripts` is set, then a table of all matching codes is returned. When nothing matches, the result is "None" (or a table containing only "None").
	
	Raises an error if `char` is neither a string nor a number, or if it is a string that is empty or holds more than one character.
]=]

local charAScriptData
function subexport.charAScript(char, all_scripts)
	-- The data module is loaded lazily, on the first call only.
	charAScriptData = charAScriptData or mw.loadData("Módulo:scripts/identificación")
	local t = type(char)
	local codepoint
	if t == "string" then
		local etc
		codepoint, etc = cp(char, 1, 2)
		-- `cp` yields no value for an empty string; reject it here instead of
		-- failing further down with an arithmetic/comparison error on nil.
		if etc or not codepoint then
			error("bad argument #1 to 'charAScript' (expected a single character)")
		end
	elseif t == "number" then
		codepoint = char
	else
		error(("bad argument #1 to 'charAScript' (expected string or a number, got %s)")
			:format(t))
	end
	
	local ret = {}
	local individualMatch = charAScriptData.individual[codepoint]
	if individualMatch then
		ret = split(individualMatch, "%s*,%s*")
	else
		local range
		-- Try the ranges that already matched in earlier calls first.
		if rangesCache[1] then
			range = linearRangeSearch(codepoint, rangesCache)
			if range then
				appendRangeScripts(ret, range, all_scripts)
			end
		end
		if not ret[1] then
			local index = floor(codepoint / 0x1000)
			-- `blocks` is searched with the 0x1000-sized block index, not
			-- with the codepoint itself.
			range = linearRangeSearch(index, charAScriptData.blocks)
			if not range and charAScriptData[index] then
				range = binaryRangeSearch(codepoint, charAScriptData[index])
				if range then
					-- Keep the cache ordered by lower bound so that
					-- linearRangeSearch can stop early.
					table.insert(rangesCache, range)
					table.sort(rangesCache, compareRanges)
				end
			end
			if range then
				appendRangeScripts(ret, range, all_scripts)
			end
		end
	end
	if not ret[1] then
		table.insert(ret, "None")
	end
	if all_scripts then
		return ret
	end
	return ret[1]
end

return subexport