Módulo:sortkey
Apariencia
Fallaron 10 de 16 pruebas (actualizar)
pruebas español | |||||
---|---|---|---|---|---|
idioma | entrada | se espera | salida | evaluación | |
español (es) | témpura | TEMPURA | TEMPURA | [ ok ] | |
español (es) | ¿Qué? | QUE | ¿QUE? | [ MAL ] | |
español (es) | Por qué? | POR QUE | POR QUE? | [ MAL ] | |
español (es) | Por qué | POR QUE | POR QUE | [ ok ] | |
español (es) | Por que | POR QUE | POR QUE | [ ok ] | |
español (es) | re! | RE | RE! | [ MAL ] | |
español (es) | ¡re! | RE | ¡RE! | [ MAL ] | |
español (es) | re | RE | RE | [ ok ] | |
español (es) | pingüino | PINGUINO | PINGUINO | [ ok ] | |
pruebas con diacríticos | |||||
idioma | entrada | se espera | salida | evaluación | |
griego antiguo (grc) | Πηληϊάδης Αἶνος | ΠΗΛΗΙΑΔΗΣ ΑΙΝΟΣ | ΠΗΛΗΙΑΔΗΣ ΑΙΝΟΣ | [ ok ] | |
navajo (nv) | shį́į́dą́ą́ʼ | SHIIDAA | SIIDAAZ | [ MAL ] | |
prueba con el dotted dotless i | |||||
idioma | entrada | se espera | salida | evaluación | |
turco (tr) | İzmir | İZMİR | IZMIR | [ MAL ] | |
turco (tr) | ışık | IŞIK | ISIK | [ MAL ] | |
prueba con módulos dedicados | |||||
idioma | entrada | se espera | salida | evaluación | |
vietnamita (vi) | Tuyên ngôn toàn thế giới về nhân quyền của Liên Hợp Quốc | TUYE₂N NGO₂N TOAN1 THE₂4 GIO₃I4 VE₂1 NHA₂N QUYE₂N1 CUA2 LIE₂N HO₃P5 QUO₂C4 | TUYÊN NGÔN TOÀN THẾ GIỚI VỀ NHÂN QUYỀN CỦA LIÊN HỢP QUỐC | [ MAL ] | |
chino (zh) | 命裡有時終須有,命裡無時莫強求 | 口05衣07月02日06糸05頁03月02,口05衣07火08日06艸07弓08水02 | 命裡有時終須有,命裡無時莫強求 | [ MAL ] | |
chino (zh) | ⿺辶⿳穴⿲月⿱⿲幺言幺⿲長馬長刂心⿺辶⿳穴⿲月⿱⿲幺言幺⿲長馬長刂心麵 | 辵54辵54麥09 | ⿺辶⿳穴⿲月⿱⿲幺言幺⿲長馬長刂心⿺辶⿳穴⿲月⿱⿲幺言幺⿲長馬長刂心麵 | [ MAL ] |
Esta documentación está transcluida desde Módulo:sortkey/doc.
Los editores pueden experimentar en la zona de pruebas de este módulo.
Por favor, añade las categorías e interwikis a la subpágina de documentación. Subpáginas de este módulo.
Los editores pueden experimentar en la zona de pruebas de este módulo.
Por favor, añade las categorías e interwikis a la subpágina de documentación. Subpáginas de este módulo.
local ugsub = mw.ustring.gsub
local tofixednfd = require("Módulo:String").toNFD
local tofixednfc = require("Módulo:String").toNFC
local corregir = require("Módulo:String").corregirSecuenciasIncorrectas
-- Character-class body (meant to be placed inside [...] in an mw.ustring pattern) made of two UTF-8 encoded ranges: U+202A-U+202E and U+2066-U+2069, i.e. the Unicode directional embedding/override/isolate formatting characters.
local dir_char = "\226\128\170-\226\128\174\226\129\166-\226\129\169"
-- Strips any run of directional characters from the start and from the end of `text`; whatever lies between is returned as-is. Only the resulting string is returned (the substitution count is dropped).
local function remove_directional_chars(text)
	-- Zero or more directional characters.
	local dir_run = "[" .. dir_char .. "]*"
	-- The frontier stops the greedy capture right before a trailing run of directional characters (or at the end of the string).
	local pattern = "^" .. dir_run .. "(.*)%f[%z" .. dir_char .. "]" .. dir_run .. "$"
	local stripped = ugsub(text, pattern, "%1")
	return stripped
end
--[==[Creates a sort key for the given entry name, following the rules appropriate for the language. This removes diacritical marks from the entry name if they are not considered significant for sorting, and may perform some other changes. Any initial hyphen is also removed, and any parentheses are removed as well.
The <code>sort_key</code> setting for each language in the data modules defines the replacements made by this function, or it gives the name of the module that takes the entry name and returns a sort key.]==]
-- Decode HTML entities in `text`. The decoder module is only loaded when the text contains something shaped like an entity ("&", one or more non-";" characters, then ";"); otherwise the text is returned unchanged.
local function noEntities(text)
	if not text:match("&[^;]+;") then
		return text
	end
	return require("Module:String/avanzado").get_entities(text)
end
-- Return the raw text when it is listed as an unsupported title; otherwise return the text with HTML entities decoded. Entities are decoded first so that the unsupported-title list is only loaded when decoding actually changed something.
local function checkNoEntities(text)
	local decoded = noEntities(text)
	if decoded == text then
		-- Nothing was decoded, so there is no need to consult the unsupported-title list.
		return decoded
	end
	local unsupported = mw.loadData("Module:enlaces/datos").unsupported_titles
	if unsupported[text] then
		return text
	end
	return decoded
end
-- Process carets (and any escapes). Defaults to simple removal of unescaped carets if no pattern/replacement is given.
-- @param text     string to process.
-- @param pattern  optional Lua pattern applied to the unescaped carets (default "%^").
-- @param repl     optional replacement passed to gsub for `pattern` (default "").
-- @return the processed string only; the gsub replacement count is intentionally not returned.
local function processCarets(text, pattern, repl)
	-- Protect escaped backslashes: a double backslash that is followed by a run of backslashes ending in a caret becomes the placeholder \3. Repeat until no more are found.
	local rep
	repeat
		text, rep = text:gsub("\\\\(\\*^)", "\3%1")
	until rep == 0
	-- Assigning the chain to a variable keeps only gsub's first result, so the substitution count does not leak out as a second return value.
	text = text
		:gsub("\\^", "\4") -- escaped caret -> placeholder \4
		:gsub(pattern or "%^", repl or "") -- handle the remaining (unescaped) carets
		:gsub("\3", "\\") -- restore protected backslashes
		:gsub("\4", "^") -- restore escaped carets as literal carets
	return text
end
-- Strip carets from `text` via processCarets, but only when the script object reports no capitalization, reports being transliterated, and the text actually contains a caret. In every other case the text is handed back untouched.
local function removeCarets(text, sc)
	local needs_processing = not sc:hasCapitalization()
		and sc:isTransliterated()
		and text:match("%^")
	if needs_processing then
		return processCarets(text)
	end
	return text
end
local export = {}

--[==[Builds the sort key for `text`.
Parameters:
* `text`: the entry name; if it is nil or empty it is returned unchanged together with nil and an empty table.
* `cod`: passed through to Módulo:String/sustituir.
* `idioma`: language data table; the fields read here are `dotted_dotless_i` and `sort_key`.
Returns three values: the sort key (HTML-encoded by Módulo:String's encode_html), plus the second and third results of Módulo:String/sustituir (named `fail` and `cats` here).]==]
function export.generarSortkey(text, cod, idioma)
	if (not text) or text == "" then
		return text, nil, {}
	end
	if text:find("<[^<>]+>") then
		-- The module title needs the namespace colon ("Módulo:traza"), like every other require in this file; with a slash the title cannot resolve to a module page and require raises.
		-- NOTE(review): presumably this records a tracking entry for inputs containing HTML tags — confirm against Módulo:traza.
		require("Módulo:traza")("símbolo HTML en texto")
	end
	-- Remove directional characters, soft hyphens, strip markers and HTML tags.
	text = ugsub(text, "[\194\173" .. dir_char .. "]", "")
	text = mw.text.unstrip(text)
		:gsub("<[^<>]+>", "")
	text = mw.uri.decode(text, "PATH")
	text = checkNoEntities(text)
	-- Remove initial hyphens and * unless the term only consists of spacing + punctuation characters.
	-- NOTE(review): the two "[-]" classes look as if they may have lost characters (e.g. a private-use range) in copying — verify against the stored module text.
	text = ugsub(text, "^([-]*)[-־ـ᠊*]+([-]*)(.*[^%s%p].*)", "%1%2%3")
	local sc = require("Module:String/avanzado").findBestScript(text, idioma)
	text = corregir(text, sc)
	text = tofixednfd(text, sc)
	text = removeCarets(text, sc)
	-- For languages with dotted dotless i, ensure that "İ" is sorted as "i", and "I" is sorted as "ı".
	if idioma.dotted_dotless_i then
		text = text
			:gsub(mw.ustring.toNFD("İ"), "i")
			:gsub("I", "ı")
		text = tofixednfd(text, sc)
	end
	-- Convert to lowercase, make the sortkey, then convert to uppercase. Where the language has dotted dotless i, it is usually not necessary to convert "i" to "İ" and "ı" to "I" first, because "I" will always be interpreted as conventional "I" (not dotless "İ") by any sorting algorithms, which will have been taken into account by the sortkey substitutions themselves. However, if no sortkey substitutions have been specified, then conversion is necessary so as to prevent "i" and "ı" both being sorted as "I".
	-- An exception is made for scripts that (sometimes) sort by scraping page content, as that means they are sensitive to changes in capitalization (as it changes the target page).
	local fail, cats
	if not sc:sortByScraping() then
		text = text:ulower()
	end
	text, fail, cats = require("Módulo:String/sustituir")(text, nil, nil, cod, idioma, sc, idioma.sort_key, "makeSortKey")
	if not sc:sortByScraping() then
		if idioma.dotted_dotless_i and not idioma.sort_key then
			text = text
				:gsub("ı", "I")
				:gsub("i", "İ")
			text = tofixednfc(text, sc)
		end
		text = text:uupper()
	end
	-- Remove parentheses, as long as they are either preceded or followed by something.
	text = text
		:gsub("(.)[()]+", "%1")
		:gsub("[()]+(.)", "%1")
	text = require("Módulo:String").encode_html(text)
	return text, fail, cats
end
return export