Module:tg-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module will transliterate Tajik language text per WT:TG TR. It is also used to transliterate Wakhi and Yagnobi. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:tg-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates the given text, which is written in the script identified by the code sc and belongs to the language identified by the code lang.
Returns nil when the transliteration fails.

local export = {}

local m_string_utils = require("Module:string utilities")
local gsub = m_string_utils.gsub
local U = m_string_utils.char

-- Single-character Cyrillic -> Latin lookup, listed in alphabetical order with
-- the lowercase form first and its uppercase counterpart beside it.
local tt = {
	["а"] = "a",  ["А"] = "A",
	["б"] = "b",  ["Б"] = "B",
	["в"] = "v",  ["В"] = "V",
	["г"] = "g",  ["Г"] = "G",
	["ғ"] = "ġ",  ["Ғ"] = "Ġ",
	["д"] = "d",  ["Д"] = "D",
	["е"] = "e",  ["Е"] = "E",
	["ё"] = "yo", ["Ё"] = "Yo",
	["ж"] = "ž",  ["Ж"] = "Ž",
	["з"] = "z",  ["З"] = "Z",
	["и"] = "i",  ["И"] = "I", -- a word-final и gets a leading hyphen (handled in export.tr)
	["ӣ"] = "i",  ["Ӣ"] = "I",
	["й"] = "y",  ["Й"] = "Y",
	["к"] = "k",  ["К"] = "K",
	["қ"] = "q",  ["Қ"] = "Q",
	["л"] = "l",  ["Л"] = "L",
	["м"] = "m",  ["М"] = "M",
	["н"] = "n",  ["Н"] = "N",
	["о"] = "o",  ["О"] = "O",
	["п"] = "p",  ["П"] = "P",
	["р"] = "r",  ["Р"] = "R",
	["с"] = "s",  ["С"] = "S",
	["т"] = "t",  ["Т"] = "T",
	["у"] = "u",  ["У"] = "U",
	["ӯ"] = "ü",  ["Ӯ"] = "Ü",
	["ф"] = "f",  ["Ф"] = "F",
	["х"] = "x",  ["Х"] = "X",
	["ҳ"] = "h",  ["Ҳ"] = "H",
	["ч"] = "č",  ["Ч"] = "Č",
	["ҷ"] = "j",  ["Ҷ"] = "J",
	["ш"] = "š",  ["Ш"] = "Š",
	["ъ"] = "ʾ",  ["Ъ"] = "ʾ",
	["э"] = "e",  ["Э"] = "E",
	["ю"] = "yu", ["Ю"] = "Yu",
	["я"] = "ya", ["Я"] = "Ya",

	-- Dated letters, removed in the 1998 reform.
	["ц"] = "ts", ["Ц"] = "Ts", -- replaced with "тс", sometimes "с"
	["щ"] = "šč", ["Щ"] = "Šč", -- replaced with "шч"
	["ы"] = "y",  ["Ы"] = "Y",  -- replaced with "и"
	["ь"] = "",   ["Ь"] = "",   -- removed entirely
}

-- Characters that are no longer used in Tajik and are only supported "just in
-- case": the letters dropped from the alphabet plus the combining acute accent
-- (U+0301, presumably a stress mark as found in Russian-style text).
local RUS_accent = U(0x301)
local RUonly = "ЦцЩщЫыЬь" .. RUS_accent
-- Every character that counts as part of a word: each letter of the alphabet in
-- lowercase/uppercase pairs, followed by RUonly. Anything not listed here is
-- treated as a word boundary by export.tr, so every letter handled there
-- (including В, М and ӣ/Ӣ) has to be present.
local all_letters = "аАбБвВгГғҒдДеЕёЁжЖзЗиИӣӢйЙкКқҚлЛмМнНоОпПрРсСтТуУӯӮфФхХҳҲчЧҷҶшШъЪэЭюЮяЯ" .. RUonly
-- Pattern character class matching exactly one of the characters above.
local a_letter = "[" .. all_letters .. "]"

-- Replacements that spell out the glide "й" in front of е/и/ӣ when one of them
-- directly follows a vowel. "ì"/"Ì" are the temporary stand-ins for a flagged
-- boundary-final "и"/"И"; those additionally receive a leading hyphen.
local iotated = {
	["е"] = "йе", ["Е"] = "ЙЕ",
	["и"] = "йи", ["И"] = "ЙИ",
	["ì"] = "-йи", ["Ì"] = "-ЙИ",
	["ӣ"] = "йӣ", ["Ӣ"] = "ЙӢ",
}

--- Transliterates Cyrillic text into Latin script.
-- @param text  the string to transliterate
-- @param lang  language code; only consulted to detect the script when sc is nil
-- @param sc    script code; when nil it is looked up via Module:languages
-- @return the transliterated string, or nil when the script is not "Cyrl"
function export.tr(text, lang, sc)
	if not sc then
		sc = require("Module:languages").getByCode(lang):findBestScript(text):getCode()
	end

	-- Only Cyrillic input is handled.
	if sc ~= "Cyrl" then
		return nil
	end

	-- "#" is used as the boundary marker below, so hide any literal "#" first.
	text = gsub(text, "#", "\1")
	-- Place a marker at both ends of the text, around every newline and around
	-- every run of characters that are not in all_letters.
	text = gsub(text, "^", "#")
	text = gsub(text, "$", "#")
	text = gsub(text, "(\n)", "#%1#")
	text = gsub(text, "([^" .. all_letters .. "]+)", "#%1#")

	-- Flag an "и"/"И" that stands right before a marker with the stand-ins
	-- "ì"/"Ì" ...
	text = gsub(text, "(И)(#)", "Ì%2")
	text = gsub(text, "(и)(#)", "ì%2")
	-- ... but undo the flag when the whole stretch between two markers is just
	-- that letter, optionally preceded by a single letter.
	text = gsub(text, "(#)(" .. a_letter .. "?)ì(#)", "%1%2и%3")
	text = gsub(text, "(#)(" .. a_letter .. "?)Ì(#)", "%1%2И%3")

	-- Insert "й" between a vowel (optionally carrying a combining accent) and a
	-- following е/и/ӣ or flagged и.
	-- NOTE(review): the uppercase half of the first set lists Ъ but not Э, while
	-- the lowercase half lists э but not ъ — confirm which is intended.
	text = gsub(
		text,
		"([АОӮУЕЯЁЮИӢЕЪаоуӯэяёюиӣе][́̀]?)([ЕеИиÌìӢӣ])",
		function(vowel, follower)
			return vowel .. iotated[follower]
		end
	)

	-- "Е"/"е" directly after a marker becomes "Ye"/"ye" (the marker is consumed).
	text = gsub(text, "#Е", "Ye")
	text = gsub(text, "#е", "ye")

	-- Turn the remaining flagged letters into a hyphen plus the plain letter.
	text = gsub(text, "ì", "-и")
	text = gsub(text, "Ì", "-И")

	-- Drop the markers and bring back the literal "#" characters.
	text = gsub(text, "#", "")
	text = gsub(text, "\1", "#")

	-- Map every remaining character through tt; characters without an entry
	-- are left as they are.
	text = gsub(text, ".", tt)

	return text
end

return export