Jump to content

Module:User:Catonif/sq-ndecl

From Wiktionary, the free dictionary

This is a private module sandbox of Catonif, for their own experimentation. Items in this module may be added and removed at Catonif's discretion; do not rely on this module's stability.


--[[
	Noun and adjective declension module for Albanian.
	Is meant to deal only with the modern standard.
	Supports neuter nouns.

References:
	FS = Francesco Solano (1988), Manuale di lingua albanese
--]]

--[[
	TODO: understand if <vito> and <atë> are irregular.
	TODO: adjectives.
	TODO: should monosyllables have accents on them as well?
	TODO: I divided the table first by sing/plur rather than by def/indef. Is it fine?
--]]

-- Table returned at the end of the file; the entry points export.head and
-- export.table are attached to it.
local export = {};

-- Module-level table of inflected forms, keyed by slot name (e.g. 'i_NA_s').
-- It is created by template_export() and filled by set(); every helper in
-- this file reads and writes it directly instead of receiving it as a parameter.
-- TODO: Is this a good thing to do? All around the place so I hope so...
local infls;

--[[ table
	Stands for "current quals". It is a stack of the quals that apply to
	the forms set() is currently setting: q_b() pushes a qual onto it and
	q_b_e() pops the last one. It is nil whenever no qual is active.
--]]
local c_q;

--[[ string
	Splits an input form from its trailing qual in square brackets.

	When the input contains a bracketed qual, the qual text is pushed onto
	c_q (so that set() attaches it to the forms it stores) and the form is
	returned without the bracketed part and without leading whitespace.
	Input without brackets is returned untouched and c_q is left alone.

Parameters:
	str: The raw form as typed in the template argument.

Returns:
	The form with the qual stripped away.
--]]
local function q_b(str)
	-- No bracketed qual: nothing to strip, nothing to remember.
	if (not str:find('%[.+%]')) then
		return str;
	end

	-- Everything up to and including the last opening bracket goes away,
	-- then the closing bracket and any whitespace around it.
	local qual = str:gsub('.+%s*%[%s*', '');
	qual = qual:gsub('%s*%]%s*$', '');
	c_q = c_q or {};
	c_q[#c_q + 1] = qual;

	-- The form is what precedes the bracketed part, minus leading blanks.
	local form = str:gsub('%s*%[.+%]%s*$', '');
	form = form:gsub('^%s*', '');
	return form;
end

--[[ void
	Pops the last element of c_q once all the forms carrying that qual
	have been dealt with. When the stack becomes empty, c_q is reset to
	nil so that set() stops attaching quals. Does nothing if c_q is nil.
--]]
local function q_b_e()
	if (not c_q) then return; end
	table.remove(c_q);
	if (#c_q == 0) then
		c_q = nil;
	end
end

--[[ table
	Returns a new sequence holding the truthy elements of t, in order.
	Used by set() so that every form gets its own copy of the quals.
--]]
local function clonetable(t)
	local copy, count = {}, 0;
	for _, item in ipairs(t) do
		if (item) then
			count = count + 1;
			copy[count] = item;
		end
	end
	return copy;
end

--[[ void
	Adds a form to a slot of the module-level infls table.

	The slot is created if it does not exist yet. A form already present
	in the slot is not added again (and the function returns right away).
	If quals are currently active (c_q is not nil), a copy of them is
	stored under the key 'q'..N, where N is the index of the form in the
	slot.

Parameters:
	slot: The slot name, e.g. 'i_NA_s'.
	form: The inflected form to store.
--]]
local function set(slot, form)
	local forms = infls[slot];
	if (not forms) then
		-- First form for this slot.
		forms = { form };
		infls[slot] = forms;
	else
		-- Skip forms the slot already holds.
		for _, known in ipairs(forms) do
			if (known == form) then return; end
		end
		forms[#forms + 1] = form;
	end
	if (c_q) then
		forms['q'..#forms] = clonetable(c_q);
	end
end

--[[ table
	Ordered list of the distinct quals met while building a table. It is
	filled by get_qual_index() and printed by format_quals() as the list
	at the end of the table. If the table and head functions get split
	into two modules, this should go in the tables'.
--]]
local quals;

--[[ int
	Tells the index of a qual, to be displayed as a superscript number
	in a table. A qual seen for the first time is appended to the quals
	list and its new index is returned.

Parameters:
	q: The qual text.

Returns:
	The 1-based position of q in the quals list.
--]]
local function get_qual_index(q)
	quals = quals or {};
	for idx = 1, #quals do
		if (quals[idx] == q) then return idx; end
	end
	quals[#quals + 1] = q;
	return #quals;
end

--[[ string or nil
	Builds the ordered list placed at the bottom of a table, explaining
	the quals that the table itself shows only as superscript numbers.
	Each entry is the superscript index, the qual with its first letter
	upper-cased (ASCII lower-case letters only) and a full stop; entries
	are separated by line breaks.

Returns:
	The wikitext of the list, or nil if no qual has been registered.
--]]
local function format_quals()
	if (not quals) then return nil; end
	local rows = {};
	for idx, text in ipairs(quals) do
		local capitalised = text:gsub('^%l', string.upper);
		rows[idx] = '<sup>'..idx..'</sup>) '..capitalised..'.';
	end
	return table.concat(rows, '<br/>');
end

--[[ string
	Turns the list of forms of a slot into wikitext for a table cell.

	A slot with a single form and no qual is returned as the bare form,
	without a link. Otherwise every form is wrapped in double square
	brackets, forms are separated by commas, and each qual of a form is
	appended as a superscript number obtained from get_qual_index(); the
	numbers are explained by format_quals().

	A missing slot (nil) yields an empty string instead of raising an
	error. This happens, for instance, when the full table is requested
	for a term whose plural has not been provided, so that the plural
	slots were never created.

Parameters:
	list: The slot table (forms at 1..n, quals under 'q1'..'qn'), or nil.

Returns:
	The wikitext for the cell.
--]]
local function link(list)
	if (not list) then return ''; end
	if (#list == 1 and not list.q1) then
		return list[1];
	end
	local cells = {};
	for i, v in ipairs(list) do
		local cell = '[['..v..']]';
		if (i ~= #list) then cell = cell..','; end
		local form_quals = list['q'..i];
		if (form_quals) then
			for _, q in ipairs(form_quals) do
				cell = cell..'<sup>'..get_qual_index(q)..'</sup>';
			end
		end
		cells[#cells + 1] = cell..' ';
	end
	return table.concat(cells);
end

--[[ func, callable into a for
	The parameters |2= (main form), |3= (plural ending), |pl= (irregular
	plural), |f=/|m= (feminine/masculine equivalent) can hold more than one
	value for a single word, separated by a slash '/'. This returns an
	iterator over the non-empty pieces between slashes.
--]]
local function loop(i)
	local pieces = i:gmatch('[^/]+');
	return pieces;
end

--[[ table
	Like above, but collects the pieces into a table.
--]]
local function split(i)
	local pieces = {};
	-- gsub is used only to visit every match; its result is discarded.
	i:gsub('[^/]+', function(piece)
		pieces[#pieces + 1] = piece;
	end);
	return pieces;
end

--[[ string
	Inserts final <-ë>.

	Consonantal inflectional endings <-t, -s, -n> gain an extra final
	<-ë> if attached to forms stressed finally. [FS 22.2]

	A form counts as stressed finally when an accented vowel (precomposed
	with an acute, or followed by the combining acute U+0301) is followed
	only by non-vowel characters up to the end of the string.

Parameters:
	s: The inflected form.

Returns:
	The form with <-ë> attached to it if conditions are satisfied.
--]]
local function fin_e(s)
	-- U+0301 COMBINING ACUTE ACCENT. It is built at run time because the
	-- '\u{...}' escape does not exist in Lua 5.1 (the version Scribunto
	-- runs): there it is read as the literal characters 'u{0301}', which
	-- would make the class below match a plain <u>.
	local acute = mw.ustring.char(0x301);
	local stressed_finally = mw.ustring.find(s, '[áéíóúý' .. acute .. '][^aeiouyë]+$');
	return s .. (stressed_finally and 'ë' or '');
end

--[[ string
	Inserts euphonic <-i->.

	Plural forms stressed on the penult ending in <-as, -es, -ër, -ël,
	-ur, -ëz, -az> or stressed finally and ending in <-q, -gj>, itself
	preceded by a consonant, introduce a <-i-> in the stem before the
	inflectional endings <-t, -sh>. [FS 22.3]

	Note that for the first group only the ending is checked here, not
	the position of the stress.

Parameters:
	s: The plural form in question.
	i: The inflectional ending. Should be either 't' or 'sh'.

Returns:
	The final inflected form.
--]]
local function euf_i(s, i)
	-- U+0301 COMBINING ACUTE ACCENT. It is built at run time because the
	-- '\u{...}' escape does not exist in Lua 5.1 (the version Scribunto
	-- runs): there it is read as the literal characters 'u{0301}', which
	-- would make the classes below match a plain <u>.
	local acute = mw.ustring.char(0x301);
	-- An accented vowel followed by at least one non-vowel character.
	local stressed = '[áéíóúý' .. acute .. '][^aeiouyë]+';

	local needs_i = s:find('ë[rlz]$') or s:find('[ae]s$') or s:find('az$') or s:find('ur$')
		or mw.ustring.find(s, stressed .. 'q$')
		or mw.ustring.find(s, stressed .. 'gj$');

	if (needs_i) then
		return s..'i'..i;
	end
	return s..i;
end

--[[ string
	Builds the indefinite plural from the singular and a plural ending.

	The ending may start with special characters, which are consumed in
	this order:
		'*' alone: the plural is identical with the singular.
		'+': metaphony, <é> becomes <í> and <á> becomes <é> in the stem.
		'j': palatalisation of the final consonant of the stem.
	A final <-ë> of the singular is dropped before anything else. If the
	ending carries an accent, the accents of the stem are removed, so
	that the stress moves onto the ending.

Parameters:
	s: The singular form, after some modifications (see function calls).
	pl: The suffix attached to form the plural.

Returns:
	The plural form.

	TODO: Should some plurals be automatic? e.g. Are words in -e always
	unchanged in plural?
--]]
local function get_pl(s, pl)
	-- For plurals identical with the singular give '*' as the plural ending argument.
	if (pl == '*') then return s; end

	-- U+0301 COMBINING ACUTE ACCENT. It is built at run time because the
	-- '\u{...}' escape does not exist in Lua 5.1 (the version Scribunto
	-- runs): there it is read as the literal characters 'u{0301}', so that
	-- any ending containing a plain <u> would wrongly count as accented.
	local acute = mw.ustring.char(0x301);
	local accented = '[áéíóúý' .. acute .. ']';

	s = s:gsub('ë$', '');
	-- metaphonesis, vowel raising [FS 18.g, h], triggered with '+'
	if (pl:find('^%+')) then
		pl = pl:sub(2);
		s = s:gsub('é', 'í'):gsub('á', 'é');
	end
	-- palatisation, triggered with 'j'
	if (pl:find('^j')) then
		pl = pl:sub(2);
		if (s:find('ll$')) then s = s:sub(0, -3) .. 'j'; -- j < ll [FS 18.i]
		elseif (s:find('[iu]r$')) then s = s:sub(0, -2) .. 'j'; -- j < r [FS 18.m], but only after <-i-, -u->
		elseif (s:find('k$')) then s = s:sub(0, -2) .. 'q'; -- q < k [FS 18.e]
		else s = s..'j'; end -- gj < g [FS 18.e]
	end
	-- move the stress if necessary. eg. in <atllárë>
	if (mw.ustring.find(pl, accented)) then
		s = mw.ustring.gsub(s, accented, {
			['á'] = 'a', ['é'] = 'e', ['í'] = 'i',
			['ó'] = 'o', ['ú'] = 'u', ['ý'] = 'y',
			[acute] = '',
		});
	end
	return s..pl; -- attaches the ending
end

--[[ void
	Adds plural inflected forms to the module-level infls table.

	Declared local: the function is only used inside this module and
	should not leak into the global environment.

Parameters:
	pl: The indefinite nominative/accusative plural form.
--]]
local function set_pl_infls(pl)
	set('i_NA_p', pl);
	set('i_dat_p', pl..'ve');
	set('i_abl_p', euf_i(pl, 'sh'));
	set('d_NA_p', fin_e(euf_i(pl, 't')));
	--[[ The definite dative/ablative plural used to end in <-vet>. This is either
		falling or has already fallen into disuse. I decided to exclude it, somewhat
		arbitrarily, though it can be easily reinstated if this meets objections.
	--]]
	set('d_DA_p', pl..'ve');
end

--[[ void
	Adds inflected forms to the module-level infls table.

	Declared local: the function is only used inside this module and
	should not leak into the global environment.

Parameters:
	g: Inflectional gender, i.e. the gender like which the term inflects as.
		Can be either 'm', 'f' or 'n'.
	arg: The term nominative/stem. Supports special characters '*' and '-':
		a character followed by '*' is dropped from the nominative (and
		from the base of the plural) but kept in the stem; a '-' is dropped
		from the nominative, while in the stem <ë-> falls, <úa-> becomes <ó>
		and <ýe-> becomes <é>.
	has_plur (bool): Indicates whether the term has a plural or is a singularia
		tantum. If false, the following two parameters shouldn't be provided.
	pl: The inflectional ending for plural. Supports special characters '+'
		and 'j'.
	hard_pl: The entire plural form if it cannot be obtained regularly.
--]]
local function set_infls(g, arg, has_plur, pl, hard_pl)

	-- U+0301 COMBINING ACUTE ACCENT. It is built at run time because the
	-- '\u{...}' escape does not exist in Lua 5.1 (the version Scribunto
	-- runs): there it is read as the literal characters 'u{0301}', so the
	-- classes below would never match the combining accent.
	local acute = mw.ustring.char(0x301);

	-- The outer parentheses drop the match count returned by gsub.
	set('i_NA_s', (arg:gsub('.%*', ''):gsub('%-', '')));

	-- set the above, the reduction can be started
	arg = arg
		:gsub('úa%-', 'ó') -- nominative -úa > -ó- [FS 26], eg. in <ftua>
		:gsub('ë%-', '') -- other schwas fall, eg. in <motër>
		:gsub('ýe%-', 'é'); -- nominative -ýe > -é-, eg. in <krye>

	local stem = arg
		:gsub('%*', '')
		:gsub('ë$', ''); -- final schwa falls, eg. in <vajzë>

	if (g == 'f') then -- feminine
		-- Between the stem and the inflectional ending, <-j-> is introduced
		-- to avoid hiatus. [FS 28.4, 5]
		local j = mw.ustring.find(stem, '[aeouyëáéóúý' .. acute .. ']$') and 'j' or '';
		set('i_DA_s', stem..j..'e');
		set('d_nom_s', stem:find('e$')
			-- Assuming all feminine nouns ending in unstressed <-e> lose
			-- it in favour of <-ja> in the definite form.
			and (stem:sub(0, -2):gsub('j$', '') .. 'ja')
			or (stem..j..'a'));
		-- Consonant-final stems need a schwa before <-n, -s>.
		local f_schwa = mw.ustring.find(stem, '[aeiouyëáéíóúý' .. acute .. ']$') and '' or 'ë';
		set('d_acc_s', fin_e(stem..f_schwa..'n'));
		set('d_DA_s', fin_e(stem..f_schwa..'s'));
	else
		--[[ The inflectional ending of masculine and neuter nouns is <-u>
			after velar consonants <-k, -g, -h> (to avoid palatisation),
			and after word-final stressed <-á, -é, -í>. It is <-i> in all
			other cases. [FS 26] This is not true for polysyllabic words with
			final <-á>. Final <-ý> is not mentioned in the source,
			but I'm assuming it triggers this as well, for the example
			of <sýri> / <sýu>, which although very dialectal and rare, and might
			be the only example, is theoretically good to consider.
		--]]
		local is_baba = mw.ustring.find(stem, '[aeiouë].+á$')
		local takes_u = (mw.ustring.find(stem, '[kgáéíý]$') or stem:find('[^szx]h$')) and not is_baba;
		local stem_lc = stem .. (takes_u and 'u' or 'i');
		set('i_DA_s', stem_lc);
		if (g == 'm') then -- masculine
			set('d_nom_s', stem_lc);
			--[[ Polysyllabic words in <-á> have as their accusative <-në>. This
				dialectally is true for words ending in <-í> as well, but it isn't
				standard.
			--]]
			set('d_acc_s', is_baba and (stem..'në') or (stem_lc..'n'));
			set('d_DA_s', stem_lc..'t');
		else -- neuter
			-- The definite nom/acc is built on every indefinite nom/acc
			-- form stored so far.
			for _, v in ipairs(infls.i_NA_s) do
				local d_NA_s = fin_e(euf_i(v, 't'));
				set('d_nom_s', d_NA_s);
				set('d_acc_s', d_NA_s);
			end
			set('d_DA_s', stem_lc..'t');
		end
	end

	if (has_plur) then
		set_pl_infls(
			hard_pl -- the |pl= argument allows irregular plurals
			or get_pl(arg:gsub('.%*', ''), pl)
		);
	end

end

--[[ void
	Overwrites calculated forms with the ones given manually in the
	template's arguments, then checks that the lemma slot is sane.

	Only slots that already exist in infls can be overwritten; the
	argument name is the slot name (e.g. |d_nom_s=). Several forms can be
	given separated by '/', each with an optional qual in square brackets.

Parameters:
	args: The template arguments.

Raises:
	An error if no indefinite nom/acc slot exists at all, or if the
	indefinite nom/acc slot (singular, or plural for pluralia tantum)
	holds more than one form.
--]]
local function overwrite_manual(args)
	-- Assigning to keys that already exist is allowed while traversing with pairs.
	for slot in pairs(infls) do
		local manual = args[slot];
		if (manual) then
			infls[slot] = {};
			for vt in loop(manual) do
				set(slot, q_b(vt)); q_b_e();
			end
		end
	end
	-- Check if there's more than one value in the indefinite nom/acc slot.
	-- Not something specific to manual overwriting, but structurally it
	-- makes sense to keep it here.
	local lemma = infls.i_NA_s or infls.i_NA_p;
	if (not lemma) then
		-- Without this guard the length operator below would fail with an
		-- unhelpful "attempt to get length of a nil value".
		error("No indefinite nom/acc form could be determined. Please check the main form argument.")
	end
	if (#lemma > 1) then
		error("There should not be more than one indefinite nom/acc form. Please place "
			.."the declension in a separate entry as an alternative form.")
	end
end

--[[ any
	Shared driver of export.head and export.table: reads the template
	arguments, fills the module-level infls table and hands over to one
	of the callbacks, whose return value is returned.

Parameters:
	args: The template arguments. |1= is the gender ('m', 'f', 'n', an
		agreemental/inflectional pair such as 'm/f', or 'im' for gerunds
		in <-im>), |2= the main form(s), |3= the plural ending(s) ('-' for
		uncountable terms, '!' for pluralia tantum), |pl= irregular
		plural(s). Slot names can be given to overwrite single forms.
	func: Called as func(agreemental_gender, request_plural) for terms
		with both numbers; request_plural is true if no plural was given.
	sing_func: Called as sing_func(agreemental_gender) for uncountable terms.
	plur_func: Called as plur_func(agreemental_gender..'-p') for pluralia tantum.
	ger_im_func: Called as ger_im_func(stem) for gerunds in <-im>.
--]]
local function template_export(args, func, sing_func, plur_func, ger_im_func)

	local g = args[1];

	-- Gerunds ending in <-im> work rather curiously, which is why treating
	-- them as a separate gender in the module's infrastructure seemed like
	-- a good idea.
	if (g == 'im') then
		-- The stem is the page name without its last two bytes (<im>).
		-- getCurrentTitle() returns a title object, not a string, so the
		-- page name has to be read from its .text field.
		-- NOTE(review): |head= is passed on unchanged, i.e. it is taken to
		-- be the stem already — confirm this is what is wanted.
		return ger_im_func(args.head or mw.title.getCurrentTitle().text:sub(0, -3));
	end

	-- Create empty table that will be filled by void functions.
	infls = {};

	-- A word can be of a gender (in terms of article and adjective agreement)
	-- while conjugating like another one, eg. <babë>.
	local a_g = g:gsub('/.+', ''); -- agreemental gender
	local i_g = g:gsub('.+/', ''); -- inflectional gender

	-- Number of quals currently pushed onto c_q.
	local function qual_depth()
		return c_q and #c_q or 0;
	end

	-- Runs action(form) for every '/'-separated form in list. The qual of a
	-- form stays active while action runs and is popped afterwards, but only
	-- if the form really had one: popping unconditionally would remove the
	-- qual of an enclosing form as soon as a form without qual is met.
	local function each_form(list, action)
		for raw in loop(list) do
			local depth = qual_depth();
			action(q_b(raw));
			if (qual_depth() > depth) then q_b_e(); end
		end
	end

	-- Uncountable terms, for which give '-' as the plural ending argument.
	if (args[3] == '-') then
		each_form(args[2], function(vt)
			set_infls(i_g, vt, false);
		end);
		overwrite_manual(args);
		return sing_func(a_g);

	-- Pluralia tantum, for which give '!' as the plural ending argument.
	elseif (args[3] == '!') then
		each_form(args[2], function(vp)
			set_pl_infls(vp);
		end);
		overwrite_manual(args);
		return plur_func(a_g..'-p');

	else
		local request_plural = false;
		each_form(args[2], function(vt)
			if (args.pl) then
				each_form(args.pl, function(vp)
					set_infls(i_g, vt, true, nil, vp);
				end);
			elseif (args[3]) then
				each_form(args[3], function(vp)
					set_infls(i_g, vt, true, vp);
				end);
			else
				set_infls(i_g, vt, false);
				request_plural = true; -- if no plural is given, request it.
			end
		end);
		overwrite_manual(args); -- TODO: I don't like I'm calling this function three distinct times.
		return func(a_g, request_plural);
	end
end

-- Entry point for the headword line. Reads the arguments of the calling
-- template, lets template_export() compute the forms, and passes the
-- resulting data to Module:headword. The invocation argument |proper=
-- switches the part of speech category from 'noun' to 'proper noun'.
function export.head(frame)

	local args = frame:getParent().args;

	local data = {
		lang = require('Module:languages').getByCode('sq'),
		pos_category = frame.args.proper and 'proper noun' or 'noun',
	};

	-- Turns a slot table into an inflection entry: sets its label and
	-- replaces every form that has quals with a {term, q} table, the quals
	-- being joined with commas. The table is modified in place and returned.
	local function label(x, l)
		x.label = l;
		for i, v in ipairs(x) do
			local key = 'q'..i;
			local form_quals = x[key];
			if (form_quals) then
				x[i] = {term = v, q = table.concat(form_quals, ', ')};
				x[key] = nil;
			end
		end
		return x;
	end

	-- Singular only: gender, lemma and definite form.
	local function head_singular(g)
		data.genders = {g};
		data.heads = infls.i_NA_s;
		data.inflections = {
			label(infls.d_nom_s, 'definite'),
		};
	end

	-- Both numbers: as above, followed by the plural or by a request for it.
	local function head_full(g, request_plural)
		head_singular(g);
		local plural;
		if (request_plural) then
			plural = { label =
				'<small>[please provide plural]</small>' ..
				'[[Category:Requests for inflections in Albanian entries]]'
			};
		else
			plural = label(infls.i_NA_p, 'plural');
		end
		table.insert(data.inflections, plural);
	end

	-- Plural only: the lemma is the indefinite plural.
	local function head_plural(g)
		data.genders = {g};
		data.heads = infls.i_NA_p;
		data.inflections = {
			label(infls.d_NA_p, 'definite'),
		};
	end

	-- Gerund in <-im>: masculine in the singular, feminine in the plural.
	local function head_gerund(stem)
		data.genders = {'m'};
		data.heads = {stem..'ím'};
		data.inflections = {
			{ label = 'definite', stem..'ími' },
			{ label = 'plural', {
				term = stem..'íme',
				genders = {'f'},
			} }
		};
	end

	-- TODO: Is it fine if I use this voidly here while returningly in the table function?
	template_export(args, head_full, head_singular, head_plural, head_gerund);

	-- The arguments |f= and |m= allow feminine and masculine equivalents
	-- respectively; |f= wins if both are given.
	local equivalents, equivalents_label;
	if (args.f) then
		equivalents, equivalents_label = args.f, 'feminine equivalent';
	elseif (args.m) then
		equivalents, equivalents_label = args.m, 'masculine equivalent';
	end
	if (equivalents) then
		table.insert(data.inflections, label(split(equivalents), equivalents_label));
	end

	return require('Module:headword').full_headword(data);

end

-- Entry point for the declension table. Reads the arguments of the calling
-- template, lets template_export() compute the forms, and expands the table
-- template matching the kind of term (both numbers, singular only, plural
-- only, gerund in <-im>).
function export.table(frame)

	-- Builds the arguments of a table template: the linked forms of the
	-- given slots as positional arguments, in order, then the gender and
	-- the list of quals. The quals are formatted last, once every cell has
	-- registered its own.
	local function cells(g, slots)
		local targs = {};
		for position, slot in ipairs(slots) do
			targs[position] = link(infls[slot]);
		end
		targs['g'] = g;
		targs.quals = format_quals();
		return targs;
	end

	local function expand(title, targs)
		return frame:expandTemplate { title = title, args = targs };
	end

	-- The template names are of course temporary.

	local function full_table(g)
		return expand('Template:User:Catonif/sq-noun-table', cells(g, {
			'i_NA_s', 'd_nom_s', 'd_acc_s', 'i_NA_p', 'd_NA_p',
			'i_DA_s', 'd_DA_s', 'i_dat_p', 'i_abl_p', 'd_DA_p',
		}));
	end

	local function singular_table(g) -- sing only
		return expand('Template:User:Catonif/sq-noun-table-s', cells(g, {
			'i_NA_s', 'd_nom_s', 'd_acc_s', 'i_DA_s', 'd_DA_s',
		}));
	end

	local function plural_table(g) -- plur only
		return expand('Template:User:Catonif/sq-noun-table-p', cells(g, {
			'i_NA_p', 'd_NA_p', 'i_dat_p', 'i_abl_p', 'd_DA_p',
		}));
	end

	local function gerund_table(stem) -- gerund <-im>
		return expand('Template:User:Catonif/sq-noun-table-im', { stem });
	end

	return template_export(frame:getParent().args,
		full_table, singular_table, plural_table, gerund_table);

end

return export;