require('strict');

local getArgs = require ('Module:Arguments').getArgs;
local override_data = mw.loadData ('Module:Language/data/ISO 639 override');


--[[--------------------------< E R R O R _ M E S S A G E S >--------------------------------------------------

TODO: change to default hiding of error messages?  show with personal css override:
	.show_639_err_msgs {display: inline !important;}

]]

local error_messages = {
	-- wrapper applied to every error message; $1 is replaced with one of the err_text strings below.
	-- alternative wrapper that renders with display:none (kept for reference, not active):
--	err_msg = '<span style="font-size:100%; display:none" class="error show_639_err_msgs">error: $1</span>[[Category:ISO 639 name template errors]]',
	err_msg = '<span style="font-size:100%;" class="error show_639_err_msgs">error: $1</span>[[Category:ISO 639 name template errors]]',

	err_text = {
		-- used only by the code-to-name functions
		ietf = '$1 is an IETF tag',												-- $1: the ietf tag
		required = 'ISO 639$1 code is required',								-- $1: 639 part suffix ('-1', '-2', '-3', '-5'); may be empty string
		not_code = '$1 is not an ISO 639$2 code',								-- $1: non-code input; $2: 639 part suffix; may be empty string

		-- used by the code-to-name functions and by iso_639_name_to_code()
		not_found = '$1 not found in ISO 639-$2 list',							-- $1: code or language name; $2: 639 part suffix(es)

		-- used only by iso_639_name_to_code()
		name = 'language name required',
		not_part = '$1 not an ISO 639 part',									-- $1: invalid 639 suffix (without hyphen)
		no_code = 'no code in ISO 639-$1 for $2',								-- $1: 639 part suffix; $2: language name
	},
}


--[[--------------------------< I S _ S E T >------------------------------------------------------------------

Returns true if argument is set; false otherwise. Argument is 'set' when it exists (not nil) and is not an empty string.

]]

local function is_set (var)
	-- 'set' means: neither nil nor the empty string
	return var ~= nil and var ~= '';
end


--[=[-------------------------< M A K E _ W I K I L I N K >----------------------------------------------------

Makes a wikilink; when both link and display text are provided, returns a wikilink in the form [[L|D]]; if only
link is provided, returns a wikilink in the form [[L]]; if neither is provided or link is omitted, returns an
empty string.

]=]

local function make_wikilink (link, display)
	if not is_set (link) then
		return '';																-- no link target: nothing to render
	end

	if not is_set (display) then
		return '[[' .. link .. ']]';											-- plain link: [[L]]
	end

	return '[[' .. link .. '|' .. display .. ']]';								-- piped link: [[L|D]]
end


--[[--------------------------< S U B S T I T U T E >----------------------------------------------------------

Populates numbered arguments in a message string using an argument table.

]]

local function substitute (msg, args)
	if not args then
		return msg;																-- nothing to substitute; hand the message back untouched
	end

	-- let the message library fill in $1, $2, ... from args
	return mw.message.newRawMessage (msg, args):plain() or msg;
end


--[[--------------------------< E R R O R _ M S G >------------------------------------------------------------

create an error message

]]

local function error_msg (msg, arg)
	-- msg is a key into error_messages.err_text; arg supplies that text's $n values
	local text = substitute (error_messages.err_text[msg], arg);
	-- wrap the finished text in the common error-message markup
	return substitute (error_messages.err_msg, text);
end


--[[--------------------------< L A N G _ N A M E _ G E T >----------------------------------------------------

returns first listed language name for code from data{} table; strips parenthetical disambiguation; wikilinks to
the language article if link is true; returns nil else

]]

local function lang_name_get (code, data, link, label)
	local entry = data[code];
	if not entry then
		return;																	-- code not in this table; caller sees nil
	end

	local name = entry[1]:gsub ('%s*%b()', '');									-- first listed name, minus any parenthetical disambiguator
	if not link then
		return name;															-- plain, unlinked name
	end

	if name:find ('languages') then
		return make_wikilink (name, label);										-- collective languages: simple wikilink unless there is a label
	end

	local article = override_data.article_name[code];
	if article then
		return make_wikilink (article[1], label or name);						-- article title from override data; label or name as display text
	end

	return make_wikilink (name .. ' language', label or name);					-- [[name language|name]] or [[name language|label]]
end


--[[--------------------------< A D D _ I E T F _ E R R O R _ M S G >------------------------------------------

assembles return-text (language code, language name, or error message) with IETF error message into properly
formatted readable text

]]

local function add_ietf_error_msg (text, ietf_err)
	-- a separating space is needed only when there is an IETF error message to append
	local separator = ('' == ietf_err) and '' or ' ';
	return table.concat ({text, separator, ietf_err});
end


--[[--------------------------< _ I S O _ 6 3 9 _ N A M E >----------------------------------------------------

searches through the ISO 639 language tables for a name that matches the supplied code.  on success returns the first
language name that matches code from template frame, perhaps followed by an IETF error message, and a second return
value of true; on failure returns an error message (preceded by the supplied code when that code is not two or three
characters long) and a second return value of nil.  The second return value is used by iso_639_name_exists()

looks first in the override data; two-character codes are then looked up in the IANA language data (639-1) and
three-character codes sequentially in the 639-2, -3, and -5 data

]]

local function _iso_639_name (frame)
	local args = getArgs (frame);
	local raw_code = args[1];													-- code exactly as supplied; used in the IETF error message

	if not raw_code then
		return error_msg ('required', '');										-- empty string: no particular 639 part is named in the message
	end

	local link = 'yes' == args.link;											-- only the literal 'yes' requests a wikilink

	-- drop everything from the first hyphen onward; strip_count is non-zero when something was dropped
	local code, strip_count = raw_code:gsub ('(.-)%-.*', '%1');
	local ietf_err = '';														-- empty string so it can always be concatenated
	if 0 ~= strip_count then
		ietf_err = error_msg ('ietf', raw_code);
	end

	if #code < 2 or #code > 3 then												-- 639 codes are two or three characters only
		return code .. ' ' .. error_msg ('not_code', {code, ''});				-- whatever is in code + an error message; empty string hides the part suffix
	end

	local lc_code = code:lower();												-- data tables are indexed by lowercase code
	local name = lang_name_get (lc_code, override_data.override, link, args.label);	-- override table has priority

	if not name then
		local sources;															-- data modules to try, in order
		if 2 == #lc_code then
			sources = {'Module:Language/data/iana languages'};					-- this data used only for ISO 639-1 language codes / names listed there
		else
			sources = {
				'Module:Language/data/ISO 639-2',
				'Module:Language/data/ISO 639-3',
				'Module:Language/data/ISO 639-5',
				};
		end

		for _, source in ipairs (sources) do
			name = lang_name_get (lc_code, mw.loadData (source), link, args.label);
			if name then
				break;															-- first hit wins
			end
		end
	end

	if name then
		return add_ietf_error_msg (name, ietf_err), true;						-- second value signals 'found'
	end

	return error_msg ('not_found', {code, '1, -2, -3, -5'});					-- here when code is not found in any data table
end


--[[--------------------------< I S O _ 6 3 9 _ N A M E >------------------------------------------------------

template entry point; returns first language name that matches code from template frame or an error message
looks first in the override data and then sequentially in the 639-1, -2, -3, and -5 data

]]

local function iso_639_name (frame)
	-- parentheses keep only the first return value (name and / or error message); the 'found' flag is dropped
	return (_iso_639_name (frame));
end


--[[--------------------------< I S O _ 6 3 9 _ N A M E _ E X I S T S >----------------------------------------

template entry point; returns true if language code maps to a language name; intended as a replacement for:
	{{#exist:Template:ISO 639 name <code>|<exists>|<doesn't exist>}}
Instead of that expensive parser function call use this function:
	{{#if:{{#invoke:Sandbox/trappist the monk/ISO 639 name|iso_639_name_exists|<code>}}|<exists>|<doesn't exist>}}
on success, returns true; nil else

]]

local function iso_639_name_exists (frame)
	-- skip the name / error message; keep only the second return value (true when a name was found, nil otherwise)
	local found = select (2, _iso_639_name (frame));
	return found;
end


--[[--------------------------< I S O _ 6 3 9 _ C O D E _ C O M M O N >----------------------------------------

this is code that is common to all of the iso_639_code_n() functions which serve only as template entry points to
provide the frame, the name of the appropriate data source, and to identify which 639 part applies.

this function returns a language name or an error message

]]

local function iso_639_code_common (frame, source, part)
	-- frame:  template frame; args[1] is the language code, args.link requests a wikilinked name
	-- source: name of the data module that holds the code-to-name table for this 639 part
	-- part:   639 part number (1, 2, 3, or 5); used for length validation and in error messages
	-- returns a language name (possibly followed by an IETF error message) or an error message
	local args = getArgs (frame);
	local raw_code = args[1];													-- code exactly as supplied; used in the IETF error message

	if not raw_code then														-- if code not provided in the template call
		return error_msg ('required', '-' .. part);								-- abandon
	end

	-- drop everything from the first hyphen onward; strip_count is non-zero when something was dropped
	local code, strip_count = raw_code:gsub ('(.-)%-.*', '%1');
	local ietf_err = (0 ~= strip_count) and error_msg ('ietf', raw_code) or '';	-- empty string for concatenation when nothing was stripped

	if (1 == part and 2 ~= #code) or (1 < part and 3 ~= #code) then				-- 639-1 codes are 2 characters only; all others 3 characters
		return error_msg ('not_code', {code, '-' .. part});
	end

	-- make link a real boolean, same rule as _iso_639_name(): only |link=yes links.
	-- passing the raw string would make any non-empty value (even 'no') produce a wikilink
	local link = 'yes' == args.link;

	local name = lang_name_get (code:lower(), mw.loadData (source), link);		-- nil when code is not in the table

	return add_ietf_error_msg (name or error_msg ('not_found', {code, part}), ietf_err);
end


--[[--------------------------< I S O _ 6 3 9 _ C O D E _ 1 >--------------------------------------------------

template entry point; returns first language name that matches ISO 639-1 code from template frame or an error message

]]

local function iso_639_code_1 (frame)
	local source = 'Module:Language/data/iana languages';						-- this data used only for ISO 639-1 language codes / names listed there
	return iso_639_code_common (frame, source, 1);
end


--[[--------------------------< I S O _ 6 3 9 _ C O D E _ 2 >--------------------------------------------------

template entry point; returns first language name that matches ISO 639-2 code from template frame or an error message

]]

local function iso_639_code_2 (frame)
	local source = 'Module:Language/data/ISO 639-2';							-- ISO 639-2 language codes / names
	return iso_639_code_common (frame, source, 2);
end


--[[--------------------------< I S O _ 6 3 9 _ C O D E _ 3 >--------------------------------------------------

template entry point; returns first language name that matches ISO 639-3 code from template frame or an error message

]]

local function iso_639_code_3 (frame)
	local source = 'Module:Language/data/ISO 639-3';							-- ISO 639-3 language codes / names
	return iso_639_code_common (frame, source, 3);
end


--[[--------------------------< I S O _ 6 3 9 _ C O D E _ 5 >--------------------------------------------------

template entry point; returns first language name that matches ISO 639-5 code from template frame or an error message

]]

local function iso_639_code_5 (frame)
	local source = 'Module:Language/data/ISO 639-5';							-- ISO 639-5 language codes / names
	return iso_639_code_common (frame, source, 5);
end


--[[--------------------------< I S O _ 6 3 9 _ N A M E _ T O _ C O D E >--------------------------------------------------

template entry point; returns ISO 639-1, -2, -3, or -5 code associated with language name according to part (1, 2, 3, 5) argument;
when part is not provided scans parts 1, 2, 3, 5 in that order and returns the first code found

]]

local function iso_639_name_to_code (frame)
	-- frame: template frame; args[1] is the language name, args[2] is the optional 639 part (1, 2, 3, or 5)
	-- returns the matching code, or an error message when the name is missing, the part is invalid,
	-- the name is unknown, or the name has no code in the requested part
	local args = getArgs (frame);

	if not args[1] then
		return error_msg ('name');
	end

	local name = args[1];														-- used in error messaging
	local lc_name = name:lower();												-- lowercase version of name for matching and indexing

	local part;																	-- requested 639 part as a number; nil when not specified
	if args[2] then
		part = tonumber (args[2]);												-- nil when args[2] is not numeric
		local valid_parts = {[1] = true, [2] = true, [3] = true, [5] = true};
		if not part or not valid_parts[part] then
			return error_msg ('not_part', args[2]);								-- report what was supplied, numeric or not
		end
	end

	-- first check the override data; only the first listed name of each entry is compared
	local fallback_code;														-- a non-2-character code, kept in case no 2-character code turns up
	for code, names in pairs (override_data.override) do
		if lc_name == names[1]:lower() then
			local code_len = #code;
			if not part then													-- part not specified
				if 2 == code_len then
					return code;												-- always prefer a 2-character code
				end
				fallback_code = code;
			elseif 1 == part then												-- part 1 codes are 2 characters
				if 2 == code_len then
					return code;
				end
			elseif 3 == code_len then											-- parts 2, 3, 5 codes are 3 characters
				return code;
			end
		end
	end

	if fallback_code then
		return fallback_code;
	end

	-- not in the override data; use the ISO 639 language-name-to-code table
	local codes = mw.loadData ('Module:Language/data/ISO 639 name to code')[lc_name];

	if not codes then
		return error_msg ('not_found', {name, part or '1, -2, -3, -5'});
	end

	if part then
		-- part 5 codes are stored at index 4 (there is no 639-4); keep part itself intact so that
		-- the error message names the part that was actually requested
		local index = (5 == part) and 4 or part;
		if '' ~= codes[index] then
			return codes[index];
		end
		return error_msg ('no_code', {part, name});								-- no code in ISO 639-part for language
	end

	for index = 1, 4 do															-- no part provided: first available code in part order 1, 2, 3, 5
		if '' ~= codes[index] then
			return codes[index];
		end
	end
	-- falls through and returns nothing when every slot is an empty string
end


--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------
]]

local exports = {};																-- functions callable from templates via #invoke

exports.iso_639_name = iso_639_name;
exports.iso_639_name_exists = iso_639_name_exists;
exports.iso_639_code_1 = iso_639_code_1;
exports.iso_639_code_2 = iso_639_code_2;
exports.iso_639_code_3 = iso_639_code_3;
exports.iso_639_code_5 = iso_639_code_5;
exports.iso_639_name_to_code = iso_639_name_to_code;

return exports;