-- Modul:Webarchive
-- Megjelenés
-- Webarchive[mi ez?] • [dokumentáció: mutat, ] • [tesztek: létrehozás]
--[[ ----------------------------------
Lua module implementing the {{webarchive}} template.
A merger of the functionality of three templates: {{wayback}}, {{webcite}} and {{cite archives}}
]]
require('strict')
-- Export table of the module; the public entry points are attached to it
-- further down and it is returned at the end of the file.
local p = {}
-- Associative array to hold tracking categories. Keys are full category
-- titles; the value is always 1 (the table is used as a set).
local track = {} -- Associative array to hold tracking categories
--[[--------------------------< configuration >-----------------------
Global configuration variables
]]
local tname = 'Webarchive' -- name of calling template. Change if template rename.
local maxurls = 10 -- Max number of URLs allowed.
-- Passed as the 4th argument of every mw.ustring.find() call in this file.
-- It is nil, so the search strings are interpreted as patterns rather than as
-- literal text: "." matches any character, and strings such as
-- "archive[-]it.org" rely on character-class syntax to match a literal "-".
local plain = nil
--[[--------------------------< inlineRed >-----------------------
Render a text fragment in red, such as a warning as part of the final output.
Add tracking category.

msg      - text (wikitext/HTML) to wrap in the error span
trackmsg - "warning" or "error" selects the tracking category that is
           registered in track[]; any other value (including nil)
           registers nothing
Returns the wrapped message.

NOTE: declared before inlineError() on purpose. A 'local function' is only
visible to code that follows its declaration, so inlineError() must come
after this function to be able to call it.
]]
local function inlineRed(msg, trackmsg)
	if trackmsg == "warning" then
		track["Kategória:Webarchive sablon figyelmeztetés"] = 1
	elseif trackmsg == "error" then
		track["Kategória:Webarchive sablon hiba"] = 1
	end
	return '<span style="font-size:100%" class="error citation-comment">' .. msg .. '</span>'
end
--[[--------------------------< inlineError >-----------------------
Critical error. Render output completely in red. Add to tracking category.

arg - name of the template parameter the editor should check
msg - additional explanation appended to the message
Returns the rendered error message.
]]
local function inlineError(arg, msg)
	return inlineRed('Hiba a webarchive sablonban: Ellenőrizd a(z) <code style="color:inherit; border:inherit; padding:inherit;">|' .. arg .. '=</code> értékét. ' .. msg, 'error')
end
--[[--------------------------< trimArg >-----------------------
]]
local function trimArg(arg)
	-- Normalise a template argument: returns the value with surrounding
	-- whitespace removed, or nil when the argument is missing, empty, or
	-- consists of whitespace only.
	if arg == nil or arg == "" then
		return nil
	end
	local trimmed = mw.text.trim(arg)
	-- A whitespace-only value trims down to "", which is truthy in Lua and
	-- would otherwise be mistaken for a supplied argument by callers that
	-- test "if trimArg(x) then".
	if trimmed == "" then
		return nil
	end
	return trimmed
end
--[[--------------------------< base62 >-----------------------
Convert base-62 to base-10
Credit: https://de.wikipedia.org/wiki/Modul:Expr
]]
local function base62( value )
	-- Interprets 'value' as a base-62 number using the digits 0-9, A-Z and
	-- a-z (in that order of increasing weight) and returns it in base 10.
	-- Returns 1 when 'value' is empty or contains anything else.
	if not value:match( "^%w+$" ) then
		return 1
	end
	local total = 0
	local weight = 1 -- 62^0, 62^1, ... while walking from the last character to the first
	for pos = #value, 1, -1 do
		local code = value:byte( pos )
		local digit
		if code >= 48 and code <= 57 then -- "0".."9" -> 0..9
			digit = code - 48
		elseif code >= 65 and code <= 90 then -- "A".."Z" -> 10..35
			digit = code - 55
		elseif code >= 97 and code <= 122 then -- "a".."z" -> 36..61
			digit = code - 61
		else
			-- %w accepted a character outside the three ASCII ranges
			return 1
		end
		total = total + digit * weight
		weight = weight * 62
	end
	return total
end
--[[--------------------------< makeDate >-----------------------
Given an ISO or spelled-out DMY/MDY date, format it in Hungarian.
]]
local function makeDate(date)
	-- Formats 'date' with the content language's formatDate().
	-- Returns the bare year when no valid month (1..12) is present, a
	-- 'Y. F' rendering when no valid day (1..31) is present, and a
	-- 'Y. F j.' rendering otherwise.
	-- Raises an error when 'date' is not a string; errors raised by
	-- formatDate() are not caught here.
	assert(type(date) == 'string', 'makeDate(): string expected, got ' .. type(date))

	-- Dates containing a spelled-out month are first converted to a
	-- dash-separated numeric form so they can be split below.
	local hasDayMonthYear = date:match('%a+ %d+, %d+') or date:match('%d+ %a+ %d+')
	if hasDayMonthYear then
		date = mw.getContentLanguage():formatDate('Y-m-d', date)
	elseif date:match('%a+ %d+') then
		date = mw.getContentLanguage():formatDate('Y-m', date)
	end

	local parts = mw.text.split(date, '-')
	local year = parts[1]
	local zmonth = parts[2] or '' -- month exactly as written (may carry a leading 0)
	local zday = parts[3] or '' -- day exactly as written

	-- The numeric values are only used for the range checks; the strings as
	-- written are what gets passed on to formatDate().
	local monthDigits = zmonth:match("0*(%d+)")
	local monthNum = monthDigits and tonumber(monthDigits)
	if not monthNum or monthNum < 1 or monthNum > 12 then
		return year
	end

	local dayDigits = zday:match("0*(%d+)")
	local dayNum = dayDigits and tonumber(dayDigits)
	local layout, iso
	if not dayNum or dayNum < 1 or dayNum > 31 then
		layout, iso = 'Y. F', year .. '-' .. zmonth .. '-01'
	else
		layout, iso = 'Y. F j.', year .. '-' .. zmonth .. '-' .. zday
	end
	return mw.getContentLanguage():formatDate(layout, iso)
end
--[[------------------------------< dateI >---------------------------
Add the appropriate suffix to the date, e.g. 2016-os, 2016. szeptemberi, 2016. szeptember 1-ji
]]
local function dateI(date)
	-- Turns a formatted date into its suffixed form:
	--   digits only              -> delegated to hanyas() of Modul:Nyelvtani modul
	--   "<year>. <month>"        -> "i" appended
	--   "<year>. <month> <day>." -> final "." replaced by "-ji" when the day
	--                               is 1, otherwise by "-i"
	-- Anything else (for example an inline error span) is returned unchanged.
	-- Raises an error when 'date' is not a string.
	assert(type(date) == 'string', 'dateI(): string expected, got ' .. type(date))
	if date:match('^%d+$') then
		return require('Modul:Nyelvtani modul').hanyas(date)
	elseif date:match('^%d+%. [a-záéíóöőúüű]+$') then
		return date .. 'i'
	elseif date:match('^%d+%. [a-záéíóöőúüű]+ %d+%.') then
		local suffix = '-i'
		if date:match(' 1%.$') then
			suffix = '-ji'
		end
		-- The parentheses drop gsub's second return value (the replacement
		-- count) so that callers receive exactly one value.
		return (date:gsub('%.$', suffix))
	end
	return date
end
--[[--------------------------< decodeWebciteDate >-----------------------
Given a URI-path to Webcite (eg. /67xHmVFWP) return the formatted date
]]
local function decodeWebciteDate(path)
	-- Returns one of:
	--   nil           - the path carries no snapshot ID (e.g. "" or "/");
	--                   the caller reports this as a date error
	--   "query"       - a valid WebCite URL form that does not encode a date
	--   <string>      - the formatted date, or an inline error span when the
	--                   decoded date is implausible

	-- The ID is the first path segment after the leading "/".
	local id = mw.text.split(path or "", "/")[2]
	if not id or id == "" then
		-- Without this guard a missing segment would be passed to
		-- mw.ustring.find() as nil, and an empty one would decode to a
		-- bogus date instead of being reported.
		return nil
	end

	-- valid URL formats that are not base62
	-- http://www.webcitation.org/query?id=1138911916587475
	-- http://www.webcitation.org/query?url=http..&date=2012-06-01+21:40:03
	-- http://www.webcitation.org/1138911916587475
	-- http://www.webcitation.org/cache/73e53dd1f16cf8c5da298418d2a6e452870cf50e
	-- http://www.webcitation.org/getfile.php?fileid=1c46e791d68e89e12d0c2532cc3cf629b8bc8c8e
	if
		mw.ustring.find( id, "query", 1, plain) or
		mw.ustring.find( id, "cache", 1, plain) or
		mw.ustring.find( id, "getfile", 1, plain) or
		tonumber(id)
	then
		return "query"
	end

	-- The ID is decoded from base 62; the first 10 decimal digits of the
	-- result are used as a timestamp in seconds for os.date().
	local seconds = tonumber(string.sub(string.format("%d", base62(id)), 1, 10))
	local full = os.date("%Y-%m-%d", seconds)
	local fields = mw.text.split(full, "-")
	local year, month, day = tonumber(fields[1]), tonumber(fields[2]), tonumber(fields[3])

	if not year or not month or not day then
		return inlineRed("[Dátumhiba] (1)", "error")
	end
	if month > 12 or day > 31 or month < 1 then
		return inlineRed("[Dátumhiba] (2)", "error")
	end
	if year > tonumber(os.date("%Y")) or year < 1900 then
		return inlineRed("[Dátumhiba] (3)", "error")
	end

	-- makeDate() may raise; turn that into an inline error as well.
	local ok, fulldate = pcall(makeDate, full)
	if not ok then
		return inlineRed("[Dátumhiba] (4)", "error")
	end
	return fulldate
end
--[[--------------------------< decodeWaybackDate >-----------------------
Given a URI-path to Wayback (eg. /web/20160901010101/http://example.com )
return the formatted date eg. "2016. szeptember 1."
Handle non-digits in snapshot ID such as "re_" and "-" and "*"
]]
local function decodeWaybackDate(path)
	-- Returns "index" for a "*" snapshot ID, the formatted date for a
	-- plausible snapshot ID, and an inline error span otherwise.

	-- Remove leading "/web/" or "/" and keep the first remaining path
	-- segment, which is the snapshot ID.
	local remainder = string.gsub(path, "^/w?e?b?/?", "")
	local stamp = mw.text.split(remainder, "/")[1]
	if stamp == "*" then -- eg. /web/*/http..
		return "index"
	end

	stamp = string.gsub(stamp, "[a-z][a-z]_[0-9]?$", "") -- Remove any trailing "re_" from date
	stamp = string.gsub(stamp, "[-]", "") -- Remove dashes from date eg. 2015-01-01
	stamp = string.gsub(stamp, "[*]$", "") -- Remove trailing "*"

	if not tonumber(stamp) then
		return inlineRed("[Dátumhiba] (2)", "error")
	end
	if string.len(stamp) < 4 then
		return inlineRed("[Dátumhiba] (3)", "error")
	end

	-- Right-pad short IDs with zeros to the full 14 characters; string.rep
	-- yields "" for a non-positive count, so longer IDs are left as they are.
	local padded = stamp .. string.rep("0", 14 - string.len(stamp))
	local year = string.sub(padded, 1, 4)
	local month = string.sub(padded, 5, 6)
	local day = string.sub(padded, 7, 8)

	local y, m, d = tonumber(year), tonumber(month), tonumber(day)
	if not (y and m and d) then
		return inlineRed("[Dátumhiba] (4)", "error")
	end
	if m > 12 or d > 31 or m < 1 then
		return inlineRed("[Dátumhiba] (5)", "error")
	end
	if y > tonumber(os.date("%Y")) or y < 1900 then
		return inlineRed("[Dátumhiba] (6)", "error")
	end

	-- makeDate() may raise; report that as an inline error too.
	local ok, fulldate = pcall(makeDate, year .. '-' .. month .. '-' .. day)
	if ok then
		return fulldate
	end
	return inlineRed("[Dátumhiba] (7)", "error")
end
--[[--------------------------< serviceName >-----------------------
Given a domain extracted by mw.uri.new() (eg. web.archive.org) set tail string and service ID
]]
local function serviceName(url, nolink)
	-- url    - table with a 'host' field; receives the 'service' and 'tail'
	--          fields and is returned
	-- nolink - when truthy, service names built with linked() are emitted
	--          without the surrounding [[ ]]
	-- Also registers exactly one tracking category in track[].

	-- A URL without a host (e.g. one lacking a scheme) must not crash the
	-- module: it is treated as an unknown archive.
	local host = url.host or ""

	local bracketopen, bracketclose = "[[", "]]"
	if nolink then
		bracketopen, bracketclose = "", ""
	end
	local function linked(name)
		return bracketopen .. name .. bracketclose
	end

	-- NOTE: 'plain' is nil, so every search string below is a pattern:
	-- "." matches any character and a literal "-" has to be written "[-]".
	local function hostHas(pattern)
		return mw.ustring.find( host, pattern, 1, plain )
	end
	local function hostHasAny(patterns)
		for _, pattern in ipairs(patterns) do
			if hostHas(pattern) then
				return true
			end
		end
		return false
	end

	local tracking = "Kategória:Webarchive sablon egyéb archívumokkal"
	url.service = "other"
	url.tail = " a(z) " .. host .. " archívumban " .. inlineRed("Hiba: ismeretlen archívum-URL")

	if hostHas("webarchive.org.uk") then
		-- Must be tested before "archive.org": that pattern also matches
		-- "webarchive.org.uk", which used to make this branch unreachable.
		url.tail = " at the " .. linked("UK Web Archive")
	elseif hostHas("archive.org") then
		url.service = "wayback"
		url.tail = " a " .. linked("Wayback Machine") .. "-ben"
		tracking = "Kategória:Webarchive sablon Wayback Machine linkkel"
	elseif hostHas("webcitation.org") then
		url.service = "webcite"
		url.tail = " a " .. linked("WebCite") .. "-on"
		tracking = "Kategória:Webarchive sablon WebCite linkkel"
	elseif hostHasAny({
		"archive.is", "archive.fo", "archive.today", "archive.il", "archive.ec",
		-- further domains of the same service
		"archive.li", "archive.md", "archive.ph", "archive.vn",
	}) then
		url.service = "archiveis"
		url.tail = " az " .. linked("Archive.is") .. "-en"
		tracking = "Kategória:Webarchive sablon archiveis linkkel"
	elseif hostHas("archive[-]it.org") then
		url.service = "archiveit"
		url.tail = " az " .. linked("Archive-It") .. "en"
	elseif hostHas("arquivo.pt") then
		url.tail = " at the " .. "Portuguese Web Archive"
	elseif hostHas("loc.gov") then
		url.tail = " at the " .. linked("Library of Congress")
	elseif hostHas("webharvest.gov") then
		url.tail = " at the " .. linked("National Archives and Records Administration")
	elseif hostHas("bibalex.org") then
		url.tail = " at " .. "[[Bibliotheca_Alexandrina#Internet_Archive_partnership|Bibliotheca Alexandrina]]"
	elseif hostHas("collectionscanada") then
		url.tail = " at the " .. "Canadian Government Web Archive"
	elseif hostHas("haw.nsk") then
		url.tail = " at the " .. "Croatian Web Archive (HAW)"
	elseif hostHas("veebiarhiiv.digar.ee") then
		url.tail = " at the " .. "Estonian Web Archive"
	elseif hostHas("vefsafn.is") then
		url.tail = " at the " .. "[[National and University Library of Iceland]]"
	elseif hostHas("proni.gov") then
		url.tail = " at the " .. linked("Public Record Office of Northern Ireland")
	elseif hostHas("uni[-]lj.si") then
		url.tail = " at the " .. "Slovenian Web Archive"
	elseif hostHas("stanford.edu") then
		url.tail = " at the " .. "[[Stanford University Libraries|Stanford Web Archive]]"
	elseif hostHas("nationalarchives.gov.uk") then
		url.tail = " at the " .. linked("UK Government Web Archive")
	elseif hostHas("parliament.uk") then
		url.tail = " at the " .. linked("UK Parliament's Web Archive")
	elseif hostHas("nlb.gov.sg") then
		url.tail = " at " .. "Web Archive Singapore"
	elseif hostHas("pandora.nla.gov.au") then
		url.tail = " at " .. linked("Pandora Archive")
	elseif hostHas("perma.cc") then
		url.tail = " at " .. linked("Perma.cc")
	elseif hostHas("perma[-]archives.cc") then
		-- Written with "[-]": a bare "a-" in a pattern is a lazy
		-- repetition of "a", which never matches the literal hyphen.
		url.tail = " at " .. linked("Perma.cc")
	elseif hostHas("screenshots.com") then
		url.tail = " at Screenshots"
	elseif hostHas("wikiwix.com") then
		url.tail = " at Wikiwix"
	elseif hostHas("freezepage.com") then
		url.tail = " at Freezepage"
	elseif hostHas("webcache.googleusercontent.com") then
		url.tail = " at Google Cache"
	else
		tracking = "Kategória:Webarchive sablon ismeretlen archívummal"
	end

	track[tracking] = 1
	return url
end
--[[--------------------------< parseExtraArgs >-----------------------
Parse numbered arguments starting at 2, such as url2..url10, date2..date10, title2..title10
For example: {{webarchive |url=.. |url4=.. |url7=..}}
Three url arguments not in numeric sequence (1..4..7).
Function only processes arguments numbered 2 or greater (in this case 4 and 7)
It creates numeric sequenced table entries like:
ulx.url2.url = <argument value for url4>
ulx.url3.url = <argument value for url7>
Returns the number of URL arguments found numbered 2 or greater (in this case 2), followed by the ulx table
]]
local function parseExtraArgs(args)
	-- Collects url2..url<maxurls> together with their dates and titles into
	-- gap-free slots ulx.url2, ulx.url3, ...
	-- Returns the number of extra URLs found, followed by the ulx table.
	local ulx = {}
	local slot = 2 -- next free output slot
	for i = 2, maxurls do
		local url = trimArg(args["url" .. i])
		if url then
			local entry = { url = url }

			-- The date and title belonging to |urlN= are |dateN= and
			-- |titleN=: they are looked up with the argument number i, not
			-- with the output slot, which differs from i as soon as the
			-- numbering has gaps.
			local date = trimArg(args["date" .. i])
			if date then
				-- makeDate() may raise on an unparsable date; show an
				-- inline error instead of failing the whole template.
				local ok, formatted = pcall(makeDate, date)
				if ok then
					entry.date = formatted
				else
					entry.date = inlineRed("[Dátumhiba] (1)", "error")
				end
			else
				entry.date = inlineRed("[Dátum hiányzik]", "warning")
			end

			-- nil when no title was given
			entry.title = trimArg(args["title" .. i])

			ulx["url" .. slot] = entry
			slot = slot + 1
		end
	end
	return slot - 2, ulx
end
--[[--------------------------< createTracking >-----------------------
Return data in track[] ie. tracking categories
]]
local function createTracking()
	-- Builds one "[[<key>]]" link for every key currently present in
	-- track[] and returns them concatenated, in pairs() order.
	local links = {}
	for category in pairs(track) do
		links[#links + 1] = '[[' .. category .. ']]'
	end
	return table.concat(links)
end
--[[--------------------------< createRendering >-----------------------
Return a rendering of the data in ulx[][]
]]
local function createRendering(ulx)
	-- ulx - table with the entries url1, url2, ...; ulx.url1 additionally
	--       carries format, tail, service and extraurls
	-- Returns the rendered wikitext.
	local first = ulx.url1

	-- Renders the entries ulx["url" .. from] to ulx["url" .. to] as
	-- "[url label], [url label]." ; labelOf(entry, i) supplies each label.
	local function linkList(from, to, labelOf)
		local links = {}
		for i = from, to do
			local entry = ulx["url" .. i]
			links[#links + 1] = "[" .. entry["url"] .. " " .. labelOf(entry, i) .. "]"
		end
		return table.concat(links, ", ") .. "."
	end

	local indexstr, datestr
	if first.date == 'index' then
		indexstr, datestr = 'archívumok', 'indexe'
	else
		indexstr, datestr = 'archiválva', dateI(first.date) .. ' dátummal'
	end

	-- For {{wayback}}, {{webcite}}
	if first.format == "none" then
		local sand
		local period1 = "" -- For backwards compat with {{wayback}}
		local period2 = "."
		if not first.title then
			if not first.date then -- No title. No date
				sand = "[" .. first.url .. " Archiválva]" .. first.tail
			else -- No title. Date.
				if first.service == "wayback" then
					-- The sentence is closed right after the tail, so no
					-- second period is added in front of the extra archives.
					period1 = "."
					period2 = ""
				end
				sand = "[" .. first.url .. " " .. mw.getContentLanguage():ucfirst(indexstr) .. "] " .. datestr .. first.tail .. period1
			end
		else
			if not first.date then -- Title. No date.
				sand = "[" .. first.url .. " " .. first.title .. "]" .. first.tail
			else -- Title. Date.
				sand = "[" .. first.url .. " " .. first.title .. "]" .. first.tail .. " (" .. indexstr .. " " .. datestr .. ")"
			end
		end
		if first.extraurls > 0 then -- For multiple archive URLs
			sand = sand .. period2 .. " További archívumok: " .. linkList(2, first.extraurls + 1, function(entry)
				-- Label with the title when there is one, otherwise with the date.
				return entry["title"] or entry["date"]
			end)
		end
		return sand
	end

	-- For {{cite archives}}
	local multipleServices = (first.format == "addlarchives")
	local displayheader
	if multipleServices then -- Multiple archive services
		displayheader = "További archívumok: "
	else -- Multiple pages from the same archive
		displayheader = "További " .. dateI(first.date) .. " archívumok: "
	end
	return displayheader .. linkList(1, 1 + first.extraurls, function(entry, i)
		if entry["title"] then
			return entry["title"]
		end
		if multipleServices then
			return entry["date"]
		end
		return i .. ". oldal"
	end)
end
function p._webarchive(args)
	-- Entry point for other Lua modules.
	-- args - table of template arguments (url, date, title, format, nolink
	--        and their numbered variants)
	-- Returns the rendered wikitext followed by the tracking category links.

	-- URL argument (first)
	local url1 = trimArg(args.url) or trimArg(args.url1)
	if not url1 then
		return inlineError("url", "Üres.") .. createTracking()
	end
	if mw.ustring.find( url1, "https://web.http", 1, plain ) then -- track bug
		return inlineError("url", "https://web.http") .. createTracking()
	end
	if url1 == "https://web.archive.org/http:/" then -- track bug
		return inlineError("url", "Érvénytelen URL") .. createTracking()
	end

	local uri1 = mw.uri.new(url1)
	-- A URL without a host part (for example one lacking the scheme) cannot
	-- be attributed to an archive; report it instead of failing later on a
	-- nil host.
	if not uri1 or not uri1.host then
		return inlineError("url", "Érvénytelen URL") .. createTracking()
	end
	local path = uri1.path or ""

	local url1_ = {
		url = url1,
		host = uri1.host
	}
	local ulx
	url1_.extraurls, ulx = parseExtraArgs(args)

	-- Nolink argument
	local nolink = not not args.nolink
	ulx.url1 = serviceName(url1_, nolink)

	-- Date argument
	local date = trimArg(args.date) or trimArg(args.date1)
	if date == "*" and ulx.url1.service == "wayback" then
		date = "index"
	elseif not date and ulx.url1.service == "wayback" then
		date = decodeWaybackDate( path )
		if not date then
			date = inlineRed("[Dátumhiba] (1)", "error")
		end
	elseif not date and ulx.url1.service == "webcite" then
		date = decodeWebciteDate( path )
		if date == "query" then
			date = inlineRed("[Dátum hiányzik]", "warning")
		elseif not date then
			date = inlineRed("[Dátumhiba] (1)", "error")
		end
	elseif date then
		-- makeDate() may raise on an unparsable date; handle it the same
		-- way the URL-derived dates are handled.
		local ok, formatted = pcall(makeDate, date)
		if ok then
			date = formatted
		else
			date = inlineRed("[Dátumhiba] (1)", "error")
		end
	else
		date = inlineRed("[Dátum hiányzik]", "warning")
	end
	ulx.url1.date = date

	-- Format argument: only "addlpages" (when a date is available) and
	-- "addlarchives" are recognised; everything else means "none".
	local format = trimArg(args.format)
	if format == "addlpages" then
		if not ulx.url1.date then
			format = "none"
		end
	elseif format ~= "addlarchives" then
		format = "none"
	end
	ulx.url1.format = format

	-- Title argument
	ulx.url1.title = trimArg(args.title) or trimArg(args.title1)

	local rend = createRendering(ulx)
	if not rend then
		rend = inlineRed('Hiba a [[Template:' .. tname .. '|' .. tname .. ']] sablonban: Meghatározhatatlan hiba. Kérjük, jelezd a sablon vitalapján.', 'error')
	end
	return rend .. createTracking()
end
function p.webarchive(frame)
	-- Entry point taking a frame object. Uses the frame's own arguments when
	-- it was given a first positional argument or |url=; otherwise falls
	-- back to the arguments of the parent frame.
	local args = frame.args
	local hasOwnArgs = (args[1] ~= nil) or (args["url"] ~= nil)
	if not hasOwnArgs then
		args = frame:getParent().args
	end
	return p._webarchive(args)
end
-- Export the module table (p.webarchive and p._webarchive).
return p