Module:R:Woodhouse
Jump to navigation
Jump to search
- The following documentation is located at Module:R:Woodhouse/documentation. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
--[[
This module looks up the Greek entry title in "Module:R:Woodhouse/psia1_to_infs",
a list of verbs, and adds any infinitive forms to a list including the page
title itself, in both potential proper and common forms. This list of Greek
forms is used to retrieve English headwords in Woodhouse's dictionary from
"Module:R:Woodhouse/reverse_index". The infinitives step is necessary because
Woodhouse mostly identifies verbs by their infinitives. The function get_page() then looks up
each English headword alphabetically in "Module:R:Woodhouse/page_headwords", or
"Module:R:Woodhouse/page_headwords_proper", or both, per
"Module:R:Woodhouse/proper_or_both", to find page numbers in the paper edition
of the dictionary. This `abstract' page number is slightly altered with a few
exceptions to obtain the page number used in the ARTFL project's URL. These
URLs are returned with bibliographical information and HTML for display. A
remaining issue is that, about 5% of the time, the page number will be off by
one or two. This problem may be slightly intricate to solve completely.
8 May 2020: Changed reverse_index implementation to reverse_index_tab_delimited implementation to save memory (The Lua table consumed 8x the binary text size.)
]]
-- Public interface table of this module; functions attached to it are the
-- module's entry points.
local export = {}
--- Return a new list holding only the first occurrence of each value in `ls`.
-- The relative order of the surviving values is preserved and `ls` itself is
-- not modified.
-- @param ls sequence whose values can be used as table keys
-- @return a fresh sequence without repeated values
local function remove_duplicates(ls)
	local seen, unique = {}, {}
	for _, item in ipairs(ls) do
		if not seen[item] then
			unique[#unique + 1] = item
			seen[item] = true
		end
	end
	return unique
end
--- Build a new list containing the values of `l1` followed by those of `l2`.
-- Either argument may be nil, in which case it contributes nothing.
-- Values are collected with pairs(), so keys are discarded and only the
-- values are appended.
-- @param l1 first table of values, or nil
-- @param l2 second table of values, or nil
-- @return a fresh sequence with the collected values
local function concat(l1, l2)
	local merged = {}
	local function append_all(list)
		if not list then
			return
		end
		for _, value in pairs(list) do
			merged[#merged + 1] = value
		end
	end
	append_all(l1)
	append_all(l2)
	return merged
end
--- Lower-bound binary search over a sorted list.
-- Narrows the range [L, H] until L == H and returns that index: the first
-- index whose element is not less than `x`, or `H` when every probed element
-- is less than `x` (the element at `H` itself is never compared).
-- NOTE: a caller passing L = 0 can make the search probe ys[0]; with a
-- 1-based list that is nil and the `<` comparison raises an error.
-- @param ys sorted sequence of mutually comparable values
-- @param x value to locate
-- @param L optional lowest index of the search range (default 1)
-- @param H optional highest index of the search range (default #ys)
-- @return index in [L, H] as described above
local function b_search(ys, x, L, H)
	L = L or 1
	H = H or #ys
	if L < 0 then
		error('L < 0')
	end
	while L < H do
		-- `local` keeps the midpoint out of the global environment.
		local M = math.floor((L + H) / 2)
		if ys[M] < x then
			L = M + 1
		else
			H = M
		end
	end
	return L
end
--Comment for calculate_page_number_divergence:
--page_number_divergence arises from a few buffer pages before the first
--headword and, subsequently, from a few multi-page entries that leave some
--pages without headwords. Each time the latter happens, the index of
--page-initial headwords drifts further from the physical page number.
--- Compute the offset added to a headword index to obtain a page number.
-- The result is a fixed 4 plus a drift term: 4 for proper names, otherwise
-- 4, 3, 2 or 1 depending on whether `w0` sorts at or after 'taking',
-- 'setting', 'putrefaction', or before all of them.
-- @param w0 headword with a lower-case first letter (not read when is_proper is truthy)
-- @param is_proper truthy when the headword is a proper name
-- @return the offset, a number between 5 and 8
local function calculate_page_number_divergence(w0, is_proper)
	local drift
	if is_proper then
		drift = 4
	elseif w0 >= 'taking' then
		drift = 4
	elseif w0 >= 'setting' then
		drift = 3
	elseif w0 >= 'putrefaction' then
		drift = 2
	else
		drift = 1
	end
	return 4 + drift
end
--- Estimate the page number for an English headword.
-- A headword counts as proper when upper-casing its first letter leaves it
-- unchanged. Common headwords are searched in `page_headwords` (indices up
-- to 995); proper ones in `page_headwords_proper` (indices up to 32), with
-- 995 added to the result. The divergence offset from
-- calculate_page_number_divergence() is then added.
-- The searches start at index 1: starting at 0 made b_search probe index 0
-- (nil in a 1-based list) for words sorting at or before the first headword,
-- which raised a comparison error. Results for all other words are unchanged.
-- @param page_headwords sorted list of page-initial common headwords
-- @param page_headwords_proper sorted list of page-initial proper headwords
-- @param w English headword to locate
-- @return estimated page number
local function get_page(page_headwords, page_headwords_proper, w)
	local lang_en = mw.getLanguage('en')
	local is_proper = lang_en:ucfirst(w) == w
	local w0 = lang_en:lcfirst(w)
	local p_n_divergence = calculate_page_number_divergence(w0, is_proper)
	if not is_proper then
		return (b_search(page_headwords, w0, 1, 995) - 1) + p_n_divergence
	else
		return (b_search(page_headwords_proper, w0, 1, 32) - 1) + 995 + p_n_divergence
	end
end
--- Upper-case the first letter of `x` using the English language object.
-- @param x string to capitalise
-- @return whatever mw.getLanguage('en'):ucfirst returns for `x`
local function uc1_eng(x)
	local english = mw.getLanguage('en')
	return english:ucfirst(x)
end
--- Collect the English headwords recorded for a Greek title in a table-based index.
-- For each headword found under `reverse_index[title]`:
--   * no entry in `proper_or_both`: the headword is added as is;
--   * an entry exists: the capitalised form is added, and when that entry is
--     "b" the uncapitalised form is added as well.
-- @param proper_or_both table keyed by headword; "b" marks words wanted in both forms
-- @param reverse_index table mapping a Greek title to a table of English headwords
-- @param title Greek title to look up
-- @return sequence of English headwords (empty when the title is absent)
local function f_reverse_index(proper_or_both, reverse_index, title)
	local headwords_eng = {}
	local rix = reverse_index[title]
	if rix == nil then
		return headwords_eng
	end
	for _, headword in pairs(rix) do
		local flag = proper_or_both[headword]
		if flag ~= nil then
			headwords_eng[#headwords_eng + 1] = uc1_eng(headword)
		end
		if flag == nil or flag == "b" then
			headwords_eng[#headwords_eng + 1] = headword
		end
	end
	return headwords_eng
end
--- Collect the English headwords listed for a Greek title in the tab-delimited index.
-- The index is one string; a matching line is the pattern-escaped title at the
-- start of a line, a tab, and then the tab-separated English headwords.
-- Every matching line contributes all of its headwords.
-- @param proper_or_both not used by this function; kept for a signature parallel to f_reverse_index
-- @param reverse_index_tab_delimited the index text
-- @param title Greek title to look up
-- @return sequence of English headwords (empty when no line matches)
local function f_reverse_index_tab_delimited(proper_or_both,reverse_index_tab_delimited,title)
	local escaped_title = require("Module:string utilities").pattern_escape(title)
	-- The frontier patterns anchor the match to a whole line.
	local line_pattern = "%f[^%z\n]" .. escaped_title .. "\t([^\n]+)%f[%z\n]"
	local headwords_eng = {}
	for English_words in reverse_index_tab_delimited:gmatch(line_pattern) do
		for word in English_words:gmatch("[^\t]+") do
			headwords_eng[#headwords_eng + 1] = word
		end
	end
	return headwords_eng
end
--- Look up `w` in the "grc_RWoodhouse_lemma_to_infinitives" data table.
-- @param w key to look up (the callers pass the Greek entry title)
-- @return whatever "Module:data tables".index_table returns for that key
local function load_infinitives(w)
	--return mw.loadData("Module:R:Woodhouse/psia1_to_infs")[w]
	local data_tables = require("Module:data tables")
	return data_tables.index_table("grc_RWoodhouse_lemma_to_infinitives", w)
end
--grc_RWoodhouse_lemma_to_headwords
--- Build the collapsible list of Woodhouse links for a Greek entry title.
-- Steps:
--   1. form the candidate Greek forms: the title, the title with an upper-case
--      first letter, and whatever load_infinitives() returns for it;
--   2. gather the English headwords recorded for each form in the
--      tab-delimited reverse index, then de-duplicate and sort them;
--   3. emit one <li> per headword, linking to the ARTFL page whose number is
--      get_page() minus 5;
--   4. wrap the items in a collapsed <div> whose expand label is the headword count.
-- @param title Greek entry title
-- @return the HTML/wikitext as a single string
local function print_html(title)
	--local psia1_to_infs = mw.loadData("Module:R:Woodhouse/psia1_to_infs") --for calculating title_addenda
	--local reverse_index = mw.loadData("Module:R:Woodhouse/reverse_index") --for f_reverse_index_
	local reverse_index_tab_delimited = require("Module:R:Woodhouse/reverse_index_tab_delimited") --for f_reverse_index_tab_delimited
	local proper_or_both = mw.loadData("Module:R:Woodhouse/proper_or_both") --for f_reverse_index
	local page_headwords = mw.loadData("Module:R:Woodhouse/page_headwords") --for get_page
	-- page_headwords also requires a select_all function in data_tables to work efficiently
	local page_headwords_proper = mw.loadData("Module:R:Woodhouse/page_headwords_proper") --for get_page

	local title_uc = mw.getContentLanguage():ucfirst(title)
	local title_addenda = load_infinitives(title) --psia1_to_infs[title]
	local titles = concat({title, title_uc}, title_addenda)

	local headwords_eng = {}
	-- The loop variable is named so that it does not shadow the `title` parameter.
	for _, greek_form in ipairs(titles) do
		--changed reverse_index to reverse_index_tab_delimited
		headwords_eng = concat(headwords_eng, f_reverse_index_tab_delimited(proper_or_both, reverse_index_tab_delimited, greek_form))
	end
	headwords_eng = remove_duplicates(headwords_eng)
	table.sort(headwords_eng)

	local lst = {}
	local count = 0
	-- ipairs guarantees that the items come out in the sorted order.
	for _, headword in ipairs(headwords_eng) do
		local artfl_page = get_page(page_headwords, page_headwords_proper, headword) - 5
		table.insert(lst, "<li>[https://artflsrv03.uchicago.edu/cgi-bin/efts/sqldbs/WOODHOUSE/woodhouse.py?pagenumber="..artfl_page.."&pageturn=1 "..headword.."] idem, page "..artfl_page..".</li>")
		count = count + 1
	end

	local expandtext = count .. " headword" .. ( count == 1 and "" or "s" )
	-- The data-expandtext attribute value is closed with its quote before the tag ends.
	table.insert(lst, 1, "<div class='mw-collapsible mw-collapsed' style='display: inline' data-expandtext='"..expandtext.."'><ul>")
	table.insert(lst, "</ul></div>")
	return table.concat(lst)
end
--- Entry point: render the Woodhouse reverse-index links for a Greek word.
-- Reads the parent frame's `w` argument as the word to look up. A missing or
-- empty `w` falls back to the current page title, except on pages in the
-- Template namespace, where an empty string is returned instead.
-- @param frame frame object; arguments are read from frame:getParent().args
-- @return the output of print_html(), or "" in the Template-namespace case
function export.reverse_index(frame)
	local args = frame:getParent().args
	local current_title = mw.title.getCurrentTitle()
	local w = args['w']
	-- An empty argument (e.g. `|w=`) is treated the same as an absent one.
	if w == "" then
		w = nil
	end
	if not w and current_title.nsText == "Template" then
		return ""
	end
	return print_html(w or current_title.text)
end
-- Hand the public interface table back to whoever loads this module.
return export