Moduli:Footnotes/whitelist/sort

--[[--------------------------< U N S O R T E D _ A D D >------------------------------------------------------

move members of the unsorted list into a section listing, selecting them by the first character that follows
'CITEREF' in the citeref text:
	<index> is '#': the first character is a digit
	otherwise: the first character is <index> as written or <index> lowercased

<index> must be exactly one character.  For any other length (ODNB, UNSORTED, an empty heading, whatever) return
without making any additions.  The empty string is rejected explicitly because it would otherwise be used to
build the malformed pattern "%['CITEREF[]" which raises an error at the first match attempt.

parameters:
	index – section heading text
	unsorted – table whose keys are citeref text and whose values are true; keys claimed here are set to nil
	temp – table that collects the citerefs of section <index>; claimed keys are added here with value true

returns nothing; <unsorted> and <temp> are modified in place

]]

local function unsorted_add (index, unsorted, temp)
	if 1 ~= index:len() then													-- only add citerefs to the single-character lists
		return;
	end

	local pattern;
	if '#' == index then
		pattern = '%[\'CITEREF%d';												-- first character is a digit
	else
		pattern = '%[\'CITEREF[' .. index .. index:lower() .. ']';				-- first character is <index> or its lowercase form
	end

	for k, v in pairs (unsorted) do												-- spin through the unsorted listing; clearing an existing key while traversing is permitted
		if v and k:match (pattern) then											-- if still available and matches the pattern
			temp[k] = true;														-- add to the section listing
			unsorted[k] = nil;													-- and then remove this one from the unsorted listing
		end
	end
end


--[[--------------------------< L I S T _ P A R S E >----------------------------------------------------------

parse apart the plain-text listing of one section, one line at a time.  Each non-empty line is trimmed, has its
k/v format normalized, and then becomes a key in <temp> with the assigned value true.  Using the text as the key
catches multiples of the same k/v and supports the easy insertion of k/v pairs from the unsorted list.

normalization:
	no spaces around the quote mark just inside square brackets and braces: ['...'] and {'...'}
	no spaces around a comma that follows a closing bracket or brace
	exactly one space on each side of the assignment operator

parameters:
	index – section heading text; selects the listing in <list>
	list – table of plain-text section listings keyed by heading
	temp – table that receives the normalized lines as keys

returns nothing; <temp> is modified in place.  When <list> has no entry for <index> (for example, the whitelist
has no UNSORTED heading) nothing is added.

]]

local function list_parse (index, list, temp)
	local text = list[index];
	if not text then															-- no such section; nothing to parse
		return;
	end

	local rules = {																-- {pattern, replacement}; applied in this order
		{' *%[ *\' *', '[\''},													-- normalize opening sq brackets
		{' *\' *%] *', '\']'},													-- normalize closing sq brackets
		{' *{ *\' *', '{\''},													-- normalize opening braces
		{' *\' *} *', '\'}'},													-- normalize closing braces
		{'([%]}]) *, *', '%1,'},												-- normalize trailing comma
		{' *= *', ' = '},														-- normalize assignment operator
		};

	for line in text:gmatch ('\t*([^\r\n]+)') do								-- one capture per non-empty line; leading tabs are skipped
		local citeref = mw.text.trim (line);
		for _, rule in ipairs (rules) do
			citeref = citeref:gsub (rule[1], rule[2]);							-- assignment keeps only the string; the substitution count is discarded
		end
		temp[citeref] = true;													-- a constant value; repeated lines land on the same key (avoid duplication)
	end
end


--[[--------------------------< W H I T E L I S T _ S O R T >--------------------------------------------------

maintenance utility for Module:Footnotes/whitelist.  The whitelist is segregated into sections according to the
section heading (single alpha character A-Z and '#').  This utility adds whitelist entries from the UNSORTED
heading to the correct single-character heading.  After appropriate unsorted entries have been added to a
section, the section is sorted and rendered.  Nothing is saved by this utility; the rebuilt table is returned as
a syntaxhighlight block.

entries under UNSORTED that are not claimed by any single-character section stay under UNSORTED in the output so
that no whitelist entry is lost.

for this to work, the unsorted header name must be: UNSORTED.  When there is no such header the existing sections
are still sorted.

this utility takes no arguments from frame.  frame is provided only for extensionTag()

raises an error when the whitelist module cannot be read or when no {...} table can be found in it

]]

local function whitelist_sort(frame)
	local headers = {};															-- headings are stored here and used for loop control
	local list = {};															-- plain-text section listings keyed by heading
	local unsorted = {};														-- k/v pairs where k is an unsorted citeref and v is true; k is removed once added to a section
	local result = {};															-- sorted and formatted sections end up here, in heading order

	local function section_make (header, members)								-- render one section: heading line followed by the sorted, tab-indented citerefs
		local citerefs = {};
		for citeref, keep in pairs (members) do									-- make a sequence from the keys so that it can be sorted
			if keep then
				table.insert (citerefs, citeref);
			end
		end
		table.sort (citerefs);
		return '----------< ' .. header .. ' >----------\n\t' .. table.concat (citerefs, '\n\t') .. '\n\n';
	end

	local content = mw.title.new('Module:Footnotes/whitelist'):getContent();	-- read the module plain text
	if not content then
		error ('unable to read the content of Module:Footnotes/whitelist');
	end

	local find_pattern = '%s*local%s+whitelist%s+=%s+';							-- find the whitelist table
	local tstart, tend = content:find (find_pattern);							-- when not found tstart is nil and the search below begins at the start of the text

	content = content:match ('%b{}', tstart);									-- get the content of the whitelist table
	if not content then
		error ('unable to find the whitelist table in Module:Footnotes/whitelist');
	end
	content = content:gsub ('^{[\r\n]+', ''):gsub ('[\r\n]+}$', '');			-- remove leading and trailing braces and newlines

	for header in content:gmatch ('%-+<([#%a%d%s]+)>%-+') do					-- get pseudo-headers
		table.insert (headers, mw.text.trim (header));							-- save the captures in the headers table
	end

	for i, header in ipairs (headers) do										-- separate whitelist entries into individual groupings
		local pattern = '%-+<%s*' .. header .. '%s*>%-+';
		tstart, tend = content:find (pattern);									-- find this header
		if not tstart then
			error ('shouldn\'t be here; header: ' .. (header or '(nil or empty string)') .. '; tstart: ' .. (tstart or '(nil or empty string)'));
		end

		if headers[1+i] then													-- if not the last header
			list[header] = mw.text.trim (content:match ('([^<]-)%-+<', tend+1));	-- everything from just past this header up to the next header
		else																	-- must be the last header (usually UNSORTED)
			list[header] = mw.text.trim (content:match ('.*', tend+1));			-- everything from just past this header to the end
		end
	end

	if list['UNSORTED'] then													-- the whitelist might not have an UNSORTED header
		list_parse ('UNSORTED', list, unsorted);								-- make a separate unsorted list
	end

	for i, header in ipairs (headers) do										-- first pass: every section except UNSORTED
		if 'UNSORTED' ~= header then
			local members = {};
			list_parse (header, list, members);									-- parse the list
			unsorted_add (header, unsorted, members);							-- then add appropriate citerefs from the unsorted list
			result[i] = section_make (header, members);
		end
	end

	for i, header in ipairs (headers) do										-- second pass: UNSORTED keeps whatever no section claimed
		if 'UNSORTED' == header then
			result[i] = section_make (header, unsorted);
			unsorted = {};														-- should the header be repeated, list the leftovers only once
		end
	end

	return frame:extensionTag {name="syntaxhighlight", content='local whitelist = {\n'.. table.concat (result) .. '\t}', args = {lang="lua"}};
end


--[[-------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------

the table returned to callers of this module; whitelist_sort is the only entry point

]]

local exported = {};
exported.whitelist_sort = whitelist_sort;

return exported;