Module:Naval Vessel Register URL/data extraction tool

local p = {};																	-- module export table; local so that it does not leak into the global environment

--[[-------------------------< N A M E _ C A S E >--------------------------------------------------------------

Convert an all-uppercase name to mixed case: HARRY L GLUCKSMAN becomes Harry L Glucksman.

Every space-delimited word is lowercased and then given an uppercase first character.  The character that
follows an apostrophe is also uppercased and each apostrophe is emitted as \' (backslash, apostrophe):
O'BRIEN becomes O\'Brien.  A nil name is treated as the empty string.

]]

local function name_case (name)
	local lang = mw.getContentLanguage();										-- language object that supplies ucfirst()
	local trimmed = mw.text.trim (name or '');									-- strip leading and trailing white space

	local parts = mw.text.split (trimmed, ' ');									-- split the name at the spaces if there are any
	for idx = 1, #parts do
		parts[idx] = lang:ucfirst (string.lower (parts[idx]));					-- lowercase the word, then uppercase its first character
	end

	local pieces = mw.text.split (table.concat (parts, ' '), '\'');				-- rejoin, then split at apostrophes if any (O'Brien, etc)
	for idx = 1, #pieces do
		pieces[idx] = lang:ucfirst (pieces[idx]);								-- uppercase the character that follows each apostrophe
	end

	return table.concat (pieces, '\\\'');										-- rejoin with backslash-escaped apostrophes and done
end


--[[-------------------------< N V R _ E X T R A C T >----------------------------------------------------------

Scan the unparsed content of the current page for Naval Vessel Register ship-detail links with their associated
hull numbers and ship names.  Each match becomes one entry in wp hull number format:

	['<letters>-<number>'] = {'<nvr id>', '<Mixed Case Name>'}

The entries are returned as a Lua table fragment wrapped in <pre>...</pre>, keyed by the hull letters of the
last match with any 'T-' prefix stripped.

Returns an error message, instead of raising a script error, when the current page has no content or when no
NVR link is found in it.

frame is accepted for #invoke compatibility but is not used.

]]

function p.nvr_extract (frame)
	local content = mw.title.getCurrentTitle():getContent();					-- unparsed content of this page; nil when there is no page
	if not content then
		return '<span class="error">nvr_extract: the current page has no content</span>';
	end

	local out_table = {};														-- output goes here
	local last_ltr;																-- hull letters of the most recent match; nil until something matches

																				-- search for nvr links and associated hull numbers
	for id, ltr, num, name in mw.ustring.gmatch (content, 'SHIPSDETAIL_([^%.]+)%.HTML\">([%a%- %(%)]+)%s+(%d+).-_NameLink_%d\">([^<]+)') do
		out_table[#out_table + 1] = string.format ("['%s-%s'] = {'%s', '%s'}", ltr, num, id, name_case (name));	-- make table entries in wp hull number format
		last_ltr = ltr;															-- remember it because ltr is local to the for loop
	end

	if not last_ltr then														-- nothing matched so there is no group name and nothing to list
		return '<span class="error">nvr_extract: no NVR hull number links found on this page</span>';
	end

	local group = last_ltr:gsub ('T%-(.+)', '%1');								-- identify this collection of hull numbers; strip off the 'T-' prefix
																				-- make pretty output
	return "<br /><pre>&#9;[\'" .. group .. "\'] = <br />&#9;&#9;{<br />&#9;&#9;" .. table.concat (out_table, ',<br />&#9;&#9;') .. "<br />&#9;&#9;},<br /></pre>";
end

return p;