local p = {}
local getArgs = require('Module:Arguments').getArgs
local yesno = require('Module:Yesno')
local mRedirect = require('Module:Redirect')
local mUnicode = require('Module:Unicode data')
local mCategory = require('Module:Unicode data/category')
local mVersion = require('Module:Unicode data/version')
local mAliases = require('Module:Unicode data/aliases')
local frame
-------------------
-- General settings
-------------------
-- Wikitext external link to the official code chart PDF. The single %s
-- placeholder is filled in with string.format by the table header.
local pdfLink = table.concat({
    "[https://www.unicode.org/charts/PDF/%s.pdf",
    "Official Unicode Consortium code chart] (PDF)",
}, " ")
-- Footnote registry for the cell categories that render as empty cells.
--   note: wikitext appended to the footer list when the category occurred
--   flag: set to true while building cells once the category has been seen
local cellType = {}

cellType.reserved = {
    flag = false,
    note = "Grey areas indicate non-assigned code points",
}

cellType.noncharacter = {
    flag = false,
    note = "Black areas indicate [[Universal Character Set characters#Noncharacters|noncharacters]] (code points that are guaranteed never to be assigned as encoded characters in the Unicode Standard)",
}
-- Code point ranges whose cell abbreviation is a fixed prefix followed by a
-- running number: the abbreviation is `str .. (codepoint - first + startNum)`.
local hardcodedNumberedAbbrSets = {
    -- Block: Variation Selectors (VS1 onwards)
    {
        first = 0xFE00,
        last = 0xFE0F,
        str = "VS<br>",
        startNum = 1,
    },
    -- Block: Variation Selectors Supplement (numbering continues at VS17)
    {
        first = 0xE0100,
        last = 0xE01EF,
        str = "VS<br>",
        startNum = 17,
    },
    -- Block: Sutton SignWriting
    -- SIGNWRITING FILL MODIFIER-2 -> SW F#
    {
        first = 0x1DA9B,
        last = 0x1DA9F,
        str = "SW<br>F",
        startNum = 2,
    },
    -- Block: Sutton SignWriting
    -- SIGNWRITING ROTATION MODIFIER-2 -> SW R#
    {
        first = 0x1DAA1,
        last = 0x1DAAF,
        str = "SW<br>R",
        startNum = 2,
    },
}
-- Ranges with a custom display formatter.
-- NOTE(review): the only visible consumer is commented out in getCellAbbr,
-- so this table appears to be unused at present — confirm before removing.
local specialFormatSets = {
    -- Unicode block: Tags
    -- A tag character is shown as the character it tags, i.e. the code point
    -- with the 0xE0000 offset removed, emitted as an HTML numeric entity.
    {
        first = 0xE0021,
        last = 0xE007E,
        func = function(codepoint, abbr)
            local target = codepoint - 0xE0000
            return string.format("&#x%04X;", target)
        end,
    },
}
-------------------------
-- pseudo-object oriented
-------------------------
--- Build a codepoint record from a hex string or a number.
-- The record has two fields:
--   int: the numeric code point (nil if a string could not be parsed as hex)
--   hex: upper-case hexadecimal text, zero-padded to at least four digits
-- For string input the hex text is regenerated from the parsed number, so
-- "41" and "1f600" become "0041" and "1F600". Code elsewhere builds the
-- chart id and row labels from `.hex`, so it must be in canonical form.
-- If the string is not valid hexadecimal, the raw text is kept as `.hex`
-- and `.int` is nil.
-- @param x  hex string (e.g. "1F600") or number
-- @return codepoint table, or nil when x is neither a string nor a number
local function newCodepoint(x)
    if type(x) == "string" then
        local int = tonumber(x, 16)
        local hex = x
        if int then
            hex = string.format("%04X", int)
        end
        return {
            hex = hex,
            int = int
        }
    elseif type(x) == "number" then
        return {
            int = x,
            hex = string.format("%04X", x)
        }
    end
    return nil
end
-------------------------
-- Sundry small functions
-------------------------
--- Expand a wiki template through the frame stored in the file-level
-- `frame` variable.
-- @param template  template title
-- @param argslist  table of template arguments
-- @return whatever frame:expandTemplate returns for that call
local function expandTemplate(template, argslist)
    local call = {
        title = template,
        args = argslist,
    }
    return frame:expandTemplate(call)
end
--- Parse hexadecimal text into a number.
-- @param hexStr  string of hex digits
-- @return the number, or nil when tonumber cannot parse the text in base 16
local function fromHex(hexStr)
    local value = tonumber(hexStr, 16)
    return value
end
--- Parse a "HEX:value" list into a table keyed by numeric code point.
-- Entries are separated by ';', newline or tab. Each entry is split at its
-- FIRST colon only, so values that themselves contain colons (for example
-- an interwiki or namespaced link target) are kept whole. Entries without
-- a colon, or whose key is not valid hexadecimal, are skipped rather than
-- raising a "table index is nil" error. If a code point is listed more
-- than once, the last entry wins.
-- @param strList  raw template argument text
-- @return table mapping code point number -> trimmed value string
local function splitColonList(strList)
    local tab = {}
    for _, segment in ipairs(mw.text.split(strList, '[;\n\t]')) do
        local key, value = string.match(segment, '^([^:]*):(.*)$')
        -- key is nil when the segment has no colon at all
        local codepoint = key and fromHex(key)
        if codepoint then
            tab[codepoint] = mw.text.trim(value)
        end
    end
    return tab
end
--- Classify a code point for cell styling.
-- Returns the value of mUnicode.lookup_control unless that is "unassigned";
-- unassigned code points are split into "noncharacter" and "reserved".
-- @param codepoint  codepoint table; only `.int` is read
-- @return category string
local function getCategory(codepoint)
    local cp = codepoint.int
    local category = mUnicode.lookup_control(cp)
    if category == "unassigned" then
        if mUnicode.is_noncharacter(cp) then
            return "noncharacter"
        end
        return "reserved"
    end
    return category
end
--- Collect alias values recorded for a code point in mAliases.
-- Each mAliases entry is read as a pair: [1] is the alias type, [2] the value.
-- @param n    numeric code point
-- @param key  alias type to keep; when nil/false every alias is returned
-- @return sequence of alias values (empty when there are none)
local function getAliasValues(n, key)
    local values = {}
    local entries = mAliases[n]
    if not entries then
        return values
    end
    for _, entry in ipairs(entries) do
        if not key or entry[1] == key then
            values[#values + 1] = entry[2]
        end
    end
    return values
end
---------------------
-- A single unicode cell within the table
---------------------
--- Work out the abbreviation to show in a cell instead of the character.
-- Sources are tried in this order; the first hit wins:
--   1. a per-code-point override in args['abbr_sub']
--   2. a numbered abbreviation from hardcodedNumberedAbbrSets
--   3. the first "abbreviation" alias returned by getAliasValues
--   4. for "control" and "format" categories only, the initials of the
--      character name, wrapped in a span with class "red"
-- (specialFormatSets is not consulted here.)
-- @param codepoint  codepoint table; only `.int` is read
-- @param category   category string for the code point
-- @param args       template arguments
-- @return abbreviation wikitext, or false when the cell should show the
--         character itself
local function getCellAbbr(codepoint, category, args)
    local cp = codepoint.int

    -- 1. explicit override
    local overrides = args['abbr_sub']
    if overrides and overrides[cp] then
        return overrides[cp]
    end

    -- 2. numbered sets listed at the top of the file
    for _, set in pairs(hardcodedNumberedAbbrSets) do
        if cp >= set.first and cp <= set.last then
            return set.str .. (cp - set.first + set.startNum)
        end
    end

    -- 3. abbreviation alias
    local aliasAbbr = getAliasValues(cp, "abbreviation")[1]
    if aliasAbbr then
        return aliasAbbr
    end

    -- 4. initials of the character name
    if category == "control" or category == "format" then
        local initials = {}
        local words = mw.text.split(mUnicode.lookup_name(cp), ' ')
        for _, word in ipairs(words) do
            initials[#initials + 1] = string.sub(word, 1, 1)
        end
        return '<span class="red">' .. table.concat(initials) .. '</span>'
    end

    return false
end
--- Render every alias of a code point as " (alias NAME)" fragments.
-- @param codepoint  codepoint table; only `.int` is read
-- @return the fragments joined in mAliases order, or "" when there are none
local function aliasesStr(codepoint)
    local entries = mAliases[codepoint.int]
    if not entries then
        return ""
    end
    local parts = {}
    for i, entry in ipairs(entries) do
        parts[i] = " (alias " .. entry[2] .. ")"
    end
    return table.concat(parts)
end
--- Wrap a character in a link, according to the template arguments.
-- Precedence:
--   1. args['link_sub'][code point]: explicit link target for this character
--   2. args['link'] == "wiki": delegate to the "Link if exists" template
--   3. args['link'] == "wikt": link to the Wiktionary entry for the character
-- The previous implementation also called mRedirect.luaMain in "wiki" mode
-- and discarded the result; that dead lookup has been removed.
-- @param unicodeChar  wikitext for the character
-- @param codepoint    codepoint table; only `.int` is read
-- @param args         template arguments
-- @return linked wikitext, or nil when no link mode applies
local function linkChar(unicodeChar, codepoint, args)
    local target = args['link_sub'] and args['link_sub'][codepoint.int]
    if target then
        return '[[' .. target .. '|' .. unicodeChar .. ']]'
    end

    local mode = args['link']
    if mode == "wiki" then
        return expandTemplate('Link if exists', {unicodeChar})
    elseif mode == "wikt" then
        return '[[wikt:' .. unicodeChar .. '|' .. unicodeChar .. ']]'
    end

    -- no linking requested; callers fall back to the bare character
    return nil
end
--- Fill in one table cell for a code point.
-- The cell always gets its category as a CSS class and a title attribute of
-- the form "U+HEX: NAME (alias ...)". Its content depends on the category:
--   * "reserved" / "noncharacter": left empty; the matching cellType flag is
--     raised so the footer can explain the empty cell
--   * a code point with an abbreviation: class "abbr-cell" plus a
--     div.abbr-box holding the abbreviation
--   * anything else: the character as an "&#x...;" entity, optionally linked,
--     suffixed (args['suffix']), wrapped in a template (args['wrapper']) or
--     given a font-family (args['font'])
-- @param cell       mw.html node for the <td>
-- @param codepoint  codepoint table (`.int` and `.hex` are read)
-- @param args       template arguments
local function createCell(cell, codepoint, args)
    local cp = codepoint.int
    local category = getCategory(codepoint)
    cell:addClass(category)
    local abbr = getCellAbbr(codepoint, category, args)

    if category == "reserved" or category == "noncharacter" then
        -- nothing to draw; just remember that this kind of cell occurred
        cellType[category].flag = true
    elseif abbr then
        cell:addClass("abbr-cell")
        cell:tag("div"):addClass("abbr-box"):wikitext(abbr)
    else
        local entity = '&#x' .. codepoint.hex .. ';'
        local content = linkChar(entity, codepoint, args) or entity

        local suffixes = args['suffix']
        if suffixes and suffixes[cp] then
            -- the suffix is hex text for a second character appended to the first
            content = content .. '&#x' .. suffixes[cp] .. ';'
            cell:addClass("modified")
        end

        local wrapper = args['wrapper']
        if wrapper then
            content = expandTemplate(wrapper, {content})
        elseif args['font'] then
            cell:css("font-family", "'" .. args['font'] .. "'")
        end

        cell:wikitext(content)
    end

    -- Names of the form "<label-XXXX>" are reduced to the bare lower-case label.
    local fullName = mUnicode.lookup_name(cp)
    local shownName = string.match(fullName, "<([a-z]+)-%w+>") or fullName
    local title = table.concat({
        'U+', codepoint.hex, ': ', shownName, aliasesStr(codepoint)
    })
    cell:attr("title", title)
end
---------------------
-- For loops creating the grid of cells
---------------------
--- Append the grid of cells to the table.
-- Layout: one label row (empty corner plus column headers 0..F), then for
-- every 16-code-point row a character row and a code point row underneath
-- it. The row header spans both of those rows.
-- Row bounds are derived from the numeric code points (floor(int / 16))
-- instead of chopping the last digit off the hex string, so ranges given
-- with a single hex digit (e.g. "0") no longer break the loop.
-- @param body        mw.html node that receives the <tr> elements
-- @param rangeStart  codepoint table for the first code point; `.int` is read
-- @param rangeEnd    codepoint table for the last code point; `.int` is read
-- @param args        template arguments, passed through to createCell
local function createTableBody(body, rangeStart, rangeEnd, args)
    -- 0 through F label row
    local labelRow = body:tag("tr")
    labelRow:tag("th") -- empty corner cell
        :css("width", "45pt")
    for colIndex = 0, 15 do
        labelRow:tag("th"):wikitext(string.format("%X", colIndex))
            :css("width", "20pt")
    end

    -- real body of table; a row index is the code point without its last hex digit
    local rowStart = math.floor(rangeStart.int / 16)
    local rowEnd = math.floor(rangeEnd.int / 16)
    for rowIndex = rowStart, rowEnd do
        local rowBase = rowIndex * 16
        local rowHex = string.format("%03X", rowIndex)

        local row = body:tag("tr")
        row:tag("th"):wikitext("U+" .. rowHex .. "<i>x</i>")
            :attr("rowspan", "2")
        for colIndex = 0, 15 do
            local cell = row:tag("td")
            createCell(cell, newCodepoint(rowBase + colIndex), args)
        end

        -- second row: the code point number under each character
        local subrow = body:tag("tr")
        for colIndex = 0, 15 do
            subrow:tag("td"):addClass("codepoint")
                :wikitext(string.format("%04X", rowBase + colIndex))
        end
    end
end
---------------------
-- Header at top of table
---------------------
--- Append the header cell: a bold link to the block's article, the link to
-- the official code chart PDF, and a "ref label" pointing at the version
-- footnote.
-- @param head  mw.html node that receives the <th>
-- @param name  block name; the link target is looked up through
--              mRedirect.luaMain using "<name> (Unicode block)"
-- @param id    chart id, used in the PDF link and the footnote anchor
local function createTableHeader(head, name, id)
    local target = mRedirect.luaMain(name .. " (Unicode block)", false)
    local titleLink = "<b>[[" .. target .. "|" .. name .. "]]</b>"
    local chartLink = string.format(pdfLink, id)
    local versionRef = expandTemplate(
        'ref label',
        {id .. '_as_of_Unicode_version', 1}
    )

    head:tag("th")
        :addClass("header")
        :attr("colspan", "100%")
        :wikitext(titleLink .. "<br />" .. chartLink .. versionRef)
end
---------------------
-- Footer at bottom of table
---------------------
--- Append the footer cell with the ordered list of notes.
-- The list holds, in order: the Unicode-version note, one note for every
-- cellType entry whose flag is set, and finally the manual note if given.
-- @param foot  mw.html node that receives the <th>
-- @param id    chart id, used for the version footnote anchor
-- @param note  optional extra note wikitext
local function createTableFooter(foot, id, note)
    local th = foot:tag("th")
        :addClass("footer")
        :attr("colspan", "100%")
        :wikitext("<b>Notes</b>")
    local list = th:tag("ol")

    -- every note is one <li> in the list
    local function addNote(...)
        list:tag("li"):wikitext(...)
    end

    -- version note: footnote anchor followed by the "as of" version text
    addNote(
        expandTemplate('note', {id .. '_as_of_Unicode_version'}),
        expandTemplate('Unicode version', {prefix = 'Asof', version = mVersion})
    )

    -- notes about the categories of empty cells that actually occurred
    for _, kind in pairs(cellType) do
        if kind.flag then
            addNote(kind.note)
        end
    end

    -- manual note
    if note then
        addNote(note)
    end
end
---------------------
-- Creates table
---------------------
--- Build the complete chart and return it as an HTML string.
-- The cellType flags are cleared first so footnotes reflect only this table.
-- The header is only emitted when args['blockname'] is set.
-- @param rangeStart  codepoint table for the first code point
-- @param rangeEnd    codepoint table for the last code point
-- @param args        template arguments ('blockname' and 'note' are read
--                    here; the whole table is passed on to the body builder)
-- @return the table rendered with tostring
local function createTable(rangeStart, rangeEnd, args)
    -- chart id: "U" plus the hex of the first code point
    local id = 'U' .. rangeStart.hex

    cellType.reserved.flag = false
    cellType.noncharacter.flag = false

    local tbl = mw.html.create("table")
        :addClass("wikitable")
        :addClass("unicode-block")

    local blockName = args['blockname']
    if blockName then
        createTableHeader(tbl, blockName, id)
    end
    createTableBody(tbl, rangeStart, rangeEnd, args)
    createTableFooter(tbl, id, args['note'])

    return tostring(tbl)
end
---------------------
-- Main
---------------------
--- Module entry point: render a Unicode chart.
-- The block is chosen either by name (args['blockname'], resolved through
-- mUnicode.get_block_info) or by an explicit args['rangestart'] /
-- args['rangeend'] pair; the name takes precedence.
-- @param frameArg  frame object; stored in the file-level `frame` variable
--                  for use by expandTemplate
-- @return the chart as a string, "invalid blockname" when the block lookup
--         returns nil, or nothing when neither a name nor a full range is
--         supplied
function p.main(frameArg)
    frame = frameArg
    local args = getArgs(frame)

    -- list-valued arguments arrive as "HEX:value" text; convert them to tables
    local listArgs = {'abbr_sub', 'link_sub', 'suffix'}
    for i = 1, #listArgs do
        local argName = listArgs[i]
        if args[argName] then
            args[argName] = splitColonList(args[argName])
        end
    end

    -- look up block by name
    local blockName = args['blockname']
    if blockName then
        local range = mUnicode.get_block_info(blockName)
        if range == nil then
            return "invalid blockname"
        end
        local first = newCodepoint(range[1])
        local last = newCodepoint(range[2])
        return createTable(first, last, args)
    end

    -- block given as start and end of range
    if args['rangestart'] and args['rangeend'] then
        local first = newCodepoint(args['rangestart'])
        local last = newCodepoint(args['rangeend'])
        return createTable(first, last, args)
    end
end
return p