This module is rated as alpha. It is ready for third-party input, and may be used on a few pages to see if problems arise, but should be watched. Suggestions for new features or changes in their input and output mechanisms are welcome.
Usage
{{#invoke:Import table|import |page= |config= |common= }}
This module is designed to import data from tables in Wikipedia articles into Wikidata. The first column of the table must be the name of the item.
- If this is a link (or a redirect) to an existing article which has a corresponding Wikidata item, then the data will be imported into this item.
- If the first column contains a valid identifier for a Wikidata item (e.g. Q123456) then the data will be imported into this item.
- If this is plain text or a redlink, then the data will be imported into a new Wikidata item.
Parameters
- page - the name of the article/page to parse, e.g.
|page=List of dams in South Africa
- config - details about what type of data is held in each column of the table formatted as type-property-option. Currently recognised types are:
- label - the name of the article
- wikilink - a link to an article which should be the value of the property, e.g.
wikilink-P131
- quantity - specify the unit in the third parameter, e.g.
quantity-P2048-Q11573
- year - a 4-digit year, e.g.
year-P571
- text - any text to import into a string datatype, e.g.
text-P3562
- coord - coordinate position, inside the {{coord}} template, e.g.
coord-P625
- donotuse (or any other unrecognised type) - indicates a column that will not be imported
- common - a set of values that every item in the list should have, formatted in pairs as property-value, e.g.
|common=P31-Q12323,P17-Q258
Notes
- The module will not import any claim if there is already a statement for that property in Wikidata, regardless of whether the existing value is the same as or different from the one being imported, and even if the existing value is marked as deprecated.
-- Load the "strict" module (presumably turns use of undeclared globals into an error -- confirm).
require("strict")
-- Table of functions exported by this module; it is returned at the end of the file.
local p = {}
--- Pull the target out of the first wikilink in `label` and look it up.
-- The lookup is delegated to `_id` from Module:ResolveEntityId (presumably it
-- maps a page title to a Wikidata entity ID -- its behaviour is not visible here).
-- @param label  cell text, possibly containing "[[Target]]" or "[[Target|text]]"; may be nil
-- @return qid       result of the lookup, or nil when `label` holds no wikilink
-- @return rawlabel  the link target when a wikilink was found, otherwise `label` unchanged
local function resolveqid(label)
    local lookup = require("Module:ResolveEntityId")._id
    if not label then
        return nil, nil
    end
    -- Try the piped form "[[Target|...": capture everything up to the pipe.
    local target = string.match(label, '%[%[([^%|%]]+)%|')
    if not target then
        -- Fall back to the plain form "[[Target]]".
        target = string.match(label, '%[%[([^%|%]]+)%]%]')
    end
    if not target then
        -- No link at all: hand the text back untouched, with no ID.
        return nil, label
    end
    local qid = lookup(target)
    return qid, target
end
--- Strip <ref> footnotes from a piece of wikitext and trim surrounding whitespace.
-- Each reference is removed individually, so text lying between two references
-- (or between a reference and a later self-closing tag such as <br/>) is kept.
-- @param text  wikitext of one table cell (string)
-- @return the cleaned string
local function tidystring(text)
    -- Self-closing refs (<ref name=x />) go first, so that one of them is never
    -- taken as the opening tag of a later "</ref>".
    local tidy = mw.ustring.gsub(text, "%<ref[^%>]*%/%>", "")
    -- Paired refs (<ref ...> .. </ref>): the lazy ".-" stops at the nearest closing tag.
    tidy = mw.ustring.gsub(tidy, "%<ref[^%>]*%>.-%<%/ref%>", "")
    return mw.text.trim(tidy) -- trim spaces
end
--- Turn the rows of a wikitable into a list of import commands for Wikidata.
-- The page named in `page` is read, the text between the first "{|" and the
-- last "|}" is taken as the table, and the table definition and heading row
-- are dropped. For each remaining row the first column identifies the item:
-- a resolvable wikilink or a "Q<digits>" token selects an existing item,
-- anything else produces a CREATE block for a new item ("LAST").
-- The command syntax (CREATE / LAST / Len / Den / S143, "|"-separated) looks
-- like QuickStatements -- confirm against the consuming tool.
--
-- @param frame  frame object; the arguments read are:
--   page   - title of the page to parse (required)
--   config - comma-separated column specs "type-property-option" (required);
--            handled types: wikilink, year, quantity, text, coord
--   common - optional comma-separated "property-value" pairs added to new items
-- @return a string of commands, each terminated by "<br>", or a short message
--         when an argument is missing, the page cannot be read, or it has no table
function p.import(frame)
    local tab = "|" -- field separator inside one command
    local wikiqid = "Q328" -- QID for English Wikipedia
    local eol = "<br>" -- end of line string
    -- NOTE(review): this loads the /sandbox copy of Module:Coordinates -- confirm that is intended.
    local coord2text = require("Module:Coordinates/sandbox")._coord2text
    -- NOTE(review): if frame.args is always a table, the getParent() fallback is never reached -- confirm.
    local args = frame.args or frame:getParent().args
    if not args.page then
        return "No page specified."
    end
    if not args.config then
        return "No configuration."
    end

    -- config[n] = { type, property, option } for the n-th table column.
    local config = {}
    for c1, col in ipairs(mw.text.split(args.config, ",")) do
        config[c1] = mw.text.split(mw.text.trim(col), "-")
    end

    -- common[i] = { property, value } in input order, plus common[property] = value
    -- as an index (e.g. common["P31"] = "Q39715"). Empty or incomplete pairs are
    -- skipped so that "|common=" left blank does not break item creation.
    local common = {}
    if args.common then
        for _, prop in ipairs(mw.text.split(args.common, ",")) do
            local pair = mw.text.split(mw.text.trim(prop), "-")
            if pair[1] ~= "" and pair[2] and pair[2] ~= "" then
                common[#common + 1] = pair
                common[pair[1]] = pair[2]
            end
        end
    end

    -- Read the page; either call can yield nil (invalid title / missing page).
    local title = mw.title.new(args.page)
    local content = title and title:getContent()
    if not content then
        return "Page not found."
    end
    content = string.match(content, "%{%|(.+)%|%}") -- keep table only
    if not content then
        return "No table found."
    end
    content = string.gsub(content, "||", "\n|") -- use \n| for column breaks
    local rows = mw.text.split(content, "|-\n", true) -- split table into rows
    table.remove(rows, 1) -- remove table definition
    table.remove(rows, 1) -- remove heading row

    -- Commands are collected in a buffer and joined once at the end.
    local out = {}
    local function emit(line)
        out[#out + 1] = line .. eol
    end
    -- Label of an item, falling back to the raw ID when no label is available.
    local function labelof(id)
        return mw.wikibase.getLabel(id) or id
    end

    for _, row in ipairs(rows) do
        local columns = mw.text.split(row, "\n|") -- split table row into columns
        table.remove(columns, 1) -- remove content before the first \n| character
        local label = columns[1]
        if label then
            label = tidystring(label)
            local qid
            qid, label = resolveqid(label) -- resolve qid if first column is link
            if not qid then
                qid = string.match(label, "Q%d+") -- check if QID is specified in first column
            end

            local entity
            if qid then
                entity = mw.wikibase.getEntity(qid)
            else
                emit("CREATE")
                qid = "LAST" -- creating new item, so use LAST
                emit(qid .. tab .. 'Len' .. tab .. '"' .. label .. '"')
                if common["P31"] then -- create auto-description
                    local desc = labelof(common["P31"])
                    if common["P17"] then -- add country
                        desc = desc .. ' in '
                        if common["P131"] then
                            desc = desc .. labelof(common["P131"]) .. ", "
                        end
                        desc = desc .. labelof(common["P17"])
                    end
                    emit(qid .. tab .. 'Den' .. tab .. '"' .. desc .. '"')
                end
                for c = 1, #common do
                    emit(qid .. tab .. common[c][1] .. tab .. common[c][2])
                end
            end

            -- One statement line, sourced (S143) to the wiki given by wikiqid.
            local function addstatement(prop, val)
                emit(qid .. tab .. prop .. tab .. val .. tab .. "S143" .. tab .. wikiqid)
            end

            for cn, col in ipairs(columns) do
                local spec = config[cn]
                local kind = spec and spec[1]
                local prop = spec and spec[2]
                -- A column without a target property can never be imported.
                local import = prop ~= nil and prop ~= ""
                if import and entity and entity:getAllStatements(prop)[1] then
                    import = false -- statement already present for this property
                end
                if import then
                    col = tidystring(col)
                    if kind == "wikilink" then
                        local val = resolveqid(col)
                        if val then
                            addstatement(prop, val)
                        end
                    elseif kind == "year" then
                        local val = string.match(col, "%d%d%d%d")
                        if val then
                            addstatement(prop, "+" .. val .. "-00-00T00:00:00Z/9")
                        end
                    elseif kind == "quantity" then
                        local val = string.gsub(col, ",", "") -- remove any commas
                        val = string.match(val, "%d+%.?%d*") -- extract value, possibly from inside convert template
                        if val then
                            local unit = spec[3]
                            if unit and unit ~= "" then
                                -- The unit is written without its leading "Q".
                                unit = (string.gsub(unit, "^[Qq]", ""))
                                val = val .. "U" .. unit
                            end
                            addstatement(prop, val)
                        end
                    elseif kind == "text" then
                        if col ~= "" then
                            addstatement(prop, '"' .. col .. '"')
                        end
                    elseif kind == "coord" then
                        if col ~= "" then
                            local val = mw.getCurrentFrame():preprocess(col)
                            local lat = coord2text(val, "lat")
                            local long = coord2text(val, "long")
                            -- Skip cells from which no coordinate could be extracted.
                            if lat and long then
                                addstatement(prop, "@" .. lat .. "/" .. long)
                            end
                        end
                    end
                end
            end
        end
    end
    return table.concat(out)
end
return p