-- Module:Sandbox/Peter coxhead/Tests
--
-- From WikiProjectMed
-- Jump to navigation Jump to search
require('strict')
local TaxonItalics = require('Module:TaxonItalics')
local ItalicTitle = require('Module:Italic title')
local p = {}
local l = {} -- separate out local functions

-- Debugging aid: renders a list of strings as "|item<sub>len</sub>|...|",
-- where len is the character count reported by mw.ustring.len.
-- An empty list yields just "|".
function l.show(items)
	local cells = {}
	for idx = 1, #items do
		local item = items[idx]
		cells[idx] = "|" .. item .. "<sub>" .. mw.ustring.len(item) .. "</sub>"
	end
	return table.concat(cells) .. "|"
end

-- *****************************************************************************
-- ========= Constants for states =========
-- l.doSplitTaxonName tags every unit it produces with one of these values;
-- l.joinUnits reads the tags to decide where italic markup goes.
local startState = 0 -- no unit started yet (initial state; also entered after an entity has been saved)
local italState = 1 -- unit of ordinary characters, which will be italicized
local whiteSpState = 2 -- unit of whitespace (also used for non-breaking space entities)
local nonItalState = 3 -- unit that is not italicized (special character or connecting term)
local entityState = 4 -- unit that is an HTML entity, started by '&' and ended by ';'
local endState = 9 -- sentinel that l.joinUnits stores after the last real state

-- Split a string representing a taxon name into 'units'.
-- The string is scanned one character at a time by a small state machine.
-- Returns two parallel tables: the units (strings) and, for each unit, the
-- state constant describing what kind of unit it is.
function l.doSplitTaxonName(str)
	-- connecting terms that are not italicized, mapped to their standard form
	local cTerms = {
		--subsp.
		subspecies = "subsp.",
		["subsp."] = "subsp.",
		subsp = "subsp.",
		["ssp."] = "subsp.",
		ssp = "subsp.",
		--var.
		varietas = "var.",
		["var."] = "var.",
		var = "var.",
		--subvar.
		subvarietas = "subvar.",
		["subvar."] = "subvar.",
		subvar = "subvar.",
		--f.
		forma = "f.",
		["f."] = "f.",
		f = "f.",
		--subf.
		subforma = "subf.",
		["subf."] = "subf.",
		subf = "subf.",
		--subg.
		subgenus = "subg.",
		["subg."] = "subg.",
		subg = "subg.",
		--sect.
		section = "sect.",
		["sect."] = "sect.",
		sect = "sect.",
		--subsect.
		subsection = "subsect.",
		["subsect."] = "subsect.",
		subsect = "subsect.",
		--ser.
		series = "ser.",
		["ser."] = "ser.",
		ser = "ser.",
		--subser.
		subseries = "subser.",
		["subser."] = "subser.",
		subser = "subser.",
		--cf.
		cf = "cf.",
		["cf."] = "cf.",
		["c.f."] = "cf."
	}
	local specialChrs = '[%(%)×%+]' -- pattern listing all specially treated characters
	local units, states = {}, {}
	local count = 0
	local state = startState
	local currUnit = ''
	local chr

	-- stateFor gives the state of a unit that begins with the character c:
	-- space, special character ( ) × +, start of an entity, or anything else
	local function stateFor(c)
		if c == ' ' then
			return whiteSpState
		elseif mw.ustring.match(c, specialChrs) then
			return nonItalState
		elseif c == '&' then
			return entityState
		end
		return italState
	end

	-- store appends the current unit and its state to the result tables
	local function store()
		count = count + 1
		units[count] = currUnit
		states[count] = state
	end

	-- saveUnit stores the current unit, then starts a new one with the
	-- current character; a completed word that is a connecting term is
	-- replaced by its standard form and marked as not italicized
	local function saveUnit(nextState)
		if state == italState and cTerms[currUnit] then
			currUnit = cTerms[currUnit]
			state = nonItalState
		end
		store()
		currUnit = chr
		state = nextState
	end

	for i = 1, mw.ustring.len(str) do
		chr = mw.ustring.sub(str, i, i)
		if state == entityState then
			-- everything up to and including ';' belongs to the entity
			currUnit = currUnit .. chr
			if chr == ';' then
				chr = '' -- the unit after the entity starts out empty
				-- nonbreaking spaces are treated as whitespace
				if currUnit == '&nbsp;' or currUnit == '&#160;' or currUnit == '&#xA0;' or currUnit == '&#x00A0;' then
					state = whiteSpState
				end
				saveUnit(startState)
			end
		elseif state == startState then
			-- nothing to save yet: the character simply opens a unit
			currUnit = chr
			state = stateFor(chr)
		elseif state == whiteSpState then
			-- further spaces are dropped; any other character ends the unit
			if chr ~= ' ' then
				saveUnit(stateFor(chr))
			end
		elseif state == nonItalState then
			-- every character after a non-italic unit starts a new unit
			saveUnit(stateFor(chr))
		else -- state == italState
			local nextState = stateFor(chr)
			if nextState == italState then
				currUnit = currUnit .. chr
			else
				saveUnit(nextState)
			end
		end
	end
	-- save the last unit and its state (no connecting-term check here)
	store()
	return units, states
end

-- Entry point: returns the taxon name given as the first argument with
-- italic markup applied.
-- The name is first cleaned (runs of whitespace collapsed, existing bold and
-- italic wiki markup and span tags removed). A final word of the form
-- "(lowercase...)" is treated as a disambiguating term: it is split off
-- before italicizing and appended unchanged afterwards.
function p.italicizeTaxonName(frame)
	local str = frame.args[1] or ""
	-- first clean up the name
	str = mw.ustring.gsub(str, "%s+", " ")
	str = mw.ustring.gsub(str, "'''", "")
	str = mw.ustring.gsub(str, "''", "")
	str = mw.ustring.gsub(str, "</?span.->", "")
	-- remove the last 'word' if it appears to be a disambiguating term
	local last = l.lastWord(str)
	if mw.ustring.match(last, "%([a-z].*%)") then
		str = mw.ustring.sub(str, 1, -mw.ustring.len(last) - 1)
	else
		last = ''
	end
	-- now split up the name into 'units'; these must be locals because the
	-- module runs under require('strict'), which rejects undeclared globals
	local units, states = l.doSplitTaxonName(str)
	-- finally, put everything back together, correctly italicized
	return l.joinUnits(units, states) .. last
end

-- Join the units produced by l.doSplitTaxonName back into one string,
-- wrapping runs of italic units in <i>...</i>.
--   units  - sequence of strings
--   states - sequence of state constants, parallel to units; two endState
--            sentinels are stored after its last entry
-- An italic run continues across a whitespace unit when the unit after the
-- whitespace is italic too, so "Aus bus" becomes one <i>...</i> span.
-- The result starts with a newline.
function l.joinUnits(units, states)
	local n = #units
	-- sentinels so that the two-unit lookahead below reads defined values
	states[n+1] = endState
	states[n+2] = endState
	local parts = { '\n' }
	local inItalics = false -- true while an <i> tag is open
	for j = 1, n do
		if states[j] == italState then
			if not inItalics then
				parts[#parts + 1] = "<i>"
				inItalics = true
			end
			parts[#parts + 1] = units[j]
			-- close the run unless it carries on after a single whitespace unit
			if not (states[j+1] == whiteSpState and states[j+2] == italState) then
				parts[#parts + 1] = "</i>"
				inItalics = false
			end
		else
			parts[#parts + 1] = units[j]
		end
	end
	return table.concat(parts)
end
-- *****************************************************************************

-- Test: expands Template:Taxonomy/<arg 1> with the parameter
-- 'machine code' set to <arg 2>, inside pcall so a failure is reported
-- rather than raised. Returns the pcall status followed by the expansion
-- (or the error message) between '>' and '<'.
function p.test(frame)
	local taxon = frame.args[1] or ""
	local item = frame.args[2] or ""
	local request = { title = 'Template:Taxonomy/' .. taxon, args = { ['machine code'] = item } }
	local ok, info = pcall(frame.expandTemplate, frame, request)
	local prefix = ok and 'ok=true; >' or 'ok=false; >'
	return prefix .. info .. '<'
end

-- Test: reports whether the string in argument 1 matches the ustring
-- pattern in argument 2 (default: a parenthesized term starting with a
-- capital letter).
function p.test1(frame)
	local subject = frame.args[1] or ''
	local pat = frame.args[2] or "%([A-Z].*%)"
	return mw.ustring.match(subject, pat) and 'matched' or 'not matched'
end

-- Test: stores argument 1 in a table nested inside another table and
-- reads it back, returning it after a fixed label.
function p.test2(frame)
	local records = {
		{ a = frame.args[1] or '', e = 'Argument 1 = ' },
	}
	local first = records[1]
	return first.e .. first.a
end

-- Test: splits argument 1 on the plain-text separator in argument 2 and
-- lists the pieces, one per line, showing empty pieces as 'EMPTY' (and nil
-- ones, should any occur, as 'NIL'). Also checks that table.concat works
-- on a single explicit index (9) of a table that has no entries at 1..8.
function p.test3(frame)
	local a1 = frame.args[1] or ''
	local a2 = frame.args[2] or ''
	local t = mw.text.split( a1, a2, true )
	-- must be local: the module runs under require('strict'), which raises
	-- an error on assignment to an undeclared global
	local res = ''
	for i = 1, #t do
		if t[i] == nil then
			t[i] = 'NIL'
		elseif t[i] == '' then
			t[i] = 'EMPTY'
		end
		res = res .. t[i] .. '<br>'
	end
	local tbl = {}
	tbl[1] = {}
	tbl[1][9] = 'TEST'
	return res .. '<br>/' .. tbl[1][9].. '<br>/' .. '<br>/' .. table.concat(tbl[1],',',9,9)
end

-- Test: shows argument 1 and its first character; a leading '[' is
-- reported as '*wikilink*'.
function p.test4(frame)
	local arg1 = frame.args[1] or ''
	local lead = mw.ustring.sub(arg1, 1, 1)
	if lead == '[' then
		lead = '*wikilink*'
	end
	return table.concat({ 'Frame arg1 = ', arg1, ', first char = ', lead })
end

-- Test: builds a flat table and a table of records, then concatenates
-- values read from both (including the number 0).
function p.test5(frame)
	local flat = { 'test' }
	local records = { { k = 'test', v = 0 } }
	local rec = records[1]
	return flat[1] .. ' ' .. ' ' .. rec.k .. '+' .. rec.v
end

-- Test: reports whether the page Template:Taxonomy/<arg 1> exists, using
-- the title object's 'exists' field. The taxon defaults to 'Life'.
-- A name that cannot form a valid title is reported as not existing.
function p.existsTest1(frame)
	local taxon = frame.args[1] or 'Life'
	local name = 'Taxonomy/' .. taxon
	-- mw.title.new returns nil for an invalid title, so guard before indexing
	local title = mw.title.new(name, 'Template')
	if title and title.exists then
		return name .. ' exists'
	end
	return name .. ' does not exist'
end

-- Test: reports whether Template:Taxonomy/<arg 1> exists by trying to
-- expand it (with 'machine code' set to <arg 2>, default 'all') inside
-- pcall; a failed expansion is reported as "does not exist".
function p.existsTest2(frame)
	local taxon = frame.args[1] or 'Life'
	local item = frame.args[2] or 'all'
	local template = 'Template:Taxonomy/' .. taxon
	local expanded = pcall(frame.expandTemplate, frame, { title = template, args = { ['machine code'] = item } })
	return template .. (expanded and ' exists' or ' does not exist')
end

-- Entry point: returns the genus part of the name in argument 1
-- (see l.genus), reading from the first character.
function p.getGenus(frame)
	return l.genus(frame.args[1] or '', 1)
end

-- Returns the first whitespace-delimited word of str, starting at character
-- position init (default 1). If that word is the hybrid sign (×, U+00D7),
-- the following word is appended after a single space, e.g. "× Aus".
-- The word after the sign is looked up exactly once, two characters further
-- on (sign + one space), so repeated hybrid signs cannot cause endless
-- recursion.
function l.genus(str, init)
	local start = init or 1
	local res = mw.ustring.match(str, '^[^%s]*', start)
	if res == mw.ustring.char(215) then
		local following = mw.ustring.match(str, '^[^%s]*', start + 2)
		res = res .. ' ' .. (following or '')
	end
	return res
end

-- Entry point: returns the last word of argument 1 (see l.lastWord).
function p.getLastWord(frame)
	return l.lastWord(frame.args[1] or '')
end

-- Returns what follows the last whitespace character in str, or str
-- itself when it contains no whitespace.
function l.lastWord(str)
	-- the greedy '.*' makes the match run up to the final whitespace character
	local tail, removed = mw.ustring.gsub(str, '.*%s', '', 1)
	if removed > 0 then
		return tail
	end
	return str
end

-- Test: replaces the text from the first capital letter up to the next
-- space by that letter plus '.', keeping the rest, and appends the number
-- of substitutions made.
function p.abbreviate(frame)
	local input = frame.args[1] or ''
	local shortened, count = mw.ustring.gsub(input, '([A-Z]).- (.*)', '%1. %2')
	return table.concat({ shortened, ' (', tostring(count), ' matches)' })
end

-- Entry point: formats argument 1 with the 'Taxon italics' template and
-- passes the result to the DISPLAYTITLE parser function, returning
-- whatever that parser function returns.
function p.italicTaxonTitle(frame)
	local rawName = frame.args[1] or ''
	local formatted = frame:expandTemplate({ title = 'Taxon italics', args = { rawName } })
	return frame:callParserFunction({ name = 'DISPLAYTITLE', args = { formatted } })
end

-- Checks a link target (argument 1) against its link text (argument 2).
-- Returns true when either is empty or both are the same string.
-- Otherwise returns whether the page named by the link text is a redirect
-- whose target is the page named by the link target. If either name cannot
-- form a valid title the result is false.
function p.linkCheck(frame)
	local linkTarget = frame.args[1] or ''
	local linkText = frame.args[2] or ''
	if linkTarget == '' or linkText == '' or linkTarget == linkText then
		return true
	end
	local linkTargetTitle = mw.title.new(linkTarget)
	local linkTextTitle = mw.title.new(linkText)
	-- mw.title.new returns nil for an invalid title; avoid indexing nil
	if not linkTargetTitle or not linkTextTitle then
		return false
	end
	return linkTextTitle.redirectTarget == linkTargetTitle
end

-- Test: parses the species name in argument 1 with l.doParseSpeciesName
-- and returns the three parts in a labelled string.
function p.parseSpeciesName(frame)
	local genus, disambig, species = l.doParseSpeciesName(frame.args[1] or '')
	return table.concat({ 'genus =', genus, ', disambig=', disambig, ', species=', species })
end

-- Splits a species name on single spaces into three parts, returned in
-- this order:
--   genus    - the first word; if that word is the hybrid sign (×, U+00D7)
--              the next word is appended after a space
--   disambig - the following word if it starts with '(', otherwise ''
--   species  - the next word; if that word is the hybrid sign the word
--              after it is appended after a space
-- Parts for which the name has no words left are returned as ''; a hybrid
-- sign with nothing after it gives '' for that part.
function l.doParseSpeciesName(speciesName)
	local hybridSign = mw.ustring.char(215)
	local words = mw.text.split(speciesName, " ", true)
	local nWords = #words
	local currWord = 1
	if currWord > nWords then return '', '', '' end
	local genus = words[currWord]
	if genus == hybridSign then
		currWord = currWord + 1
		if currWord > nWords then return '', '', '' end
		genus = genus .. ' ' .. words[currWord]
	end
	currWord = currWord + 1
	if currWord > nWords then return genus, '', '' end
	local disambig = ''
	local species = words[currWord]
	if mw.ustring.sub(species, 1, 1) == mw.ustring.char(40) then -- '('
		disambig = species
		currWord = currWord + 1
		if currWord > nWords then return genus, disambig, '' end
		species = words[currWord]
	end
	if species == hybridSign then
		currWord = currWord + 1
		if currWord > nWords then return genus, disambig, '' end
		species = species .. ' ' .. words[currWord]
	end
	return genus, disambig, species
end
-- =============================================================================
-- Entry point: passes arguments 1 to 7 (each defaulting to '') on to
-- l.doinfraspeciesboxName and returns its result.
function p.infraspeciesboxName(frame)
	local args = frame.args
	return l.doinfraspeciesboxName(
		args[1] or '', -- name
		args[2] or '', -- genus
		args[3] or '', -- species
		args[4] or '', -- ct (connecting term)
		args[5] or '', -- infraspecies
		args[6] or '', -- basePageTitle
		args[7] or ''  -- italicTitle
	)
end
	
-- Works out the name (caption) for an infraspecies taxobox and, where
-- appropriate, sets an italicized page title.
-- The taxon name is built from genus (with any trailing parenthesized
-- disambiguator and anything from the first '/' removed), species, the
-- connecting term ct if not empty, and infraspecies. Italicization applies
-- only when italicTitle is not 'no' and basePageTitle equals that taxon name.
-- Returns name, which defaults to basePageTitle (italicized if applicable).
-- With italicTitle == 'test' the formatted page title is appended to the
-- returned name instead of being passed to DISPLAYTITLE.
function l.doinfraspeciesboxName(name, genus, species, ct, infraspecies, basePageTitle, italicTitle)
	-- strip any disambig and qualifier from the genus
	local bareGenus = mw.ustring.gsub(genus, '%s+%b()$', '', 1)
	bareGenus = mw.ustring.gsub(bareGenus, '/.*$', '', 1)
	local taxon
	if ct == '' then
		taxon = bareGenus .. ' ' .. species .. ' ' .. infraspecies
	else
		taxon = bareGenus .. ' ' .. species .. ' ' .. ct .. ' ' .. infraspecies
	end
	-- use basePageTitle to match taxon
	local italicizeP = italicTitle ~= 'no' and (basePageTitle == taxon)
	if not italicizeP then
		-- nothing to italicize: only the default caption may need filling in
		if name == '' then
			name = basePageTitle
		end
		return name
	end
	-- deal with taxobox name (i.e. its caption)
	if name == '' then
		name = TaxonItalics.italicizeTaxonName(basePageTitle, false, false)
	end
	-- deal with page title; formatting it with DISPLAYTITLE needs the full
	-- page title, and any parenthesized term is not italicized
	local pageTitle = mw.title.getCurrentTitle().text
	pageTitle = TaxonItalics.italicizeTaxonName(pageTitle, false, false, true)
	if italicTitle == 'test' then
		name = name .. ' \\Italic title\\ ' .. pageTitle -- for testing and debugging
	else
		mw.getCurrentFrame():callParserFunction('DISPLAYTITLE', pageTitle)
	end
	return name
end
-- =============================================================================

-- Entry point: joins the positional arguments of the parent frame into a
-- list in which every item is wrapped in bold wiki markup. The conjunction
-- comes from this frame's 'conj' argument (default "or") and is preceded
-- by a comma when there are more than two items.
function p.boldList(frame)
	local items = {}
	for _, value in ipairs(frame:getParent().args) do
		items[#items + 1] = value
	end
	local lead = #items > 2 and ", " or " "
	local conj = "'''" .. lead .. (frame.args.conj or "or") .. " '''"
	return "'''" .. mw.text.listToText(items, "''', '''", conj) .. "'''"
end

return p