Module:Citation/CS1/Identifiers

From WikiProjectMed
Jump to navigation Jump to search

Documentation for this module may be created at Module:Citation/CS1/Identifiers/doc

   1 --[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------
   2 ]]
   3 
   4 local has_accept_as_written, is_set, in_array, set_message, select_one,			-- functions in Module:Citation/CS1/Utilities
   5 		substitute, make_wikilink;
   6 
   7 local z;																		-- table of tables defined in Module:Citation/CS1/Utilities
   8 
   9 local cfg;																		-- table of configuration tables that are defined in Module:Citation/CS1/Configuration
  10 
  11 
  12 --[[--------------------------< P A G E   S C O P E   V A R I A B L E S >--------------------------------------
  13 
  14 declare variables here that have page-wide scope that are not brought in from other modules; that are created here and used here
  15 
  16 ]]
  17 
  18 local auto_link_urls = {};														-- holds identifier URLs for those identifiers that can auto-link |title=
  19 
  20 
  21 --============================<< H E L P E R   F U N C T I O N S >>============================================
  22 
  23 --[[--------------------------< W I K I D A T A _ A R T I C L E _ N A M E _ G E T >----------------------------
  24 
  25 as an aid to internationalizing identifier-label wikilinks, gets identifier article names from Wikidata.
  26 
  27 returns :<lang code>:<article title> when <q> has an <article title> for <lang code>; nil else
  28 
  29 for identifiers that do not have q, returns nil
  30 
  31 for wikis that do not have mw.wikibase installed, returns nil
  32 
  33 ]]
  34 
  35 local function wikidata_article_name_get (q)
  36 	if not is_set (q) or (q and not mw.wikibase) then							-- when no q number or when a q number but mw.wikibase not installed on this wiki
  37 		return nil;																-- abandon
  38 	end
  39 
  40 	local wd_article;
  41 	local this_wiki_code = cfg.this_wiki_code;									-- Wikipedia subdomain; 'en' for en.wikipedia.org
  42 
  43 	wd_article = mw.wikibase.getSitelink (q, this_wiki_code .. 'wiki');			-- fetch article title from WD; nil when no title available at this wiki
  44 
  45 	if wd_article then
  46 		wd_article = table.concat ({':', this_wiki_code, ':', wd_article});		-- interwiki-style link without brackets if taken from WD; leading colon required
  47 	end
  48 
  49 	return wd_article;															-- article title from WD; nil else
  50 end
  51 
  52 
  53 --[[--------------------------< L I N K _ L A B E L _ M A K E >------------------------------------------------
  54 
  55 common function to create identifier link label from handler table or from Wikidata
  56 
  57 returns the first available of
  58 	1. redirect from local wiki's handler table (if enabled)
  59 	2. Wikidata (if there is a Wikidata entry for this identifier in the local wiki's language)
  60 	3. label specified in the local wiki's handler table
  61 	
  62 ]]
  63 
  64 local function link_label_make (handler)
  65 	local wd_article;
  66 	
  67 	if not (cfg.use_identifier_redirects and is_set (handler.redirect)) then	-- redirect has priority so if enabled and available don't fetch from Wikidata because expensive
  68 		wd_article = wikidata_article_name_get (handler.q);						-- if Wikidata has an article title for this wiki, get it;
  69 	end
  70 	
  71 	return (cfg.use_identifier_redirects and is_set (handler.redirect) and handler.redirect) or wd_article or handler.link;
  72 end
  73 
  74 
  75 --[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------
  76 
  77 Formats a wiki-style external link
  78 
  79 ]]
  80 
  81 local function external_link_id (options)
  82 	local url_string = options.id;
  83 	local ext_link;
  84 	local this_wiki_code = cfg.this_wiki_code;									-- Wikipedia subdomain; 'en' for en.wikipedia.org
  85 	local wd_article;															-- article title from Wikidata
  86 	
  87 	if options.encode == true or options.encode == nil then
  88 		url_string = mw.uri.encode (url_string, 'PATH');
  89 	end
  90 
  91 	if options.auto_link and is_set (options.access) then
  92 		auto_link_urls[options.auto_link] = table.concat ({options.prefix, url_string, options.suffix});
  93 	end
  94 
  95 	ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki (options.id));
  96 	if is_set (options.access) then
  97 		ext_link = substitute (cfg.presentation['ext-link-access-signal'], {cfg.presentation[options.access].class, cfg.presentation[options.access].title, ext_link});	-- add the free-to-read / paywall lock
  98 	end
  99 
 100 	return table.concat	({
 101 		make_wikilink (link_label_make (options), options.label),				-- redirect, Wikidata link, or locally specified link (in that order)
 102 		options.separator or '&nbsp;',
 103 		ext_link
 104 		});
 105 end
 106 
 107 
 108 --[[--------------------------< I N T E R N A L _ L I N K _ I D >----------------------------------------------
 109 
 110 Formats a wiki-style internal link
 111 
 112 TODO: Does not currently need to support options.access, options.encode, auto-linking and COinS (as in external_link_id),
 113 but may be needed in the future for :m:Interwiki_map custom-prefixes like :arxiv:, :bibcode:, :DOI:, :hdl:, :ISSN:,
 114 :JSTOR:, :Openlibrary:, :PMID:, :RFC:.
 115 
 116 ]]
 117 
 118 local function internal_link_id (options)
 119 	local id = mw.ustring.gsub (options.id, '%d', cfg.date_names.local_digits);	-- translate 'local' digits to Western 0-9
 120 
 121 	return table.concat (
 122 		{
 123 		make_wikilink (link_label_make (options), options.label),				-- wiki-link the identifier label
 124 		options.separator or '&nbsp;',											-- add the separator
 125 		make_wikilink (
 126 			table.concat (
 127 				{
 128 				options.prefix,
 129 				id,																-- translated to Western digits
 130 				options.suffix or ''
 131 				}),
 132 			substitute (cfg.presentation['bdi'], {'', mw.text.nowiki (options.id)})	-- bdi tags to prevent Latin script identifiers from being reversed at RTL language wikis
 133 			);																	-- nowiki because MediaWiki still has magic links for ISBN and the like; TODO: is it really required?
 134 		});
 135 end
 136 
 137 
 138 --[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------
 139 
 140 Determines if a PMC identifier's online version is embargoed. Compares the date in |pmc-embargo-date= against
 141 today's date.  If embargo date is in the future, returns the content of |pmc-embargo-date=; otherwise, returns
 142 an empty string because the embargo has expired or because |pmc-embargo-date= was not set in this cite.
 143 
 144 ]]
 145 
 146 local function is_embargoed (embargo)
 147 	if is_set (embargo) then
 148 		local lang = mw.getContentLanguage();
 149 		local good1, embargo_date, todays_date;
 150 		good1, embargo_date = pcall (lang.formatDate, lang, 'U', embargo);
 151 		todays_date = lang:formatDate ('U');
 152 	
 153 		if good1 then															-- if embargo date is a good date
 154 			if tonumber (embargo_date) >= tonumber (todays_date) then			-- is embargo date is in the future?
 155 				return embargo;													-- still embargoed
 156 			else
 157 				set_message ('maint_pmc_embargo');								-- embargo has expired; add main cat
 158 				return '';														-- unset because embargo has expired
 159 			end
 160 		end
 161 	end
 162 	return '';																	-- |pmc-embargo-date= not set return empty string
 163 end
 164 
 165 
 166 --[=[-------------------------< I S _ V A L I D _ B I O R X I V _ D A T E >------------------------------------
 167 
 168 returns true if:
 169 	2019-12-11T00:00Z <= biorxiv_date < today + 2 days
 170 	
 171 The dated form of biorxiv identifier has a start date of 2019-12-11.  The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400
 172 
 173 biorxiv_date is the date provided in those |biorxiv= parameter values that are dated at time 00:00:00 UTC
 174 today is the current date at time 00:00:00 UTC plus 48 hours
 175 	if today is 2015-01-01T00:00:00 then
 176 		adding 24 hours gives 2015-01-02T00:00:00 – one second more than today
  177 		adding 48 hours gives 2015-01-03T00:00:00 – one second more than tomorrow
 178 
 179 This function does not work if it is fed month names for languages other than English.  Wikimedia #time: parser
 180 apparently doesn't understand non-English date month names. This function will always return false when the date
 181 contains a non-English month name because good1 is false after the call to lang.formatDate().  To get around that
 182 call this function with YYYY-MM-DD format dates.
 183 
 184 ]=]
 185 
 186 local function is_valid_biorxiv_date (biorxiv_date)
 187 	local good1, good2;
 188 	local biorxiv_ts, tomorrow_ts;												-- to hold Unix timestamps representing the dates
 189 	local lang_object = mw.getContentLanguage();
 190 
 191 	good1, biorxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_date);		-- convert biorxiv_date value to Unix timestamp 
 192 	good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' );	-- today midnight + 2 days is one second more than all day tomorrow
 193 	
 194 	if good1 and good2 then														-- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
 195 		biorxiv_ts = tonumber (biorxiv_ts) or lang_object:parseFormattedNumber (biorxiv_ts);	-- convert to numbers for the comparison;
 196 		tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
 197 	else
 198 		return false;															-- one or both failed to convert to Unix timestamp
 199 	end
 200 
 201 	return ((1576022400 <= biorxiv_ts) and (biorxiv_ts < tomorrow_ts))			-- 2012-12-11T00:00Z <= biorxiv_date < tomorrow's date
 202 end
 203 
 204 
 205 --[[--------------------------< IS _ V A L I D _ I S X N >-----------------------------------------------------
 206 
 207 ISBN-10 and ISSN validator code calculates checksum across all ISBN/ISSN digits including the check digit.
 208 ISBN-13 is checked in isbn().
 209 
 210 If the number is valid the result will be 0. Before calling this function, ISBN/ISSN must be checked for length
 211 and stripped of dashes, spaces and other non-ISxN characters.
 212 
 213 ]]
 214 
 215 local function is_valid_isxn (isxn_str, len)
 216 	local temp = 0;
 217 	isxn_str = { isxn_str:byte(1, len) };										-- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58
 218 	len = len + 1;																-- adjust to be a loop counter
 219 	for i, v in ipairs (isxn_str) do											-- loop through all of the bytes and calculate the checksum
 220 		if v == string.byte ("X" ) then											-- if checkdigit is X (compares the byte value of 'X' which is 0x58)
 221 			temp = temp + 10 * (len - i);										-- it represents 10 decimal
 222 		else
 223 			temp = temp + tonumber (string.char (v) )*(len-i);
 224 		end
 225 	end
 226 	return temp % 11 == 0;														-- returns true if calculation result is zero
 227 end
 228 
 229 
 230 --[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >-----------------------------------------------
 231 
 232 ISBN-13 and ISMN validator code calculates checksum across all 13 ISBN/ISMN digits including the check digit.
 233 If the number is valid, the result will be 0. Before calling this function, ISBN-13/ISMN must be checked for length
 234 and stripped of dashes, spaces and other non-ISxN-13 characters.
 235 
 236 ]]
 237 
 238 local function is_valid_isxn_13 (isxn_str)
 239 	local temp=0;
 240 	
 241 	isxn_str = { isxn_str:byte(1, 13) };										-- make a table of byte values '0' → 0x30 .. '9' → 0x39
 242 	for i, v in ipairs (isxn_str) do
 243 		temp = temp + (3 - 2*(i % 2)) * tonumber (string.char (v) );			-- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit
 244 	end
 245 	return temp % 10 == 0;														-- sum modulo 10 is zero when ISBN-13/ISMN is correct
 246 end
 247 
 248 
 249 --[[--------------------------< N O R M A L I Z E _ L C C N >--------------------------------------------------
 250 
 251 LCCN normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)
 252 1. Remove all blanks.
 253 2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.
 254 3. If there is a hyphen in the string:
 255 	a. Remove it.
 256 	b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out):
 257 		1. All these characters should be digits, and there should be six or less. (not done in this function)
 258 		2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.
 259 
 260 Returns a normalized LCCN for lccn() to validate.  There is no error checking (step 3.b.1) performed in this function.
 261 ]]
 262 
 263 local function normalize_lccn (lccn)
 264 	lccn = lccn:gsub ("%s", "");												-- 1. strip whitespace
 265 
 266 	if nil ~= string.find (lccn, '/') then
 267 		lccn = lccn:match ("(.-)/");											-- 2. remove forward slash and all character to the right of it
 268 	end
 269 
 270 	local prefix
 271 	local suffix
 272 	prefix, suffix = lccn:match ("(.+)%-(.+)");									-- 3.a remove hyphen by splitting the string into prefix and suffix
 273 
 274 	if nil ~= suffix then														-- if there was a hyphen
 275 		suffix = string.rep("0", 6-string.len (suffix)) .. suffix;				-- 3.b.2 left fill the suffix with 0s if suffix length less than 6
 276 		lccn = prefix..suffix;													-- reassemble the LCCN
 277 	end
 278 	
 279 	return lccn;
 280 	end
 281 
 282 
 283 --============================<< I D E N T I F I E R   F U N C T I O N S >>====================================
 284 
 285 --[[--------------------------< A R X I V >--------------------------------------------------------------------
 286 
 287 See: http://arxiv.org/help/arxiv_identifier
 288 
 289 format and error check arXiv identifier.  There are three valid forms of the identifier:
 290 the first form, valid only between date codes 9107 and 0703, is:
 291 	arXiv:<archive>.<class>/<date code><number><version>
 292 where:
 293 	<archive> is a string of alpha characters - may be hyphenated; no other punctuation
 294 	<class> is a string of alpha characters - may be hyphenated; no other punctuation; not the same as |class= parameter which is not supported in this form
 295 	<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
  296 		first digit of YY for this form can only be 9 or 0
 297 	<number> is a three-digit number
 298 	<version> is a 1 or more digit number preceded with a lowercase v; no spaces (undocumented)
 299 	
 300 the second form, valid from April 2007 through December 2014 is:
 301 	arXiv:<date code>.<number><version>
 302 where:
 303 	<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
 304 	<number> is a four-digit number
 305 	<version> is a 1 or more digit number preceded with a lowercase v; no spaces
 306 
 307 the third form, valid from January 2015 is:
 308 	arXiv:<date code>.<number><version>
 309 where:
 310 	<date code> and <version> are as defined for 0704-1412
 311 	<number> is a five-digit number
 312 ]]
 313 
 314 local function arxiv (options)
 315 	local id = options.id;
 316 	local class = options.Class;												-- TODO: lowercase?
 317 	local handler = options.handler;
 318 	local year, month, version;
 319 	local err_cat = false;														-- assume no error message
 320 	local text;																	-- output text
 321 	
 322 	if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then	-- test for the 9107-0703 format with or without version
 323 		year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$");
 324 		year = tonumber (year);
 325 		month = tonumber (month);
 326 		if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or		-- if invalid year or invalid month
 327 			((91 == year and 7 > month) or (7 == year and 3 < month)) then		-- if years ok, are starting and ending months ok?
 328 				err_cat = true;													-- flag for error message
 329 		end
 330 
 331 	elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then	-- test for the 0704-1412 with or without version
 332 		year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");
 333 		year = tonumber (year);
 334 		month = tonumber (month);
 335 		if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or			-- is year invalid or is month invalid? (doesn't test for future years)
 336 			((7 == year) and (4 > month)) then									-- when year is 07, is month invalid (before April)?
 337 				err_cat = true;													-- flag for error message
 338 		end
 339 
 340 	elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then	-- test for the 1501- format with or without version
 341 		year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");
 342 		year = tonumber (year);
 343 		month = tonumber (month);
 344 		if ((15 > year) or (1 > month or 12 < month)) then						-- is year invalid or is month invalid? (doesn't test for future years)
 345 				err_cat = true;													-- flag for error message
 346 		end
 347 
 348 	else
 349 		err_cat = true;															-- not a recognized format; flag for error message
 350 	end
 351 
 352 	if err_cat then
 353 		options.coins_list_t['ARXIV'] = nil;									-- when error, unset so not included in COinS
 354 	end
 355 	
 356 	err_cat = err_cat and table.concat ({' ', set_message ('err_bad_arxiv')}) or '';	-- set error message if flag is true
 357 	
 358 	text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 359 			prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}) .. err_cat;
 360 
 361 	if is_set (class) then
 362 		if id:match ('^%d+') then
 363 			text = table.concat ({text, ' [[//arxiv.org/archive/', class, ' ', class, ']]'});	-- external link within square brackets, not wikilink
 364 		else
 365 			text = table.concat ({text, ' ', set_message ('err_class_ignored')});		
 366 		end
 367 	end
 368 
 369 	return text;	
 370 end
 371 
 372 
 373 --[[--------------------------< B I B C O D E >--------------------------------------------------------------------
 374 
 375 Validates (sort of) and formats a bibcode ID.
 376 
 377 Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes
 378 
 379 But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters
 380 and first four digits must be a year.  This function makes these tests:
 381 	length must be 19 characters
 382 	characters in position
 383 		1–4 must be digits and must represent a year in the range of 1000 – next year
 384 		5 must be a letter
 385 		6–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. )
 386 		9–18 must be letter, digit, or dot
 387 		19 must be a letter or dot
 388 
 389 ]]
 390 
 391 local function bibcode (options)
 392 	local id = options.id;
 393 	local access = options.access;
 394 	local handler = options.handler;
 395 	local err_type;
 396 	local year;
 397 
 398 	local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 399 		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode,
 400 		access = access});
 401 	
 402 	if 19 ~= id:len() then
 403 		err_type = cfg.err_msg_supl.length;
 404 	else
 405 		year = id:match ("^(%d%d%d%d)[%a][%w&%.][%w&%.][%w&%.][%w.]+[%a%.]$");
 406 		if not year then														-- if nil then no pattern match
 407 			err_type = cfg.err_msg_supl.value;									-- so value error
 408 		else
 409 			local next_year = tonumber (os.date ('%Y')) + 1;					-- get the current year as a number and add one for next year
 410 			year = tonumber (year);												-- convert year portion of bibcode to a number
 411 			if (1000 > year) or (year > next_year) then
 412 				err_type = cfg.err_msg_supl.year;								-- year out of bounds
 413 			end
 414 			if id:find('&%.') then
 415 				err_type = cfg.err_msg_supl.journal;							-- journal abbreviation must not have '&.' (if it does it's missing a letter)
 416 			end
 417 		end
 418 	end
 419 
 420 	if is_set (err_type) then													-- if there was an error detected
 421 		text = text .. ' ' .. set_message ('err_bad_bibcode', {err_type});
 422 		options.coins_list_t['BIBCODE'] = nil;									-- when error, unset so not included in COinS
 423 
 424 	end
 425 	return text;
 426 end
 427 
 428 
 429 --[[--------------------------< B I O R X I V >-----------------------------------------------------------------
 430 
 431 Format bioRxiv ID and do simple error checking.  Before 2019-12-11, biorXiv IDs were 10.1101/ followed by exactly
 432 6 digits.  After 2019-12-11, biorXiv IDs retained the six-digit identifier but prefixed that with a yyyy.mm.dd. 
 433 date and suffixed with an optional version identifier.
 434 
 435 The bioRxiv ID is the string of characters:
 436 	https://doi.org/10.1101/078733 -> 10.1101/078733
 437 or a date followed by a six-digit number followed by an optional version indicator 'v' and one or more digits:
 438 	https://www.biorxiv.org/content/10.1101/2019.12.11.123456v2 -> 10.1101/2019.12.11.123456v2
 439 	
 440 see https://www.biorxiv.org/about-biorxiv
 441 
 442 ]]
 443 
 444 local function biorxiv (options)
 445 	local id = options.id;
 446 	local handler = options.handler;
 447 	local err_cat = true;														-- flag; assume that there will be an error
 448 	
 449 	local patterns = {
 450 		'^10.1101/%d%d%d%d%d%d$',												-- simple 6-digit identifier (before 2019-12-11)
 451 		'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%dv%d+$',			-- y.m.d. date + 6-digit identifier + version (after 2019-12-11)
 452 		'^10.1101/(20[1-9]%d)%.([01]%d)%.([0-3]%d)%.%d%d%d%d%d%d$',				-- y.m.d. date + 6-digit identifier (after 2019-12-11)
 453 		}
 454 	
 455 	for _, pattern in ipairs (patterns) do										-- spin through the patterns looking for a match
 456 		if id:match (pattern) then
 457 			local y, m, d = id:match (pattern);									-- found a match, attempt to get year, month and date from the identifier
 458 
 459 			if m then															-- m is nil when id is the six-digit form
 460 				if not is_valid_biorxiv_date (y .. '-' .. m .. '-' .. d) then	-- validate the encoded date; TODO: don't ignore leap-year and actual month lengths ({{#time:}} is a poor date validator)
 461 					break;														-- date fail; break out early so we don't unset the error message
 462 				end
 463 			end
 464 			err_cat = nil;														-- we found a match so unset the error message
 465 			break;																-- and done
 466 		end
 467 	end																			-- err_cat remains set here when no match
 468 
 469 	if err_cat then
 470 		options.coins_list_t['BIORXIV'] = nil;									-- when error, unset so not included in COinS
 471 	end
 472 	
 473 	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 474 			prefix = handler.prefix, id = id, separator = handler.separator,
 475 			encode = handler.encode, access = handler.access}) .. (err_cat and (' ' .. set_message ('err_bad_biorxiv')) or '');
 476 end
 477 
 478 
 479 --[[--------------------------< C I T E S E E R X >------------------------------------------------------------
 480 
 481 CiteSeerX use their own notion of "doi" (not to be confused with the identifiers resolved via doi.org).
 482 
 483 The description of the structure of this identifier can be found at Help_talk:Citation_Style_1/Archive_26#CiteSeerX_id_structure
 484 ]]
 485 
 486 local function citeseerx (options)
 487 	local id = options.id;
 488 	local handler = options.handler;
 489 	local matched;
 490 	
 491 	local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 492 		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode,
 493 		access = handler.access});
 494 	
 495 	matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$");
 496 	if not matched then
 497 		text = text .. ' ' .. set_message ('err_bad_citeseerx' );
 498 		options.coins_list_t['CITESEERX'] = nil;								-- when error, unset so not included in COinS
 499 	end
 500 	return text;
 501 end
 502 
 503 
 504 --[[--------------------------< D O I >------------------------------------------------------------------------
 505 
 506 Formats a DOI and checks for DOI errors.
 507 
 508 DOI names contain two parts: prefix and suffix separated by a forward slash.
 509 	Prefix: directory indicator '10.' followed by a registrant code
 510 	Suffix: character string of any length chosen by the registrant
 511 
 512 This function checks a DOI name for: prefix/suffix.  If the DOI name contains spaces or endashes, or, if it ends
 513 with a period or a comma, this function will emit a bad_doi error message.
 514 
 515 DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,
 516 and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely
 517 if ever used in DOI names.
 518 
 519 ]]
 520 
 521 local function doi (options)
 522 	local id = options.id;
 523 	local inactive = options.DoiBroken
 524 	local access = options.access;
 525 	local ignore_invalid = options.accept;
 526 	local handler = options.handler;
 527 	local err_cat;
 528 
 529 	local text;
 530 	if is_set (inactive) then
 531 		local inactive_year = inactive:match("%d%d%d%d") or '';					-- try to get the year portion from the inactive date
 532 		local inactive_month, good;
 533 
 534 		if is_set (inactive_year) then
 535 			if 4 < inactive:len() then											-- inactive date has more than just a year (could be anything)
 536 				local lang_obj = mw.getContentLanguage();						-- get a language object for this wiki
 537 				good, inactive_month = pcall (lang_obj.formatDate, lang_obj, 'F', inactive);	-- try to get the month name from the inactive date
 538 				if not good then
 539 					inactive_month = nil;										-- something went wrong so make sure this is unset
 540 				end
 541 			end
 542 		else
 543 			inactive_year = nil;												-- |doi-broken-date= has something but it isn't a date
 544 		end
 545 		
 546 		if is_set (inactive_year) and is_set (inactive_month) then
 547 			set_message ('maint_doi_inactive_dated', {inactive_year, inactive_month, ' '});
 548 		elseif is_set (inactive_year) then
 549 			set_message ('maint_doi_inactive_dated', {inactive_year, '', ''});
 550 		else
 551 			set_message ('maint_doi_inactive');
 552 		end
 553 		inactive = " (" .. cfg.messages['inactive'] .. ' ' .. inactive .. ')';
 554 	end
 555 
 556 	local registrant = id:match ('^10%.([^/]+)/[^%s–]-[^%.,]$');				-- registrant set when DOI has the proper basic form
 557 	
 558 	local registrant_err_patterns = {											-- these patterns are for code ranges that are not supported 
 559 		'^[^1-3]%d%d%d%d%.%d%d*$',												-- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
 560 		'^[^1-5]%d%d%d%d$',														-- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–59999
 561 		'^[^1-9]%d%d%d%.%d%d*$',												-- 4 digits with subcode (0xxx); accepts: 1000–9999
 562 		'^[^1-9]%d%d%d$',														-- 4 digits without subcode (0xxx); accepts: 1000–9999
 563 		'^%d%d%d%d%d%d+',														-- 6 or more digits
 564 		'^%d%d?%d?$',															-- less than 4 digits without subcode (with subcode is legitimate)
 565 		'^5555$',																-- test registrant will never resolve
 566 		'[^%d%.]',																-- any character that isn't a digit or a dot
 567 		}
 568 
 569 	if not ignore_invalid then
 570 		if registrant then														-- when DOI has proper form
 571 			for i, pattern in ipairs (registrant_err_patterns) do				-- spin through error patterns
 572 				if registrant:match (pattern) then								-- to validate registrant codes
 573 					err_cat = ' ' .. set_message ('err_bad_doi');				-- when found, mark this DOI as bad
 574 					break;														-- and done
 575 				end
 576 			end
 577 		else
 578 			err_cat = ' ' .. set_message ('err_bad_doi');						-- invalid directory or malformed
 579 		end
 580 	else
 581 		set_message ('maint_doi_ignore');
 582 	end
 583 
 584 	if err_cat then
 585 		options.coins_list_t['DOI'] = nil;										-- when error, unset so not included in COinS
 586 	end
 587 	
 588 	text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 589 		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access,
 590 		auto_link = not (err_cat or is_set (inactive) or ignore_invalid) and 'doi' or nil -- do not auto-link when |doi-broken-date= has a value or when there is a DOI error or (to play it safe, after all, auto-linking is not essential) when invalid DOIs are ignored
 591 		}) .. (inactive or '');
 592 
 593 	return text .. (err_cat and err_cat or ''); -- parentheses required
 594 end
 595 
 596 
 597 --[[--------------------------< H D L >------------------------------------------------------------------------
 598 
 599 Formats an HDL with minor error checking.
 600 
 601 HDL names contain two parts: prefix and suffix separated by a forward slash.
 602 	Prefix: character string using any character in the UCS-2 character set except '/'
 603 	Suffix: character string of any length using any character in the UCS-2 character set chosen by the registrant
 604 
 605 This function checks a HDL name for: prefix/suffix.  If the HDL name contains spaces, endashes, or, if it ends
 606 with a period or a comma, this function will emit a bad_hdl error message.
 607 
 608 HDL names are case-insensitive and can incorporate any printable Unicode characters so the test for endashes and
 609 terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely
 610 if ever used in HDLs.
 611 
 612 Query string parameters are named here: http://www.handle.net/proxy_servlet.html.  query strings are not displayed
 613 but since '?' is an allowed character in an HDL, '?' followed by one of the query parameters is the only way we
 614 have to detect the query string so that it isn't URL-encoded with the rest of the identifier.
 615 
 616 ]]
 617 
 618 local function hdl (options)
 619 	local id = options.id;
 620 	local access = options.access;
 621 	local handler = options.handler;
 622 	local query_params = {														-- list of known query parameters from http://www.handle.net/proxy_servlet.html
 623 		'noredirect',
 624 		'ignore_aliases',
 625 		'auth',
 626 		'cert',
 627 		'index',
 628 		'type',
 629 		'urlappend',
 630 		'locatt',
 631 		'action',
 632 		}
 633 	
 634 	local hdl, suffix, param = id:match ('(.-)(%?(%a+).+)$');					-- look for query string
 635 	local found;
 636 
 637 	if hdl then																	-- when there are query strings, this is the handle identifier portion
 638 		for _, q in ipairs (query_params) do									-- spin through the list of query parameters
 639 			if param:match ('^' .. q) then										-- if the query string begins with one of the parameters
 640 				found = true;													-- announce a find
 641 				break;															-- and stop looking
 642 			end
 643 		end
 644 	end
 645 
 646 	if found then
 647 		id = hdl;																-- found so replace id with the handle portion; this will be URL-encoded, suffix will not
 648 	else
 649 		suffix = '';															-- make sure suffix is empty string for concatenation else
 650 	end
 651 
 652 	local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 653 			prefix = handler.prefix, id = id, suffix = suffix, separator = handler.separator, encode = handler.encode, access = access})
 654 
 655 	if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then							-- HDL must contain a forward slash, must not contain spaces, endashes, and must not end with period or comma
 656 		text = text .. ' ' .. set_message ('err_bad_hdl' );
 657 		options.coins_list_t['HDL'] = nil;										-- when error, unset so not included in COinS
 658 	end
 659 	return text;
 660 end
 661 
 662 
 663 --[[--------------------------< I S B N >----------------------------------------------------------------------
 664 
 665 Determines whether an ISBN string is valid
 666 
 667 ]]
 668 
 669 local function isbn (options)
 670 	local isbn_str = options.id;
 671 	local ignore_invalid = options.accept;
 672 	local handler = options.handler;
 673 
 674 	local function return_result (check, err_type)								-- local function to handle the various returns
 675 		local ISBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect,
 676 						prefix = handler.prefix, id = isbn_str, separator = handler.separator});
 677 		if ignore_invalid then													-- if ignoring ISBN errors
 678 			set_message ('maint_isbn_ignore');									-- add a maint category even when there is no error
 679 		else																	-- here when not ignoring
 680 			if not check then													-- and there is an error
 681 				options.coins_list_t['ISBN'] = nil;								-- when error, unset so not included in COinS
 682 				return ISBN .. set_message ('err_bad_isbn', {err_type}, false, ' ');	-- display an error message
 683 			end
 684 		end
 685 		return ISBN;
 686 	end
 687 
 688 	if nil ~= isbn_str:match ('[^%s-0-9X]') then
 689 		return return_result (false, cfg.err_msg_supl.char);					-- fail if isbn_str contains anything but digits, hyphens, or the uppercase X
 690 	end
 691 
 692 	local id = isbn_str:gsub ('[%s-]', '');										-- remove hyphens and whitespace
 693 
 694 	local len = id:len();
 695  
 696 	if len ~= 10 and len ~= 13 then
 697 		return return_result (false, cfg.err_msg_supl.length);					-- fail if incorrect length
 698 	end
 699 
 700 	if len == 10 then
 701 		if id:match ('^%d*X?$') == nil then										-- fail if isbn_str has 'X' anywhere but last position
 702 			return return_result (false, cfg.err_msg_supl.form);									
 703 		end
 704 		if not is_valid_isxn (id, 10) then										-- test isbn-10 for numerical validity
 705 			return return_result (false, cfg.err_msg_supl.check);				-- fail if isbn-10 is not numerically valid
 706 		end
 707 		if id:find ('^63[01]') then												-- 630xxxxxxx and 631xxxxxxx are (apparently) not valid isbn group ids but are used by amazon as numeric identifiers (asin)
 708 			return return_result (false, cfg.err_msg_supl.group);				-- fail if isbn-10 begins with 630/1
 709 		end
 710 		return return_result (true, cfg.err_msg_supl.check);					-- pass if isbn-10 is numerically valid
 711 	else
 712 		if id:match ('^%d+$') == nil then
 713 			return return_result (false, cfg.err_msg_supl.char);				-- fail if ISBN-13 is not all digits
 714 		end
 715 		if id:match ('^97[89]%d*$') == nil then
 716 			return return_result (false, cfg.err_msg_supl.prefix);				-- fail when ISBN-13 does not begin with 978 or 979
 717 		end
 718 		if id:match ('^9790') then
 719 			return return_result (false, cfg.err_msg_supl.group);				-- group identifier '0' is reserved to ISMN
 720 		end
 721 		return return_result (is_valid_isxn_13 (id), cfg.err_msg_supl.check);
 722 	end
 723 end
 724 
 725 
 726 --[[--------------------------< A S I N >----------------------------------------------------------------------
 727 
 728 Formats a link to Amazon.  Do simple error checking: ASIN must be mix of 10 numeric or uppercase alpha
 729 characters.  If a mix, first character must be uppercase alpha; if all numeric, ASINs must be 10-digit
 730 ISBN. If 10-digit ISBN, add a maintenance category so a bot or AWB script can replace |asin= with |isbn=.
 731 Error message if not 10 characters, if not ISBN-10, if mixed and first character is a digit.
 732 
 733 |asin=630....... and |asin=631....... are (apparently) not a legitimate ISBN though it checksums as one; these
 734 do not cause this function to emit the maint_asin message
 735 
 736 This function is positioned here because it calls isbn()
 737 
 738 ]]
 739 
 740 local function asin (options)
 741 	local id = options.id;
 742 	local domain = options.ASINTLD;
 743 	
 744 	local err_cat = ''
 745 
 746 	if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
 747 		err_cat = ' ' .. set_message ('err_bad_asin');							-- ASIN is not a mix of 10 uppercase alpha and numeric characters
 748 	else
 749 		if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then							-- if 10-digit numeric (or 9 digits with terminal X)
 750 			if is_valid_isxn (id, 10) then										-- see if ASIN value is or validates as ISBN-10
 751 				if not id:find ('^63[01]') then									-- 630xxxxxxx and 631xxxxxxx are (apparently) not a valid isbn prefixes but are used by amazon as a numeric identifier
 752 					err_cat = ' ' .. set_message ('err_bad_asin');				-- ASIN has ISBN-10 form but begins with something other than 630/1 so probably an isbn 
 753 				end
 754 			elseif not is_set (err_cat) then
 755 				err_cat = ' ' .. set_message ('err_bad_asin');					-- ASIN is not ISBN-10
 756 			end
 757 		elseif not id:match("^%u[%d%u]+$") then
 758 			err_cat = ' ' .. set_message ('err_bad_asin');						-- asin doesn't begin with uppercase alpha
 759 		end
 760 	end
 761 	if (not is_set (domain)) or in_array (domain, {'us'}) then					-- default: United States
 762 		domain = "com";
 763 	elseif in_array (domain, {'jp', 'uk'}) then									-- Japan, United Kingdom
 764 		domain = "co." .. domain;
 765 	elseif in_array (domain, {'z.cn'}) then 									-- China
 766 		domain = "cn";
 767 	elseif in_array (domain, {'au', 'br', 'mx', 'sg', 'tr'}) then				-- Australia, Brazil, Mexico, Singapore, Turkey
 768 		domain = "com." .. domain;
 769 	elseif not in_array (domain, {'ae', 'ca', 'cn', 'de', 'es', 'fr', 'in', 'it', 'nl', 'pl', 'sa', 'se', 'co.jp', 'co.uk', 'com', 'com.au', 'com.br', 'com.mx', 'com.sg', 'com.tr'}) then -- Arabic Emirates, Canada, China, Germany, Spain, France, Indonesia, Italy, Netherlands, Poland, Saudi Arabia, Sweden (as of 2021-03 Austria (.at), Liechtenstein (.li) and Switzerland (.ch) still redirect to the German site (.de) with special settings, so don't maintain local ASINs for them)
 770 			err_cat = ' ' .. set_message ('err_bad_asin_tld');					-- unsupported asin-tld value
 771 	end
 772 	local handler = options.handler;
 773 
 774 	if not is_set (err_cat) then
 775 		options.coins_list_t['ASIN'] = handler.prefix .. domain .. "/dp/" .. id;	-- experiment for asin coins
 776 	else
 777 		options.coins_list_t['ASIN'] = nil;										-- when error, unset so not included in COinS
 778 	end
 779 	
 780 	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 781 		prefix = handler.prefix .. domain .. "/dp/",
 782 		id = id, encode = handler.encode, separator = handler.separator}) .. err_cat;
 783 end
 784 
 785 
 786 --[[--------------------------< I S M N >----------------------------------------------------------------------
 787 
 788 Determines whether an ISMN string is valid.  Similar to ISBN-13, ISMN is 13 digits beginning 979-0-... and uses the
 789 same check digit calculations.  See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf
 790 section 2, pages 9–12.
 791 
 792 ismn value not made part of COinS metadata because we don't have a url or isn't a COinS-defined identifier (rft.xxx)
 793 or an identifier registered at info-uri.info (info:)
 794 
 795 ]]
 796 
 797 local function ismn (options)
 798 	local id = options.id;
 799 	local handler = options.handler;
 800 	local text;
 801 	local valid_ismn = true;
 802 	local id_copy;
 803 
 804 	id_copy = id;																-- save a copy because this testing is destructive
 805 	id = id:gsub ('[%s-]', '');													-- remove hyphens and white space
 806 
 807 	if 13 ~= id:len() or id:match ("^9790%d*$" ) == nil then					-- ISMN must be 13 digits and begin with 9790
 808 		valid_ismn = false;
 809 	else
 810 		valid_ismn=is_valid_isxn_13 (id);										-- validate ISMN
 811 	end
 812 
 813 	--	text = internal_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,		-- use this (or external version) when there is some place to link to
 814 	--		prefix = handler.prefix, id = id_copy, separator = handler.separator, encode = handler.encode})
 815 
 816 	text = table.concat (														-- because no place to link to yet
 817 		{
 818 		make_wikilink (link_label_make (handler), handler.label),
 819 		handler.separator,
 820 		id_copy
 821 		});
 822 
 823 	if false == valid_ismn then
 824 		options.coins_list_t['ISMN'] = nil;										-- when error, unset so not included in COinS; not really necessary here because ismn not made part of COinS
 825 		text = text .. ' ' .. set_message ('err_bad_ismn' )						-- add an error message if the ISMN is invalid
 826 	end 
 827 	
 828 	return text;
 829 end
 830 
 831 
 832 --[[--------------------------< I S S N >----------------------------------------------------------------------
 833 
 834 Validate and format an ISSN.  This code fixes the case where an editor has included an ISSN in the citation but
 835 has separated the two groups of four digits with a space.  When that condition occurred, the resulting link looked
 836 like this:
 837 
 838 	|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327]	-- can't have spaces in an external link
 839 	
 840 This code now prevents that by inserting a hyphen at the ISSN midpoint.  It also validates the ISSN for length
 841 and makes sure that the checkdigit agrees with the calculated value.  Incorrect length (8 digits), characters
 842 other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check ISSN error message.  The
 843 ISSN is always displayed with a hyphen, even if the ISSN was given as a single group of 8 digits.
 844 
 845 ]]
 846 
 847 local function issn (options)
 848 	local id = options.id;
 849 	local handler = options.handler;
 850 	local ignore_invalid = options.accept;
 851 
 852 	local issn_copy = id;														-- save a copy of unadulterated ISSN; use this version for display if ISSN does not validate
 853 	local text;
 854 	local valid_issn = true;
 855 
 856 	id = id:gsub ('[%s-]', '');													-- remove hyphens and whitespace
 857 
 858 	if 8 ~= id:len() or nil == id:match ("^%d*X?$" ) then						-- validate the ISSN: 8 digits long, containing only 0-9 or X in the last position
 859 		valid_issn = false;														-- wrong length or improper character
 860 	else
 861 		valid_issn = is_valid_isxn (id, 8);										-- validate ISSN
 862 	end
 863 
 864 	if true == valid_issn then
 865 		id = string.sub (id, 1, 4 ) .. "-" .. string.sub (id, 5 );				-- if valid, display correctly formatted version
 866 	else
 867 		id = issn_copy;															-- if not valid, show the invalid ISSN with error message
 868 	end
 869 
 870 	text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 871 		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode})
 872 
 873 	if ignore_invalid then
 874 		set_message ('maint_issn_ignore');
 875 	else
 876 		if false == valid_issn then
 877 			options.coins_list_t['ISSN'] = nil;									-- when error, unset so not included in COinS
 878 			text = text .. ' ' .. set_message ('err_bad_issn', (options.hkey == 'EISSN') and 'e' or '');	-- add an error message if the ISSN is invalid
 879 		end 
 880 	end
 881 	
 882 	return text
 883 end
 884 
 885 
 886 --[[--------------------------< J F M >-----------------------------------------------------------------------
 887 
 888 A numerical identifier in the form nn.nnnn.nn
 889 
 890 ]]
 891 
 892 local function jfm (options)
 893 	local id = options.id;
 894 	local handler = options.handler;
 895 	local id_num;
 896 	local err_cat = '';
 897 	
 898 	id_num = id:match ('^[Jj][Ff][Mm](.*)$');									-- identifier with jfm prefix; extract identifier
 899 
 900 	if is_set (id_num) then
 901 		set_message ('maint_jfm_format');
 902 	else																		-- plain number without JFM prefix
 903 		id_num = id;															-- if here id does not have prefix
 904 	end
 905 
 906 	if id_num and id_num:match('^%d%d%.%d%d%d%d%.%d%d$') then
 907 		id = id_num;															-- jfm matches pattern
 908 	else
 909 		err_cat = ' ' .. set_message ('err_bad_jfm' );							-- set an error message
 910 		options.coins_list_t['JFM'] = nil;										-- when error, unset so not included in COinS
 911 	end
 912 	
 913 	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 914 			prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;
 915 end
 916 
 917 
 918 --[[--------------------------< J S T O R >--------------------------------------------------------------------
 919 
 920 Format a JSTOR with some error checking
 921 
 922 ]]
 923 
 924 local function jstor (options)
 925 	local id = options.id;
 926 	local access = options.access;
 927 	local handler = options.handler;
 928 	local err_msg = '';
 929 
 930 	if id:find ('[Jj][Ss][Tt][Oo][Rr]') or id:find ('^https?://') or id:find ('%s') then
 931 		err_msg = ' ' .. set_message ('err_bad_jstor');							-- set an error message
 932 		options.coins_list_t['JSTOR'] = nil;									-- when error, unset so not included in COinS
 933 	end
 934 	
 935 	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 936 		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access}) .. err_msg;
 937 end
 938 
 939 
 940 --[[--------------------------< L C C N >----------------------------------------------------------------------
 941 
 942 Format LCCN link and do simple error checking.  LCCN is a character string 8-12 characters long. The length of
 943 the LCCN dictates the character type of the first 1-3 characters; the rightmost eight are always digits.
 944 http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/
 945 
 946 length = 8 then all digits
 947 length = 9 then lccn[1] is lowercase alpha
 948 length = 10 then lccn[1] and lccn[2] are both lowercase alpha or both digits
 949 length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lowercase alpha or both digits
 950 length = 12 then lccn[1] and lccn[2] are both lowercase alpha
 951 
 952 ]]
 953 
 954 local function lccn (options)
 955 	local lccn = options.id;
 956 	local handler = options.handler;
 957 	local err_cat = '';															-- presume that LCCN is valid
 958 	local id = lccn;															-- local copy of the LCCN
 959 
 960 	id = normalize_lccn (id);													-- get canonical form (no whitespace, hyphens, forward slashes)
 961 	local len = id:len();														-- get the length of the LCCN
 962 
 963 	if 8 == len then
 964 		if id:match("[^%d]") then												-- if LCCN has anything but digits (nil if only digits)
 965 			err_cat = ' ' .. set_message ('err_bad_lccn');						-- set an error message
 966 		end
 967 	elseif 9 == len then														-- LCCN should be adddddddd
 968 		if nil == id:match("%l%d%d%d%d%d%d%d%d") then							-- does it match our pattern?
 969 			err_cat = ' ' .. set_message ('err_bad_lccn');						-- set an error message
 970 		end
 971 	elseif 10 == len then														-- LCCN should be aadddddddd or dddddddddd
 972 		if id:match("[^%d]") then												-- if LCCN has anything but digits (nil if only digits) ...
 973 			if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then					-- ... see if it matches our pattern
 974 				err_cat = ' ' .. set_message ('err_bad_lccn');					-- no match, set an error message
 975 			end
 976 		end
 977 	elseif 11 == len then														-- LCCN should be aaadddddddd or adddddddddd
 978 		if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then	-- see if it matches one of our patterns
 979 			err_cat = ' ' .. set_message ('err_bad_lccn');						-- no match, set an error message
 980 		end
 981 	elseif 12 == len then														-- LCCN should be aadddddddddd
 982 		if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then						-- see if it matches our pattern
 983 			err_cat = ' ' .. set_message ('err_bad_lccn');						-- no match, set an error message
 984 		end
 985 	else
 986 		err_cat = ' ' .. set_message ('err_bad_lccn');							-- wrong length, set an error message
 987 	end
 988 
 989 	if not is_set (err_cat) and nil ~= lccn:find ('%s') then
 990 		err_cat = ' ' .. set_message ('err_bad_lccn');							-- lccn contains a space, set an error message
 991 	end
 992 
 993 	if is_set (err_cat) then
 994 		options.coins_list_t['LCCN'] = nil;										-- when error, unset so not included in COinS
 995 	end
 996 
 997 	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
 998 			prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode}) .. err_cat;
 999 end
1000 
1001 
--[[--------------------------< M R >--------------------------------------------------------------------------

A seven digit number; if fewer than seven digits, zero-fill leading digits to make seven digits.

A leading 'MR' (either letter case) is tolerated: the maint_mr_format message is set and the prefix is left out
of the link.  More than seven digits, or anything that is not all digits, is an error.

<options> fields read here: id, handler; on error the 'MR' entry of options.coins_list_t is unset

]]

local function mr (options)
	local id = options.id;
	local handler = options.handler;
	local err_cat = '';

	local digits = id:match ('^[Mm][Rr](%d+)$');								-- identifier with mr prefix

	if is_set (digits) then
		set_message ('maint_mr_format');										-- add maint cat
	else
		digits = id:match ('^%d+$');											-- plain number without mr prefix; nil when id is not all digits
	end

	local digit_count = digits and digits:len() or 0;

	if 0 < digit_count and digit_count <= 7 then
		id = ('0'):rep (7 - digit_count) .. digits;								-- zero-fill leading digits
	else
		err_cat = ' ' .. set_message ('err_bad_mr');							-- set an error message
		options.coins_list_t['MR'] = nil;										-- when error, unset so not included in COinS
	end

	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
			prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;
end
1034 
1035 
--[[--------------------------< O C L C >----------------------------------------------------------------------

Validate and format an OCLC ID.  https://www.oclc.org/batchload/controlnumber.en.html {{dead link}}
archived at: https://web.archive.org/web/20161228233804/https://www.oclc.org/batchload/controlnumber.en.html

Accepted forms:
	ocm followed by 8 digits
	ocn followed by 9 digits
	on followed by 10 or more digits
	(OCoLC) followed by 1 to 10 digits, no leading zero
	1 to 10 digits without a prefix

The same digit-count limit applies to the (OCoLC) form and to the form without a prefix, so that a number
accepted without the prefix is also accepted with it.  Any prefix is left out of the external link.

<options> fields read here: id, handler; on error the 'OCLC' entry of options.coins_list_t is unset

]]

local function oclc (options)
	local id = options.id;
	local handler = options.handler;
	local number;
	local err_msg = '';															-- empty string for concatenation
	local max_digits = 10;														-- change this when OCLC issues 11-digit numbers

	if id:match('^ocm%d%d%d%d%d%d%d%d$') then									-- ocm prefix and 8 digits; 001 field (12 characters)
		number = id:match('ocm(%d+)');											-- get the number
	elseif id:match('^ocn%d%d%d%d%d%d%d%d%d$') then								-- ocn prefix and 9 digits; 001 field (12 characters)
		number = id:match('ocn(%d+)');											-- get the number
	elseif id:match('^on%d%d%d%d%d%d%d%d%d%d+$') then							-- on prefix and 10 or more digits; 001 field (12 characters)
		number = id:match('^on(%d%d%d%d%d%d%d%d%d%d+)$');						-- get the number
	elseif id:match('^%(OCoLC%)[1-9]%d*$') then									-- (OCoLC) prefix and variable number digits; no leading zeros; 035 field
		number = id:match('%(OCoLC%)([1-9]%d*)');								-- get the number
		if max_digits < number:len() then
			number = nil;														-- constrain to 1 to max_digits digits
		end
	elseif id:match('^%d+$') then												-- no prefix
		number = id;															-- get the number
		if max_digits < number:len() then
			number = nil;														-- constrain to 1 to max_digits digits
		end
	end

	if number then																-- proper format
		id = number;															-- exclude prefix, if any, from external link
	else
		err_msg = ' ' .. set_message ('err_bad_oclc')							-- add an error message if the id is malformed
		options.coins_list_t['OCLC'] = nil;										-- when error, unset so not included in COinS
	end

	local text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_msg;

	return text;
end
1079 
1080 
--[[--------------------------< O P E N L I B R A R Y >--------------------------------------------------------

Formats an OpenLibrary link, and checks for associated errors.

A valid identifier is an optional 'OL' prefix followed immediately by digits followed by one of 'A', 'M', or
'W'.  The terminal letter selects the path that is appended to handler.prefix: authors, books, or works.

<options> fields read here: id, access, handler; the 'OL' entry of options.coins_list_t is set to the link
target when the identifier is valid, and unset when it is not

]]

local function openlibrary (options)
	local id = options.id;
	local handler = options.handler;
	local error_msg = '';
	local path_for_code = {														-- these are appended to the handler.prefix according to code
		['A']='authors/OL',
		['M']='books/OL',
		['W']='works/OL',
		['X']='OL'																-- not a code; spoof when 'code' in id is invalid
		};

	local without_prefix = id:gsub ('^OL', '');									-- strip optional OL prefix
	local ident, code = without_prefix:match ('^(%d+([AMW]))$');				-- digits followed by 'A', 'M', or 'W'; ident includes the code letter

	if ident then
		options.coins_list_t['OL'] = handler.prefix .. path_for_code[code] .. ident;	-- experiment for ol coins
	else
		code = 'X';																-- no code or id completely invalid
		ident = id;																-- copy id to ident so that we display the flawed identifier
		error_msg = ' ' .. set_message ('err_bad_ol');
		options.coins_list_t['OL'] = nil;										-- when error, unset so not included in COinS
	end

	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
		prefix = handler.prefix .. path_for_code[code],
		id = ident, separator = handler.separator, encode = handler.encode,
		access = options.access}) .. error_msg;
end
1117 
1118 
--[[--------------------------< O S T I >----------------------------------------------------------------------

Format OSTI and do simple error checking. OSTIs are sequential numbers beginning at 1 and counting up.  This
code checks the OSTI to see that it contains only digits and is not greater than handler.id_limit, which is
specified in the configuration; that limit will need to be updated periodically as more OSTIs are issued.

NB. 1018 is the lowest OSTI number found in the wild (so far) and resolving OK on the OSTI site

The identifier must be one or more digits; an empty identifier is reported with the err_bad_osti message like
any other malformed value instead of reaching the numeric range test.

<options> fields read here: id, access, handler; on error the 'OSTI' entry of options.coins_list_t is unset

]]

local function osti (options)
	local id = options.id;
	local access = options.access;
	local handler = options.handler;
	local err_cat = '';															-- presume that OSTI is valid

	local id_num = id:match ('^%d+$') and tonumber (id);						-- number for range testing; nil when id is not one or more digits

	if not id_num or 1018 > id_num or handler.id_limit < id_num then			-- if OSTI is malformed or outside test limit boundaries
		err_cat = ' ' .. set_message ('err_bad_osti');							-- set an error message
		options.coins_list_t['OSTI'] = nil;										-- when error, unset so not included in COinS
	end

	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
			prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = access}) .. err_cat;
end
1149 
1150 
--[[--------------------------< P M C >------------------------------------------------------------------------

Format a PMC, do simple error checking, and check for embargoed articles.

The embargo parameter takes a date for a value. If the embargo date is in the future the PMC identifier will not
be linked to the article.  If the embargo date is today or in the past, or if it is empty or omitted, then the
PMC identifier is linked to the article through the link at handler.prefix.

PMC embargo date testing is done in function is_embargoed () which is called earlier because when the citation
has |pmc=<value> but does not have a |url= then |title= is linked with the PMC link.  Function is_embargoed ()
returns the embargo date if the PMC article is still embargoed, otherwise it returns an empty string.

PMCs are sequential numbers beginning at 1 and counting up.  This code checks the PMC to see that it contains only
digits and is not greater than handler.id_limit; that limit will need to be updated periodically as more PMCs are
issued.  A leading 'PMC' (any letter case) is tolerated: the maint_pmc_format message is set and the prefix is
left out of the rendered identifier.

<options> fields read here: id, Embargo, handler; on error the 'PMC' entry of options.coins_list_t is unset

]]

local function pmc (options)
	local id = options.id;
	local embargo = options.Embargo;											-- TODO: lowercase?
	local handler = options.handler;
	local err_cat;																-- nil while the PMC is presumed valid

	local digits = id:match ('^[Pp][Mm][Cc](%d+)$');							-- identifier with PMC prefix

	if is_set (digits) then
		set_message ('maint_pmc_format');
	else
		digits = id:match ('^%d+$');											-- plain number without PMC prefix; nil when id is not all digits
	end

	if is_set (digits) then														-- digits has a value so test it
		local pmc_num = tonumber (digits);										-- number for range testing
		if 1 > pmc_num or handler.id_limit < pmc_num then						-- if PMC is outside test limit boundaries
			err_cat = ' ' .. set_message ('err_bad_pmc');						-- set an error message
		else
			id = tostring (pmc_num);											-- in range: display the number (as a string) without any prefix
		end
	else																		-- when id format incorrect
		err_cat = ' ' .. set_message ('err_bad_pmc');							-- set an error message
	end

	local err_text = err_cat or '';												-- empty string for concatenation when there is no error
	local text;

	if is_set (embargo) and is_set (is_embargoed (embargo)) then				-- is PMC still embargoed?
		text = table.concat ({													-- still embargoed so no external link
			make_wikilink (link_label_make (handler), handler.label),
			handler.separator,
			id,
			err_text
			});
	else																		-- no embargo date or embargo has expired, ok to link to article
		local auto_link;														-- do not auto-link when PMC has error
		if not err_cat then
			auto_link = 'pmc';
		end
		text = external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
			prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access,
			auto_link = auto_link
			}) .. err_text;
	end

	if err_cat then
		options.coins_list_t['PMC'] = nil;										-- when error, unset so not included in COinS
	end

	return text;
end
1216 
1217 
--[[--------------------------< P M I D >----------------------------------------------------------------------

Format PMID and do simple error checking.  PMIDs are sequential numbers beginning at 1 and counting up.  This
code checks the PMID to see that it contains only digits and is not greater than handler.id_limit; that limit
will need to be updated periodically as more PMIDs are issued.

The identifier must be one or more digits; an empty identifier is reported with the err_bad_pmid message like
any other malformed value instead of reaching the numeric range test.

<options> fields read here: id, handler; on error the 'PMID' entry of options.coins_list_t is unset

]]

local function pmid (options)
	local id = options.id;
	local handler = options.handler;
	local err_cat = '';															-- presume that PMID is valid

	local id_num = id:match ('^%d+$') and tonumber (id);						-- number for range testing; nil when id is not one or more digits

	if not id_num or 1 > id_num or handler.id_limit < id_num then				-- if PMID is malformed or outside test limit boundaries
		err_cat = ' ' .. set_message ('err_bad_pmid');							-- set an error message
		options.coins_list_t['PMID'] = nil;										-- when error, unset so not included in COinS
	end

	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
			prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;
end
1245 
1246 
--[[--------------------------< R F C >------------------------------------------------------------------------

Format RFC and do simple error checking. RFCs are sequential numbers beginning at 1 and counting up.  This
code checks the RFC to see that it contains only digits and is not greater than handler.id_limit, which is
specified in the configuration; that limit will need to be updated periodically as more RFCs are issued.

An index of all RFCs is here: https://tools.ietf.org/rfc/

The identifier must be one or more digits; an empty identifier is reported with the err_bad_rfc message like
any other malformed value instead of reaching the numeric range test.

<options> fields read here: id, handler; on error the 'RFC' entry of options.coins_list_t is unset

]]

local function rfc (options)
	local id = options.id;
	local handler = options.handler;
	local err_cat = '';															-- presume that RFC is valid

	local id_num = id:match ('^%d+$') and tonumber (id);						-- number for range testing; nil when id is not one or more digits

	if not id_num or 1 > id_num or handler.id_limit < id_num then				-- if RFC is malformed or outside test limit boundaries
		err_cat = ' ' .. set_message ('err_bad_rfc');							-- set an error message
		options.coins_list_t['RFC'] = nil;										-- when error, unset so not included in COinS
	end

	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
			prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}) .. err_cat;
end
1276 
1277 
1278 --[[--------------------------< S 2 C I D >--------------------------------------------------------------------
1279 
1280 Format an S2CID, do simple error checking
1281 
S2CIDs are sequential numbers beginning at 1 and counting up.  This code checks the S2CID to see that it is only
digits, does not begin with 0, and is not greater than handler.id_limit, where handler is the S2CID entry in
cfg.id_handlers; id_limit will need to be updated periodically as more S2CIDs are issued.
1285 
1286 ]]
1287 
local function s2cid (options)
	local handler = options.handler;
	local id = options.id;
	local err_cat = '';															-- presume that S2CID is valid
	local digits = id:match ('^[1-9]%d*$');										-- nil unless id is all digits with no leading 0

	-- an S2CID is bad when its format is wrong or when its numeric value exceeds the configured limit
	if not is_set (digits) or handler.id_limit < tonumber (digits) then
		err_cat = ' ' .. set_message ('err_bad_s2cid');							-- set an error message
		options.coins_list_t['S2CID'] = nil;									-- when error, unset so not included in COinS
	end

	return external_link_id ({
		link = handler.link,
		label = handler.label,
		q = handler.q,
		redirect = handler.redirect,
		prefix = handler.prefix,
		id = id,
		separator = handler.separator,
		encode = handler.encode,
		access = options.access,												-- access level supplied by the caller in the options table
		}) .. err_cat;
end
1315 
1316 
1317 --[[--------------------------< S B N >------------------------------------------------------------------------
1318 
1319 9-digit form of ISBN-10; uses same check-digit validation when SBN is prefixed with an additional '0' to make 10 digits
1320 
1321 sbn value not made part of COinS metadata because we don't have a url or isn't a COinS-defined identifier (rft.xxx)
1322 or an identifier registered at info-uri.info (info:)
1323 
1324 ]]
1325 
local function sbn (options)
	local id = options.id;
	local ignore_invalid = options.accept;										-- true when the value was wrapped in accept-as-written markup
	local handler = options.handler;

	-- Render the SBN and, depending on <check> and the accept flag, append an error message or set a maint message.
	--	<check>: result of the validation step; falsy means that the SBN failed that step
	--	<err_type>: supplementary text inserted into the 'err_bad_sbn' message
	-- returns the rendered SBN, with the error message appended when the SBN is invalid and errors are not ignored
	local function return_result (check, err_type)
		local SBN = internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect,
						prefix = handler.prefix, id = id, separator = handler.separator});
		if not ignore_invalid then												-- if not ignoring SBN errors
			if not check then
				options.coins_list_t['SBN'] = nil;								-- when error, unset so not included in COinS
				return SBN .. set_message ('err_bad_sbn', {err_type}, false, ' ');	-- display an error message
			end
		else
			set_message ('maint_isbn_ignore');									-- add a maint message whenever errors are being ignored, even when there is no error
		end
		return SBN;
	end

	-- The hyphen is escaped (%-) in both sets below.  An unescaped '-' directly after the class %s relies on
	-- behavior that the Lua manual leaves undefined (interaction between classes and ranges inside a set).
	if id:match ('[^%s%-0-9X]') then
		return return_result (false, cfg.err_msg_supl.char);					-- fail if SBN contains anything but digits, hyphens, whitespace, or the uppercase X
	end

	local ident = id:gsub ('[%s%-]', '');										-- remove hyphens and whitespace; they interfere with the rest of the tests

	if 9 ~= ident:len() then
		return return_result (false, cfg.err_msg_supl.length);					-- fail if incorrect length
	end

	if ident:match ('^%d*X?$') == nil then
		return return_result (false, cfg.err_msg_supl.form);					-- fail if SBN has 'X' anywhere but last position
	end

	return return_result (is_valid_isxn ('0' .. ident, 10), cfg.err_msg_supl.check);	-- prefix '0' to make ten digits, then use the ISBN-10 check-digit test
end
1360 
1361 
1362 --[[--------------------------< S S R N >----------------------------------------------------------------------
1363 
1364 Format an SSRN, do simple error checking
1365 
SSRNs are sequential numbers beginning at 100? and counting up.  This code checks the SSRN to see that it is
only digits and is in the range 100 to handler.id_limit inclusive, where handler is the SSRN entry in
cfg.id_handlers; id_limit will need to be updated periodically as more SSRNs are issued.
1369 
1370 ]]
1371 
local function ssrn (options)
	local handler = options.handler;
	local id = options.id;
	local ssrn_num = id:match ('^%d+$');										-- nil unless id is all digits
	local in_range = false;														-- presume invalid until the range test passes

	if is_set (ssrn_num) then
		ssrn_num = tonumber (ssrn_num);											-- numeric value for range testing
		in_range = not (100 > ssrn_num or handler.id_limit < ssrn_num);			-- valid SSRNs are 100 through handler.id_limit inclusive
	end

	local err_cat = '';
	if not in_range then														-- bad format or out of range
		err_cat = ' ' .. set_message ('err_bad_ssrn');							-- set an error message
		options.coins_list_t['SSRN'] = nil;										-- when error, unset so not included in COinS
	end

	local rendering = external_link_id ({
		link = handler.link,
		label = handler.label,
		q = handler.q,
		redirect = handler.redirect,
		prefix = handler.prefix,
		id = id,
		separator = handler.separator,
		encode = handler.encode,
		access = handler.access,
		});

	return rendering .. err_cat;
end
1397 
1398 
1399 --[[--------------------------< U S E N E T _ I D >------------------------------------------------------------
1400 
1401 Validate and format a usenet message id.  Simple error checking, looks for 'id-left@id-right' not enclosed in
1402 '<' and/or '>' angle brackets.
1403 
1404 ]]
1405 
local function usenet_id (options)
	local handler = options.handler;
	local message_id = options.id;

	local rendering = external_link_id ({										-- render first; the error message, if any, is appended afterwards
		link = handler.link,
		label = handler.label,
		q = handler.q,
		redirect = handler.redirect,
		prefix = handler.prefix,
		id = message_id,
		separator = handler.separator,
		encode = handler.encode,
		});

	local has_both_sides = message_id:match ('^.+@.+$');						-- at least one character on each side of an '@'
	local no_brackets = message_id:match ('^[^<].*[^>]$');						-- does not begin with '<' and does not end with '>'

	if not (has_both_sides and no_brackets) then								-- malformed message id
		rendering = rendering .. ' ' .. set_message ('err_bad_usenet_id');		-- add an error message
		options.coins_list_t['USENETID'] = nil;									-- when error, unset so not included in COinS
	end

	return rendering;
end
1420 
1421 
1422 --[[--------------------------< Z B L >-----------------------------------------------------------------------
1423 
1424 A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional
1425 
1426 format described here: http://emis.mi.sanu.ac.rs/ZMATH/zmath/en/help/search/
1427 
1428 temporary format is apparently eight digits.  Anything else is an error
1429 
1430 ]]
1431 
local function zbl (options)
	local handler = options.handler;
	local id = options.id;
	local err_cat = '';															-- presume that the identifier is acceptable

	local is_temporary = id:match ('^%d%d%d%d%d%d%d%d$');						-- temporary format: exactly eight digits
	local is_normal = id:match ('^%d?%d?%d?%d%.%d%d%d%d%d$');					-- normal format: one to four digits, a dot, five digits

	if is_temporary then
		set_message ('maint_zbl');												-- temporary identifiers get a maint message, not an error
	elseif not is_normal then
		err_cat = ' ' .. set_message ('err_bad_zbl');							-- neither format; set an error message
		options.coins_list_t['ZBL'] = nil;										-- when error, unset so not included in COinS
	end

	local rendering = external_link_id ({
		link = handler.link,
		label = handler.label,
		q = handler.q,
		redirect = handler.redirect,
		prefix = handler.prefix,
		id = id,
		separator = handler.separator,
		encode = handler.encode,
		});

	return rendering .. err_cat;
end
1447 
1448 
1449 --============================<< I N T E R F A C E   F U N C T I O N S >>==========================================
1450 
1451 --[[--------------------------< E X T R A C T _ I D S >------------------------------------------------------------
1452 
1453 Populates ID table from arguments using configuration settings. Loops through cfg.id_handlers and searches args for
1454 any of the parameters listed in each cfg.id_handlers['...'].parameters.  If found, adds the parameter and value to
1455 the identifier list.  Emits redundant error message if more than one alias exists in args
1456 
1457 ]]
1458 
local function extract_ids (args)
	local found_t = {};															-- identifiers found in args, keyed by cfg.id_handlers key
	for id_key, handler in pairs (cfg.id_handlers) do							-- id_key is the handler's key, e.g. 'ISBN'; handler is its configuration table
		local value = select_one (args, handler.parameters, 'err_redundant_parameters');	-- pick one value from args using the handler's parameter aliases
		if is_set (value) then
			found_t[id_key] = value;											-- keep only identifiers that have a value
		end
	end
	return found_t;
end
1467 
1468 
1469 --[[--------------------------< E X T R A C T _ I D _ A C C E S S _ L E V E L S >--------------------------------------
1470 
1471 Fetches custom id access levels from arguments using configuration settings. Parameters which have a predefined access
1472 level (e.g. arxiv) do not use this function as they are directly rendered as free without using an additional parameter.
1473 
1474 returns a table of k/v pairs where k is same as the identifier's key in cfg.id_handlers and v is the assigned (valid) keyword
1475 
1476 access-level values must match the case used in cfg.keywords_lists['id-access'] (lowercase unless there is some special reason for something else)
1477 
1478 ]]
1479 
local function extract_id_access_levels (args, id_list)
	local levels_t = {};														-- translated access keywords keyed by cfg.id_handlers key
	for id_key, handler in pairs (cfg.id_handlers) do
		local param_name = handler.custom_access;								-- name of this identifier's access-level parameter, if it has one
		if is_set (param_name) then
			local level = args[param_name];										-- value assigned to that parameter in the template call
			if is_set (level) then
				local is_known = in_array (level, cfg.keywords_lists['id-access']);	-- exact match against the permitted keywords
				if not is_known then
					table.insert (z.message_tail, { set_message ('err_invalid_param_val', {param_name, level}, true) } );
					level = nil;												-- invalid so unset
				end
				if not is_set (id_list[id_key]) then							-- an access-level parameter needs its matching identifier
					table.insert (z.message_tail, { set_message ('err_param_access_requires_param', {id_key:lower()}, true) } );	-- handler key is uppercase; the message uses lowercase
				end
				levels_t[id_key] = cfg.keywords_xlate[level];					-- translated keyword
			end
		end
	end
	return levels_t;
end
1500 
1501 
1502 --[[--------------------------< B U I L D _ I D _ L I S T >----------------------------------------------------
1503 
1504 render the identifiers into a sorted sequence table
1505 
1506 <ID_list_coins_t> is a table of k/v pairs where k is same as key in cfg.id_handlers and v is the assigned value
1507 <options_t> is a table of various k/v option pairs provided in the call to new_build_id_list();
1508 	modified by	this function and passed to all identifier rendering functions
1509 <access_levels_t> is a table of k/v pairs where k is same as key in cfg.id_handlers and v is the assigned value (if valid)
1510 
1511 returns a sequence table of sorted (by hkey - 'handler' key) rendered identifier strings
1512 
1513 ]]
1514 
local function build_id_list (ID_list_coins_t, options_t, access_levels_t)
	local renderers_t = {														-- maps each handler key to the function that renders that identifier
		['ARXIV'] = arxiv,
		['ASIN'] = asin,
		['BIBCODE'] = bibcode,
		['BIORXIV'] = biorxiv,
		['CITESEERX'] = citeseerx,
		['DOI'] = doi,
		['EISSN'] = issn,
		['HDL'] = hdl,
		['ISBN'] = isbn,
		['ISMN'] = ismn,
		['ISSN'] = issn,
		['JFM'] = jfm,
		['JSTOR'] = jstor,
		['LCCN'] = lccn,
		['MR'] = mr,
		['OCLC'] = oclc,
		['OL'] = openlibrary,
		['OSTI'] = osti,
		['PMC'] = pmc,
		['PMID'] = pmid,
		['RFC']  = rfc,
		['S2CID'] = s2cid,
		['SBN'] = sbn,
		['SSRN'] = ssrn,
		['USENETID'] = usenet_id,
		['ZBL'] = zbl,
		};
	local keyed_t = {};															-- sequence of {hkey, rendering} pairs awaiting sort

	for hkey, raw_value in pairs (ID_list_coins_t) do
		local value, accept = has_accept_as_written (raw_value);				-- strip accept-as-written markup; accept reports whether markup was removed

		options_t.hkey = hkey;													-- every rendering function receives this same, updated, options table
		options_t.id = value;
		options_t.accept = accept;
		options_t.access = access_levels_t[hkey];								-- access level for identifiers that have an access-level parameter
		options_t.handler = cfg.id_handlers[hkey];
		options_t.coins_list_t = ID_list_coins_t;								-- lets rendering functions unset erroneous values so they stay out of the metadata

		local render = renderers_t[hkey];
		if not render then
			error (cfg.messages['unknown_ID_key'] .. ' ' .. hkey);				-- no rendering function for this handler key
		end
		table.insert (keyed_t, {hkey, render (options_t)});
	end

	table.sort (keyed_t, function (a, b)										-- order by handler key, ignoring case
		return a[1]:lower() < b[1]:lower();
	end);

	local ID_list_t = {};
	for i = 1, #keyed_t do														-- keep only the rendered strings, in sorted order
		ID_list_t[i] = keyed_t[i][2];
	end

	return ID_list_t;
end
1575 
1576 
1577 --[[--------------------------< O P T I O N S _ C H E C K >----------------------------------------------------
1578 
1579 check that certain option parameters have their associated identifier parameters with values
1580 
1581 <ID_list_coins_t> is a table of k/v pairs where k is same as key in cfg.id_handlers and v is the assigned value
1582 <ID_support_t> is a sequence table of tables created in citation0() where each subtable has four elements:
1583 	[1] is the support parameter's assigned value; empty string if not set
1584 	[2] is a text string same as key in cfg.id_handlers
1585 	[3] is cfg.error_conditions key used to create error message
1586 	[4] is original ID support parameter name used to create error message
1587 	
1588 returns nothing; on error emits an appropriate error message
1589 
1590 ]]
1591 
local function options_check (ID_list_coins_t, ID_support_t)
	for _, support_t in ipairs (ID_support_t) do
		local value = support_t[1];												-- value assigned to the support parameter
		local id_key = support_t[2];											-- key of the matching identifier
		local err_key = support_t[3];											-- message key handed to set_message()
		local param_name = support_t[4];										-- support parameter name for the message

		if is_set (value) and not ID_list_coins_t[id_key] then					-- support parameter has a value but its identifier is absent
			-- NOTE(review): set_message() is called here without the raw flag that extract_id_access_levels() passes; confirm that this is intended
			table.insert (z.message_tail, {set_message (err_key, param_name)});
		end
	end
end
1599 
1600 
1601 --[[--------------------------< I D E N T I F I E R _ L I S T S _ G E T >--------------------------------------
1602 
1603 Creates two identifier lists: a k/v table of identifiers and their values to be used locally and for use in the
1604 COinS metadata, and a sequence table of the rendered identifier strings that will be included in the rendered
1605 citation.
1606 
1607 ]]
1608 
local function identifier_lists_get (args, options_t, ID_support_t)
	local coins_t = extract_ids (args);											-- k/v table of identifiers and their values
	options_check (coins_t, ID_support_t);										-- support parameters must have matching identifier parameters

	local rendered_t = build_id_list (											-- sequence table of rendered identifier strings
		coins_t,
		options_t,
		extract_id_access_levels (args, coins_t)								-- k/v table of identifier access levels
		);

	return rendered_t, coins_t;
end
1617 
1618 
1619 --[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------
1620 
1621 Sets local cfg table and imported functions table to same (live or sandbox) as that used by the other modules.
1622 
1623 ]]
1624 
local function set_selected_modules (cfg_table_ptr, utilities_page_ptr)
	local utilities = utilities_page_ptr;										-- short alias for the utilities module table

	cfg = cfg_table_ptr;														-- configuration tables
	z = utilities.z;															-- table of tables kept by the utilities module

	-- bind the forward-declared function names to the utilities module's implementations
	has_accept_as_written, is_set, in_array =
		utilities.has_accept_as_written, utilities.is_set, utilities.in_array;
	set_message, select_one =
		utilities.set_message, utilities.select_one;
	substitute, make_wikilink =
		utilities.substitute, utilities.make_wikilink;
end
1638 
1639 
1640 --[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------
1641 ]]
1642 
return {
	-- functions
	identifier_lists_get = identifier_lists_get,
	is_embargoed = is_embargoed,
	set_selected_modules = set_selected_modules,

	-- data
	auto_link_urls = auto_link_urls,											-- table of identifier URLs to be used when auto-linking |title=
	}