Module:Citation/CS1/Date validation

From WikiProjectMed
Jump to navigation Jump to search

Documentation for this module may be created at Module:Citation/CS1/Date validation/doc

   1 --[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------
   2 ]]
   3 
   4 local add_prop_cat, is_set, in_array, set_message, substitute, wrap_style;		-- imported functions from selected Module:Citation/CS1/Utilities
   5 local cfg;																		-- table of tables imported from selected Module:Citation/CS1/Configuration
   6 
   7 
   8 --[[--------------------------< F I L E - S C O P E   D E C L A R A T I O N S >--------------------------------
   9 
  10 File-scope variables are declared here
  11 
  12 ]]
  13 
local lang_object = mw.getContentLanguage();									-- content-language object; used by is_valid_accessdate(), is_valid_year(), date_name_xlate(); TODO: move to ~/Configuration?
local year_limit;																-- latest acceptable year; assigned by is_valid_year() (current year + 1) when it finds this not yet set
  16 
  17 
  18 --[=[-------------------------< I S _ V A L I D _ A C C E S S D A T E >----------------------------------------
  19 
  20 returns true if:
  21 	Wikipedia start date <= accessdate < today + 2 days
  22 
  23 Wikipedia start date is 2001-01-15T00:00:00 UTC which is 979516800 seconds after 1970-01-01T00:00:00 UTC (the start of Unix time)
  24 accessdate is the date provided in |access-date= at time 00:00:00 UTC
  25 today is the current date at time 00:00:00 UTC plus 48 hours
  26 	if today is 2015-01-01T00:00:00 then
  27 		adding 24 hours gives 2015-01-02T00:00:00 – one second more than today
  28 		adding 48 hours gives 2015-01-03T00:00:00 – one second more than tomorrow
  29 
  30 This function does not work if it is fed month names for languages other than English.  Wikimedia #time: parser
  31 apparently doesn't understand non-English date month names. This function will always return false when the date
  32 contains a non-English month name because good1 is false after the call to lang.formatDate().  To get around that
  33 call this function with YYYY-MM-DD format dates.
  34 
  35 ]=]
  36 
  37 local function is_valid_accessdate (accessdate)
  38 	local good1, good2;
  39 	local access_ts, tomorrow_ts;												-- to hold Unix time stamps representing the dates
  40 
  41 	good1, access_ts = pcall (lang_object.formatDate, lang_object, 'U', accessdate );			-- convert accessdate value to Unix timestamp 
  42 	good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' );	-- today midnight + 2 days is one second more than all day tomorrow
  43 	
  44 	if good1 and good2 then														-- lang.formatDate() returns a timestamp in the local script which which tonumber() may not understand
  45 		access_ts = tonumber (access_ts) or lang_object:parseFormattedNumber (access_ts);		-- convert to numbers for the comparison;
  46 		tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
  47 	else
  48 		return false;															-- one or both failed to convert to Unix time stamp
  49 	end
  50 
  51 	if 979516800 <= access_ts and access_ts < tomorrow_ts then					-- Wikipedia start date <= accessdate < tomorrow's date
  52 		return true;
  53 	else
  54 		return false;															-- accessdate out of range
  55 	end
  56 end
  57 
  58 
  59 --[[--------------------------< I S _ V A L I D _ E M B A R G O _ D A T E >------------------------------------
  60 
  61 returns true and date value if that value has proper dmy, mdy, ymd format.
  62 
  63 returns false and 9999 (embargoed forever) when date value is not proper format; assumes that when |pmc-embargo-date= is
  64 set, the editor intended to embargo a PMC but |pmc-embargo-date= does not hold a single date.
  65 
  66 ]]
  67 
  68 local function is_valid_embargo_date (v)
  69 	if v:match ('^%d%d%d%d%-%d%d%-%d%d$') or									-- ymd
  70 		v:match ('^%d%d?%s+%a+%s+%d%d%d%d$') or									-- dmy
  71 		v:match ('^%a+%s+%d%d?%s*,%s*%d%d%d%d$') then							-- mdy
  72 			return true, v;
  73 	end
  74 	return false, '9999';														-- if here not good date so return false and set embargo date to long time in future
  75 end
  76 
  77 
  78 --[[--------------------------< G E T _ M O N T H _ N U M B E R >----------------------------------------------
  79 
  80 returns a number according to the month in a date: 1 for January, etc.  Capitalization and spelling must be correct.
  81 If not a valid month, returns 0
  82 
  83 ]]
  84 
  85 local function get_month_number (month)
  86 	return cfg.date_names['local'].long[month] or cfg.date_names['local'].short[month] or	-- look for local names first
  87 			cfg.date_names['en'].long[month] or	cfg.date_names['en'].short[month] or		-- failing that, look for English names
  88 			0;																				-- not a recognized month name
  89 end
  90 
  91 
  92 --[[--------------------------< G E T _ S E A S O N _ N U M B E R >--------------------------------------------
  93 
  94 returns a number according to the sequence of seasons in a year: 21 for Spring, etc.  Capitalization and spelling
  95 must be correct. If not a valid season, returns 0.
  96 	21-24 = Spring, Summer, Autumn, Winter, independent of “Hemisphere”
  97 
  98 returns 0 when <param> is not |date=
  99 
 100 Season numbering is defined by Extended Date/Time Format (EDTF) specification (https://www.loc.gov/standards/datetime/)
 101 which became part of ISO 8601 in 2019.  See '§Sub-year groupings'.  The standard defines various divisions using
 102 numbers 21-41.  cs1|2 only supports generic seasons.  EDTF does support the distinction between north and south
 103 hemisphere seasons but cs1|2 has no way to make that distinction.
 104 
 105 These additional divisions are not handled by this function (quarters, 33-36, are handled by get_quarter_number()):
 106 	25-28 = Spring - Northern Hemisphere, Summer- Northern Hemisphere, Autumn - Northern Hemisphere, Winter - Northern Hemisphere
 107 	29-32 = Spring – Southern Hemisphere, Summer– Southern Hemisphere, Autumn – Southern Hemisphere, Winter - Southern Hemisphere
 108 	33-36 = Quarter 1, Quarter 2, Quarter 3, Quarter 4 (3 months each)
 109 	37-39 = Quadrimester 1, Quadrimester 2, Quadrimester 3 (4 months each)
 110 	40-41 = Semestral 1, Semestral-2 (6 months each)
 111 
 112 ]]
 113 
 114 local function get_season_number (season, param)
 115 	if 'date' ~= param then
 116 		return 0;																-- season dates only supported by |date=
 117 	end
 118 	return cfg.date_names['local'].season[season] or							-- look for local names first
 119 			cfg.date_names['en'].season[season] or								-- failing that, look for English names
 120 			0;																	-- not a recognized season name
 121 end
 122 
 123 
 124 --[[--------------------------< G E T _ Q U A R T E R _ N U M B E R >------------------------------------------
 125 
 126 returns a number according to the sequence of quarters in a year: 33 for first quarter, etc.  Capitalization and spelling
 127 must be correct. If not a valid quarter, returns 0.
 128 	33-36 = Quarter 1, Quarter 2, Quarter 3, Quarter 4 (3 months each)
 129 
 130 returns 0 when <param> is not |date=
 131 
 132 Quarter numbering is defined by Extended Date/Time Format (EDTF) specification (https://www.loc.gov/standards/datetime/)
 133 which became part of ISO 8601 in 2019.  See '§Sub-year groupings'.  The standard defines various divisions using
 134 numbers 21-41.  cs1|2 only supports generic seasons and quarters.
 135 
 136 These additional divisions not currently supported:
 137 	37-39 = Quadrimester 1, Quadrimester 2, Quadrimester 3 (4 months each)
 138 	40-41 = Semestral 1, Semestral-2 (6 months each)
 139 
 140 ]]
 141 
 142 local function get_quarter_number (quarter, param)
 143 	if 'date' ~= param then
 144 		return 0;																-- quarter dates only supported by |date=
 145 	end
 146 	quarter = mw.ustring.gsub (quarter, ' +', ' ');								-- special case replace multiple space chars with a single space char
 147 	return cfg.date_names['local'].quarter[quarter] or							-- look for local names first
 148 			cfg.date_names['en'].quarter[quarter] or							-- failing that, look for English names
 149 			0;																	-- not a recognized quarter name
 150 end
 151 
 152 
 153 --[[--------------------------< G E T _ P R O P E R _ N A M E _ N U M B E R >----------------------------------
 154 
 155 returns a non-zero number if date contains a recognized proper-name.  Capitalization and spelling must be correct.
 156 
 157 returns 0 when <param> is not |date=
 158 
 159 ]]
 160 
 161 local function get_proper_name_number (name, param)
 162 	if 'date' ~= param then
 163 		return 0;																-- proper-name dates only supported by |date=
 164 	end
 165 	return cfg.date_names['local'].named[name] or								-- look for local names dates first
 166 			cfg.date_names['en'].named[name] or									-- failing that, look for English names
 167 			0;																	-- not a recognized named date
 168 end
 169 
 170 
 171 --[[--------------------------< G E T _ E L E M E N T _ N U M B E R >------------------------------------------
 172 
 173 returns the number of the month or season or quarter or proper name when it is valid (properly spelled, capitalized, abbreviated); returns nil otherwise
 174 
 175 ]]
 176 
 177 local function get_element_number (element, param)
 178 	local num;
 179 	
 180 	local funcs = {get_month_number, get_season_number, get_quarter_number, get_proper_name_number};	-- list of functions to execute in order
 181 	
 182 	for _, func in ipairs (funcs) do											-- spin through the function list
 183 		num = func (element, param);											-- call the function and get the returned number
 184 		if 0 ~= num then														-- non-zero when valid month season quarter 
 185 			return num;															-- return that number
 186 		end
 187 	end
 188 	return nil;																	-- not valid
 189 end
 190 
 191 
 192 --[[--------------------------< I S _ V A L I D _ Y E A R >----------------------------------------------------
 193 
 194 Function gets current year from the server and compares it to year from a citation parameter.  Years more than one
 195 year in the future are not acceptable.
 196 
 197 ]]
 198 
 199 local function is_valid_year (year)
 200 	if not is_set(year_limit) then
 201 		year_limit = tonumber(os.date("%Y"))+1;									-- global variable so we only have to fetch it once
 202 	end
 203 
 204 	year = tonumber (year) or lang_object:parseFormattedNumber (year);			-- convert to numbers for the comparison;
 205 	return year and (year <= year_limit) or false;
 206 end
 207 
 208 
 209 --[[--------------------------< I S _ V A L I D _ D A T E >----------------------------------------------------
 210 
 211 Returns true if day is less than or equal to the number of days in month and year is no farther into the future
 212 than next year; else returns false.
 213 
 214 Assumes Julian calendar prior to year 1582 and Gregorian calendar thereafter. Accounts for Julian calendar leap
 215 years before 1582 and Gregorian leap years after 1582. Where the two calendars overlap (1582 to approximately
 216 1923) dates are assumed to be Gregorian.
 217 
 218 ]]
 219 
 220 local function is_valid_date (year, month, day)
 221 local days_in_month = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
 222 local month_length;
 223 	if not is_valid_year(year) then												-- no farther into the future than next year
 224 		return false;
 225 	end
 226 	
 227 	month = tonumber(month);													-- required for YYYY-MM-DD dates
 228 	
 229 	if (2 == month) then														-- if February
 230 		month_length = 28;														-- then 28 days unless
 231 		if 1582 > tonumber(year) then											-- Julian calendar
 232 			if 0 == (year%4) then												-- is a leap year?
 233 				month_length = 29;												-- if leap year then 29 days in February
 234 			end
 235 		else																	-- Gregorian calendar
 236 			if (0 == (year%4) and (0 ~= (year%100) or 0 == (year%400))) then	-- is a leap year?
 237 				month_length = 29;												-- if leap year then 29 days in February
 238 			end
 239 		end
 240 	else
 241 		month_length = days_in_month[month];
 242 	end
 243 
 244 	if tonumber (day) > month_length then
 245 		return false;
 246 	end
 247 	return true;
 248 end
 249 
 250 
 251 --[[--------------------------< I S _ V A L I D _ M O N T H _ R A N G E _ S T Y L E >--------------------------
 252 
 253 Months in a range are expected to have the same style: Jan–Mar or October–December but not February–Mar or Jul–August. 
 254 There is a special test for May because it can be either short or long form.
 255 
 256 Returns true when style for both months is the same
 257 
 258 ]]
 259 
 260 local function is_valid_month_range_style (month1, month2)
 261 local len1 = month1:len();
 262 local len2 = month2:len();
 263 	if len1 == len2 then
 264 		return true;															-- both months are short form so return true
 265 	elseif 'May' == month1 or 'May'== month2 then -- ToDo: I18N
 266 		return true;															-- both months are long form so return true
 267 	elseif 3 == len1 or 3 == len2 then
 268 		return false;															-- months are mixed form so return false
 269 	else
 270 		return true;															-- both months are long form so return true
 271 	end
 272 end
 273 
 274 
 275 --[[--------------------------< I S _ V A L I D _ M O N T H _ S E A S O N _ R A N G E >------------------------
 276 
 277 Check a pair of months or seasons to see if both are valid members of a month or season pair.
 278 
 279 Month pairs are expected to be left to right, earliest to latest in time.
 280 
 281 All season ranges are accepted as valid because there are publishers out there who have published a Summer–Spring YYYY issue, hence treat as ok
 282 
 283 ]]
 284 
 285 local function is_valid_month_season_range(range_start, range_end, param)
 286 	local range_start_number = get_month_number (range_start);
 287 	local range_end_number;
 288 
 289 	if 0 == range_start_number then												-- is this a month range?
 290 		range_start_number = get_season_number (range_start, param);			-- not a month; is it a season? get start season number
 291 		range_end_number = get_season_number (range_end, param);				-- get end season number
 292 
 293 		if (0 ~= range_start_number) and (0 ~= range_end_number) and (range_start_number ~= range_end_number) then
 294 			return true;														-- any season pairing is accepted except when both are the same
 295 		end
 296 		return false;															-- range_start and/or range_end is not a season
 297 	end
 298 																				-- here when range_start is a month
 299 	range_end_number = get_month_number (range_end);							-- get end month number
 300 	if range_start_number < range_end_number and								-- range_start is a month; does range_start precede range_end?
 301 		is_valid_month_range_style (range_start, range_end) then				-- do months have the same style?
 302 			return true;														-- proper order and same style
 303 	end
 304 	return false;																-- range_start month number is greater than or equal to range end number; or range end isn't a month
 305 end
 306 
 307 
 308 --[[--------------------------< M A K E _ C O I N S _ D A T E >------------------------------------------------
 309 
 310 This function receives a table of date parts for one or two dates and an empty table reference declared in
 311 Module:Citation/CS1.  The function is called only for |date= parameters and only if the |date=<value> is 
 312 determined to be a valid date format.  The question of what to do with invalid date formats is not answered here.
 313 
 314 The date parts in the input table are converted to an ISO 8601 conforming date string:
 315 	single whole dates:		yyyy-mm-dd
 316 	month and year dates:	yyyy-mm
 317 	year dates:				yyyy
 318 	ranges:					yyyy-mm-dd/yyyy-mm-dd
 319 							yyyy-mm/yyyy-mm
 320 							yyyy/yyyy
 321 
 322 Dates in the Julian calendar are reduced to year or year/year so that we don't have to do calendar conversion from
 323 Julian to Proleptic Gregorian.
 324 
 325 The input table has:
 326 	year, year2 – always present; if before 1582, ignore months and days if present
 327 	month, month2 – 0 if not provided, 1-12 for months, 21-24 for seasons, 33-36 for quarters; 98 Easter, 99 Christmas
 328 	day, day2 –  0 if not provided, 1-31 for days
 329 	
 330 the output table receives:
 331 	rftdate:	an ISO 8601 formatted date
 332 	rftchron:	a free-form version of the date, usually without year which is in rftdate (season ranges and proper-name dates)
 333 	rftssn:		one of four season keywords: winter, spring, summer, fall (lowercase)
 334 	rftquarter:	one of four values: 1, 2, 3, 4
 335 
 336 ]]
 337 
 338 local function make_COinS_date (input, tCOinS_date)
 339 	local date;																	-- one date or first date in a range
 340 	local date2 = '';															-- end of range date
 341 -- start temporary Julian / Gregorian calendar uncertainty detection
 342 	local year = tonumber(input.year);											-- this temporary code to determine the extent of sources dated to the Julian/Gregorian
 343 	local month = tonumber(input.month);										-- interstice 1 October 1582 – 1 January 1926
 344 	local day = tonumber (input.day);
 345 	if (0 ~= day) and															-- day must have a value for this to be a whole date
 346 		(((1582 == year) and (10 <= month) and (12 >= month)) or				-- any whole 1582 date from 1 October to 31 December or
 347 			((1926 == year) and (1 == month) and (1 == input.day)) or			-- 1 January 1926 or
 348 				((1582 < year) and (1925 >= year))) then						-- any date 1 January 1583 – 31 December 1925
 349 					tCOinS_date.inter_cal_cat = true;							-- set category flag true
 350 	end
 351 -- end temporary Julian / Gregorian calendar uncertainty detection
 352 	
 353 	if 1582 > tonumber(input.year) or 20 < tonumber(input.month) then			-- Julian calendar or season so &rft.date gets year only
 354 		date = input.year;
 355 		if 0 ~= input.year2 and input.year ~= input.year2 then					-- if a range, only the second year portion when not the same as range start year
 356 			date = string.format ('%.4d/%.4d', tonumber(input.year), tonumber(input.year2))		-- assemble the date range
 357 		end
 358 		if 20 < tonumber(input.month) then										-- if season or proper-name date
 359 			local season = {[24] = 'winter', [21] = 'spring', [22] = 'summer', [23] = 'fall', [33] = '1', [34] = '2', [35] = '3', [36] = '4', [98] = 'Easter', [99] = 'Christmas'};	-- seasons lowercase, no autumn; proper-names use title case
 360 			if 0 == input.month2 then											-- single season date
 361 				if 40 < tonumber(input.month) then
 362 					tCOinS_date.rftchron = season[input.month];					-- proper-name dates
 363 				elseif 30 < tonumber(input.month) then
 364 					tCOinS_date.rftquarter = season[input.month];				-- quarters
 365 				else
 366 					tCOinS_date.rftssn = season[input.month];					-- seasons
 367 				end
 368 			else																-- season range with a second season specified
 369 				if input.year ~= input.year2 then								-- season year – season year range or season year–year
 370 					tCOinS_date.rftssn = season[input.month];					-- start of range season; keep this?
 371 					if 0~= input.month2 then
 372 						tCOinS_date.rftchron = string.format ('%s %s – %s %s', season[input.month], input.year, season[input.month2], input.year2);
 373 					end
 374 				else															-- season–season year range
 375 					tCOinS_date.rftssn = season[input.month];					-- start of range season; keep this?
 376 					tCOinS_date.rftchron = season[input.month] .. '–' .. season[input.month2];	-- season–season year range
 377 				end
 378 			end
 379 		end
 380 		tCOinS_date.rftdate = date;
 381 		return;																	-- done
 382 	end
 383 	
 384 	if 0 ~= input.day then
 385 		date = string.format ('%s-%.2d-%.2d', input.year, tonumber(input.month), tonumber(input.day));	-- whole date
 386 	elseif 0 ~= input.month then
 387 		date = string.format ('%s-%.2d', input.year, tonumber(input.month));	-- year and month
 388 	else
 389 		date = string.format ('%s', input.year);								-- just year
 390 	end
 391 
 392 	if 0 ~= input.year2 then
 393 		if 0 ~= input.day2 then
 394 			date2 = string.format ('/%s-%.2d-%.2d', input.year2, tonumber(input.month2), tonumber(input.day2));		-- whole date
 395 		elseif 0 ~= input.month2 then
 396 			date2 = string.format ('/%s-%.2d', input.year2, tonumber(input.month2));	-- year and month
 397 		else
 398 			date2 = string.format ('/%s', input.year2);							-- just year
 399 		end
 400 	end
 401 	
 402 	tCOinS_date.rftdate = date .. date2;										-- date2 has the '/' separator
 403 	return;
 404 end
 405 
 406 
 407 --[[--------------------------< P A T T E R N S >--------------------------------------------------------------
 408 
 409 this is the list of patterns for date formats that this module recognizes.  Approximately the first half of these
 410 patterns represent formats that might be reformatted into another format.  Those that might be reformatted have
 411 'indicator' letters that identify the content of the matching capture: 'd' (day), 'm' (month), 'a' (anchor year),
 412 'y' (year); second day, month, year have a '2' suffix.
 413 
 414 These patterns are used for both date validation and for reformatting.  This table should not be moved to ~/Configuration
 415 because changes to this table require changes to check_date() and to reformatter() and reformat_date()
 416 
 417 ]]
 418 
-- Table of recognized date formats, keyed by a short format name.  In each entry, element [1] is the
-- pattern; any following strings are 'indicator' letters naming the pattern's captures in order.
-- Range separators written [%-–] accept either a hyphen-minus or an endash at the pattern level.
-- Where a pattern ends with %a?, one optional trailing letter is included in the outer (anchor-year) capture.
local patterns = {
	 																			-- year-initial numerical year-month-day
	['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'},					
																				-- month-initial: month day, year; year may be three or four digits
	['Mdy'] = {'^(%D-) +([1-9]%d?), +((%d%d%d%d?)%a?)$', 'm', 'd', 'a', 'y'},
																				-- month-initial day range: month day–day, year; days are separated by unspaced hyphen or endash
	['Md-dy'] = {'^(%D-) +([1-9]%d?)[%-–]([1-9]%d?), +((%d%d%d%d)%a?)$', 'm', 'd', 'd2', 'a', 'y'},
																				-- day-initial: day month year; year may be three or four digits
	['dMy'] = {'^([1-9]%d?) +(%D-) +((%d%d%d%d?)%a?)$', 'd', 'm', 'a', 'y'},
																				-- year-initial: year month day; day: 1 or 2 two digits, leading zero allowed; not supported at en.wiki (entry disabled)
	--	['yMd'] = {'^((%d%d%d%d?)%a?) +(%D-) +(%d%d?)$', 'a', 'y', 'm', 'd'},
																				-- day-range-initial: day–day month year; days are separated by unspaced hyphen or endash
	['d-dMy'] = {'^([1-9]%d?)[%-–]([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'd2', 'm', 'a', 'y'},
																				-- day initial month-day-range: day month - day month year; uses spaced hyphen or endash
	['dM-dMy'] = {'^([1-9]%d?) +(%D-) +[%-–] +([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'm', 'd2', 'm2', 'a', 'y'},
																				-- month initial month-day-range: month day – month day, year;  uses spaced hyphen or endash
	['Md-Mdy'] = {'^(%D-) +([1-9]%d?) +[%-–] +(%D-) +([1-9]%d?), +((%d%d%d%d)%a?)$','m', 'd', 'm2', 'd2', 'a', 'y'},
																				-- day initial month-day-year-range: day month year - day month year; uses spaced hyphen or endash
	['dMy-dMy'] = {'^([1-9]%d?) +(%D-) +(%d%d%d%d) +[%-–] +([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'm', 'y', 'd2', 'm2', 'a', 'y2'},
																				-- month initial month-day-year-range: month day, year – month day, year;  uses spaced hyphen or endash
	['Mdy-Mdy'] = {'^(%D-) +([1-9]%d?), +(%d%d%d%d) +[%-–] +(%D-) +([1-9]%d?), +((%d%d%d%d)%a?)$', 'm', 'd', 'y', 'm2', 'd2', 'a', 'y2'},

																				-- these date formats cannot be converted, per se, but month name can be rendered short or long
																				-- month/season year - month/season year; separated by spaced hyphen or endash
	['My-My'] = {'^(%D-) +(%d%d%d%d) +[%-–] +(%D-) +((%d%d%d%d)%a?)$', 'm', 'y', 'm2', 'a', 'y2'},
																				-- month/season range year; months separated by unspaced hyphen or endash
	['M-My'] = {'^(%D-)[%-–](%D-) +((%d%d%d%d)%a?)$', 'm', 'm2', 'a', 'y'},
																				-- month/season year or proper-name year; quarter year when First Quarter YYYY etc.
	['My'] = {'^([^%d–]-) +((%d%d%d%d)%a?)$', 'm', 'a', 'y'},					-- this way because endash is a member of %D; %D- will match January–March 2019 when it shouldn't

																				-- these date formats cannot be converted; they carry no indicator letters
	['Sy4-y2'] = {'^(%D-) +((%d%d)%d%d)[%-–]((%d%d)%a?)$'},						-- special case Winter/Summer year-year (YYYY-YY); years separated by unspaced hyphen or endash; inner capture of the first year is its first two digits
	['Sy-y'] = {'^(%D-) +(%d%d%d%d)[%-–]((%d%d%d%d)%a?)$'},						-- special case Winter/Summer year-year; years separated by unspaced hyphen or endash
	['y-y'] = {'^(%d%d%d%d?)[%-–]((%d%d%d%d?)%a?)$'},							-- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced hyphen or endash; each year is three or four digits
	['y4-y2'] = {'^((%d%d)%d%d)[%-–]((%d%d)%a?)$'},								-- year range: YYYY–YY; separated by unspaced hyphen or endash
	['ymx'] = {'^(%d%d%d%d)%-(%d%d)%-XX$', 'y', 'm'},							-- edtf year-initial numerical year-month-XX; the day position is the literal text XX
	['y'] = {'^((%d%d%d%d?)%a?)$'},												-- year; here accept either YYY or YYYY
	}
 457 
 458 
 459 --[[--------------------------< C H E C K _ D A T E >----------------------------------------------------------
 460 
 461 Check date format to see that it is one of the formats approved by WP:DATESNO or WP:DATERANGE. Exception: only
 462 allowed range separator is endash.  Additionally, check the date to see that it is a real date: no 31 in 30-day
 463 months; no 29 February when not a leap year.  Months, both long-form and three character abbreviations, and seasons
 464 must be spelled correctly.  Future years beyond next year are not allowed.
 465 
 466 If the date fails the format tests, this function returns false and does not return values for anchor_year and
 467 COinS_date.  When this happens, the date parameter is (DEBUG: not?) used in the COinS metadata and the CITEREF identifier gets
 468 its year from the year parameter if present otherwise CITEREF does not get a date value.
 469 
 470 Inputs:
 471 	date_string - date string from date-holding parameters (date, year, publication-date, access-date, pmc-embargo-date, archive-date, lay-date)
 472 
 473 Returns:
 474 	false if date string is not a real date; else
 475 	true, anchor_year, COinS_date
 476 		anchor_year can be used in CITEREF anchors
 477 		COinS_date is ISO 8601 format date; see make_COinS_date()
 478 
 479 ]]
 480 
 481 local function check_date (date_string, param, tCOinS_date)
 482 	local year;																	-- assume that year2, months, and days are not used;
 483 	local year2 = 0;															-- second year in a year range
 484 	local month = 0;
 485 	local month2 = 0;															-- second month in a month range
 486 	local day = 0;
 487 	local day2 = 0;																-- second day in a day range
 488 	local anchor_year;
 489 	local coins_date;
 490 
 491 	if date_string:match (patterns['ymd'][1]) then								-- year-initial numerical year month day format
 492 		year, month, day = date_string:match (patterns['ymd'][1]);
 493 		if 12 < tonumber(month) or 1 > tonumber(month) or 1582 > tonumber(year) or 0 == tonumber(day) then return false; end	-- month or day number not valid or not Gregorian calendar
 494 		anchor_year = year;
 495 	
 496 	elseif date_string:match (patterns['ymx'][1]) then							-- year-initial numerical year month edtf format
 497 		year, month = date_string:match (patterns['ymx'][1]);
 498 		if 12 < tonumber(month) or 1 > tonumber(month) or 1582 > tonumber(year) or not is_valid_year(year) then return false; end	-- month number not valid or not Gregorian calendar or future year
 499 		anchor_year = year;
 500 
 501 	elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then				-- month-initial: month day, year
 502 		month, day, anchor_year, year = mw.ustring.match(date_string, patterns['Mdy'][1]);
 503 		month = get_month_number (month);
 504 		if 0 == month then return false; end									-- return false if month text isn't one of the twelve months
 505 				
 506 	elseif mw.ustring.match(date_string, patterns['Md-dy'][1]) then				-- month-initial day range: month day–day, year; days are separated by endash
 507 		month, day, day2, anchor_year, year = mw.ustring.match(date_string, patterns['Md-dy'][1]);
 508 		if tonumber(day) >= tonumber(day2) then return false; end				-- date range order is left to right: earlier to later; dates may not be the same;
 509 		month = get_month_number (month);
 510 		if 0 == month then return false; end									-- return false if month text isn't one of the twelve months
 511 		month2=month;															-- for metadata
 512 		year2 = year;
 513 
 514 	elseif mw.ustring.match(date_string, patterns['dMy'][1]) then				-- day-initial: day month year
 515 		day, month, anchor_year, year = mw.ustring.match(date_string, patterns['dMy'][1]);
 516 		month = get_month_number (month);
 517 		if 0 == month then return false; end									-- return false if month text isn't one of the twelve months
 518 
 519 --[[ NOT supported at en.wiki
 520 	elseif mw.ustring.match(date_string, patterns['yMd'][1]) then				-- year-initial: year month day; day: 1 or 2 two digits, leading zero allowed
 521 		anchor_year, year, month, day = mw.ustring.match(date_string, patterns['yMd'][1]);
 522 		month = get_month_number (month);
 523 		if 0 == month then return false; end									-- return false if month text isn't one of the twelve months
 524 -- end NOT supported at en.wiki ]]
 525 
 526 	elseif mw.ustring.match(date_string, patterns['d-dMy'][1]) then				-- day-range-initial: day–day month year; days are separated by endash
 527 		day, day2, month, anchor_year, year = mw.ustring.match(date_string, patterns['d-dMy'][1]);
 528 		if tonumber(day) >= tonumber(day2) then return false; end				-- date range order is left to right: earlier to later; dates may not be the same;
 529 		month = get_month_number (month);
 530 		if 0 == month then return false; end									-- return false if month text isn't one of the twelve months
 531 		month2 = month;															-- for metadata
 532 		year2 = year;
 533 
 534 	elseif mw.ustring.match(date_string, patterns['dM-dMy'][1]) then			-- day initial month-day-range: day month - day month year; uses spaced endash
 535 		day, month, day2, month2, anchor_year, year = mw.ustring.match(date_string, patterns['dM-dMy'][1]);
 536 		if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end	-- date range order is left to right: earlier to later;
 537 		month = get_month_number (month);										-- for metadata
 538 		month2 = get_month_number (month2);
 539 		year2 = year;
 540 
 541 	elseif mw.ustring.match(date_string, patterns['Md-Mdy'][1]) then			-- month initial month-day-range: month day – month day, year; uses spaced endash
 542 		month, day, month2, day2, anchor_year, year = mw.ustring.match(date_string, patterns['Md-Mdy'][1]);
 543 		if (not is_valid_month_season_range(month, month2, param)) or not is_valid_year(year) then return false; end
 544 		month = get_month_number (month);										-- for metadata
 545 		month2 = get_month_number (month2);
 546 		year2 = year;
 547 
 548 	elseif mw.ustring.match(date_string, patterns['dMy-dMy'][1]) then			-- day initial month-day-year-range: day month year - day month year; uses spaced endash
 549 		day, month, year, day2, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns['dMy-dMy'][1]);
 550 		if tonumber(year2) <= tonumber(year) then return false; end				-- must be sequential years, left to right, earlier to later
 551 		if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end		-- year2 no more than one year in the future; months same style
 552 		month = get_month_number (month);										-- for metadata
 553 		month2 = get_month_number (month2);
 554 		if 0 == month or 0 == month2 then return false; end						-- both must be valid
 555 
 556 	elseif mw.ustring.match(date_string, patterns['Mdy-Mdy'][1]) then			-- month initial month-day-year-range: month day, year – month day, year; uses spaced endash
 557 		month, day, year, month2, day2, anchor_year, year2 = mw.ustring.match(date_string, patterns['Mdy-Mdy'][1]);
 558 		if tonumber(year2) <= tonumber(year) then return false; end				-- must be sequential years, left to right, earlier to later
 559 		if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end		-- year2 no more than one year in the future; months same style
 560 		month = get_month_number (month);										-- for metadata
 561 		month2 = get_month_number(month2);
 562 		if 0 == month or 0 == month2 then return false; end						-- both must be valid
 563 
 564 	elseif mw.ustring.match(date_string, patterns['Sy4-y2'][1]) then			-- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash
 565 		local century;
 566 		month, year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy4-y2'][1]);
 567 		if 'Winter' ~= month and 'Summer' ~= month then return false end;		-- 'month' can only be Winter or Summer
 568 		anchor_year = year .. '–' .. anchor_year;								-- assemble anchor_year from both years
 569 		year2 = century..year2;													-- add the century to year2 for comparisons
 570 		if 1 ~= tonumber(year2) - tonumber(year) then return false; end			-- must be sequential years, left to right, earlier to later
 571 		if not is_valid_year(year2) then return false; end						-- no year farther in the future than next year
 572 		month = get_season_number(month, param);
 573 
 574 	elseif mw.ustring.match(date_string, patterns['Sy-y'][1]) then				-- special case Winter/Summer year-year; year separated with unspaced endash
 575 		month, year, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy-y'][1]);
 576 		if 'Winter' ~= month and 'Summer' ~= month then return false end;		-- 'month' can only be Winter or Summer
 577 		anchor_year = year .. '–' .. anchor_year;										-- assemble anchor_year from both years
 578 		if 1 ~= tonumber(year2) - tonumber(year) then return false; end			-- must be sequential years, left to right, earlier to later
 579 		if not is_valid_year(year2) then return false; end						-- no year farther in the future than next year
 580 		month = get_season_number (month, param);								-- for metadata
 581 
 582 	elseif mw.ustring.match(date_string, patterns['My-My'][1]) then				-- month/season year - month/season year; separated by spaced endash
 583 		month, year, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns['My-My'][1]);
 584 		anchor_year = year .. '–' .. anchor_year;								-- assemble anchor_year from both years
 585 		if tonumber(year) >= tonumber(year2) then return false; end				-- left to right, earlier to later, not the same
 586 		if not is_valid_year(year2) then return false; end						-- no year farther in the future than next year
 587 		if 0 ~= get_month_number(month) and 0 ~= get_month_number(month2) and is_valid_month_range_style(month, month2) then 	-- both must be month year, same month style
 588 			month = get_month_number(month);
 589 			month2 = get_month_number(month2);
 590 		elseif 0 ~= get_season_number(month, param) and 0 ~= get_season_number(month2, param) then	-- both must be season year, not mixed
 591 			month = get_season_number(month, param);
 592 			month2 = get_season_number(month2, param);
 593 		else
 594 			 return false;
 595 		end
 596 
 597 	elseif mw.ustring.match(date_string, patterns['M-My'][1]) then				-- month/season range year; months separated by endash 
 598 		month, month2, anchor_year, year = mw.ustring.match(date_string, patterns['M-My'][1]);
 599 		if (not is_valid_month_season_range(month, month2, param)) or (not is_valid_year(year)) then return false; end
 600 		if 0 ~= get_month_number(month) then									-- determined to be a valid range so just check this one to know if month or season
 601 			month = get_month_number(month);
 602 			month2 = get_month_number(month2);
 603 			if 0 == month or 0 == month2 then return false; end
 604 		else
 605 			month = get_season_number(month, param);
 606 			month2 = get_season_number(month2, param);
 607 		end
 608 		year2 = year;
 609 		
 610 	elseif mw.ustring.match(date_string, patterns['My'][1]) then				-- month/season/quarter/proper-name year
 611 		month, anchor_year, year = mw.ustring.match(date_string, patterns['My'][1]);
 612 		if not is_valid_year(year) then return false; end
 613 		month = get_element_number(month, param);								-- get month season quarter proper-name number or nil
 614 		if not month then return false; end										-- not valid whatever it is
 615 
 616 	elseif mw.ustring.match(date_string, patterns['y-y'][1]) then				-- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
 617 		year, anchor_year, year2 = mw.ustring.match(date_string, patterns['y-y'][1]);
 618 		anchor_year = year .. '–' .. anchor_year;								-- assemble anchor year from both years
 619 		if tonumber(year) >= tonumber(year2) then return false; end				-- left to right, earlier to later, not the same
 620 		if not is_valid_year(year2) then return false; end						-- no year farther in the future than next year
 621 
 622 	elseif mw.ustring.match(date_string, patterns['y4-y2'][1]) then				-- Year range: YYYY–YY; separated by unspaced endash
 623 		local century;
 624 		year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['y4-y2'][1]);
 625 		anchor_year = year .. '–' .. anchor_year;								-- assemble anchor year from both years
 626 
 627 		if in_array (param, {'date', 'publication-date', 'year'}) then
 628 			add_prop_cat ('year_range_abbreviated');
 629 		end
 630 
 631 		if 13 > tonumber(year2) then return false; end							-- don't allow 2003-05 which might be May 2003
 632 		year2 = century .. year2;													-- add the century to year2 for comparisons
 633 		if tonumber(year) >= tonumber(year2) then return false; end				-- left to right, earlier to later, not the same
 634 		if not is_valid_year(year2) then return false; end						-- no year farther in the future than next year
 635 
 636 	elseif mw.ustring.match(date_string, patterns['y'][1]) then					-- year; here accept either YYY or YYYY
 637 		anchor_year, year = mw.ustring.match(date_string, patterns['y'][1]);
 638 		if false == is_valid_year(year) then
 639 			return false;
 640 		end
 641 
 642 	else
 643 		return false;															-- date format not one of the MOS:DATE approved formats
 644 	end
 645 
 646 	if 'access-date' == param then												-- test accessdate here because we have numerical date parts
 647 		if 0 ~= year and 0 ~= month and 0 ~= day and 							-- all parts of a single date required
 648 			0 == year2 and 0 == month2 and 0 == day2 then						-- none of these; accessdate must not be a range
 649 				if not is_valid_accessdate(year .. '-' .. month .. '-' .. day) then	
 650 					return false;												-- return false when accessdate out of bounds
 651 				end
 652 		else
 653 			return false;														-- return false when accessdate is a range of two dates
 654 		end
 655 	end
 656 
 657 	local result=true;															-- check whole dates for validity; assume true because not all dates will go through this test
 658 	if 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 == day2 then		-- YMD (simple whole date)
 659 		result = is_valid_date(year, month, day);
 660 
 661 	elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 ~= day2 then	-- YMD-d (day range)
 662 		result = is_valid_date(year, month, day);
 663 		result = result and is_valid_date(year, month, day2);
 664 
 665 	elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 ~= month2 and 0 ~= day2 then	-- YMD-md (day month range)
 666 		result = is_valid_date(year, month, day);
 667 		result = result and is_valid_date(year, month2, day2);
 668 
 669 	elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 ~= year2 and 0 ~= month2 and 0 ~= day2 then	-- YMD-ymd (day month year range)
 670 		result = is_valid_date(year, month, day);
 671 		result = result and is_valid_date(year2, month2, day2);
 672 	end
 673 	
 674 	if false == result then return false; end
 675 
 676 	if nil ~= tCOinS_date then													-- this table only passed into this function when testing |date= parameter values
 677 		make_COinS_date ({year = year, month = month, day = day, year2 = year2, month2 = month2, day2 = day2}, tCOinS_date);	-- make an ISO 8601 date string for COinS
 678 	end
 679 	
 680 	return true, anchor_year;													-- format is good and date string represents a real date
 681 end	
 682 
 683 
 684 --[[--------------------------< D A T E S >--------------------------------------------------------------------
 685 
 686 Cycle the date-holding parameters in passed table date_parameters_list through check_date() to check compliance with MOS:DATE. For all valid dates, check_date() returns
 687 true. The |date= parameter test is unique, it is the only date holding parameter from which values for anchor_year (used in CITEREF identifiers) and COinS_date (used in
 688 the COinS metadata) are derived.  The |date= parameter is the only date-holding parameter that is allowed to contain the no-date keywords "n.d." or "nd" (without quotes).
 689 
 690 Unlike most error messages created in this module, only one error message is created by this function. Because all of the date holding parameters are processed serially,
 691 parameters with errors are added to the <error_list> sequence table as the dates are tested.
 692 
 693 ]]
 694 
 695 local function dates(date_parameters_list, tCOinS_date, error_list)
 696 	local anchor_year;															-- will return as nil if the date being tested is not |date=
 697 	local COinS_date;															-- will return as nil if the date being tested is not |date=
 698 	local embargo_date;															-- if embargo date is a good dmy, mdy, ymd date then holds original value else reset to 9999
 699 	local good_date = false;
 700 
 701 	for k, v in pairs(date_parameters_list) do									-- for each date-holding parameter in the list
 702 		if is_set(v.val) then													-- if the parameter has a value
 703 			v.val = mw.ustring.gsub(v.val, '%d', cfg.date_names.local_digits);	-- translate 'local' digits to Western 0-9
 704 			if v.val:match("^c%. [1-9]%d%d%d?%a?$") then						-- special case for c. year or with or without CITEREF disambiguator - only |date= and |year=
 705 				local year = v.val:match("c%. ([1-9]%d%d%d?)%a?");				-- get the year portion so it can be tested
 706 				if 'date' == k then
 707 					anchor_year, COinS_date = v.val:match("((c%. [1-9]%d%d%d?)%a?)");	-- anchor year and COinS_date only from |date= parameter
 708 					good_date = is_valid_year(year);
 709 				elseif 'year' == k then
 710 					good_date = is_valid_year(year);
 711 				end
 712 			elseif 'date' == k then												-- if the parameter is |date=
 713 				if v.val:match("^n%.d%.%a?$") then -- ToDo: I18N								-- if |date=n.d. with or without a CITEREF disambiguator
 714 					good_date, anchor_year, COinS_date = true, v.val:match("((n%.d%.)%a?)"); -- ToDo: I18N	-- "n.d."; no error when date parameter is set to no date
 715 				elseif v.val:match("^nd%a?$") then -- ToDo: I18N								-- if |date=nd with or without a CITEREF disambiguator
 716 					good_date, anchor_year, COinS_date = true, v.val:match("((nd)%a?)"); -- ToDo: I18N	-- "nd";	no error when date parameter is set to no date
 717 				else
 718 					good_date, anchor_year, COinS_date = check_date (v.val, k, tCOinS_date);	-- go test the date
 719 				end
 720 			elseif 'year' == k then												-- if the parameter is |year= it should hold only a year value
 721 				if v.val:match("^[1-9]%d%d%d?%a?$") then						-- if |year = 3 or 4 digits only with or without a CITEREF disambiguator
 722 					good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)");
 723 				end
 724 			elseif 'pmc-embargo-date' == k then											-- if the parameter is |pmc-embargo-date=
 725 				good_date = check_date (v.val, k);								-- go test the date
 726 				if true == good_date then										-- if the date is a valid date
 727 					good_date, embargo_date = is_valid_embargo_date (v.val);	-- is |pmc-embargo-date= date a single dmy, mdy, or ymd formatted date? yes: returns embargo; no: returns 9999
 728 				end
 729 			else																-- any other date-holding parameter
 730 				good_date = check_date (v.val, k);								-- go test the date
 731 			end
 732 			if false == good_date then											-- assemble one error message so we don't add the tracking category multiple times
 733 				table.insert (error_list, wrap_style ('parameter', v.name));	-- make parameter name suitable for error message list
 734 			end
 735 		end
 736 	end
 737 	return anchor_year, embargo_date;											-- and done
 738 end
 739 
 740 
 741 --[[--------------------------< Y E A R _ D A T E _ C H E C K >------------------------------------------------
 742 
 743 Compare the value provided in |year= with the year value(s) provided in |date=.  This function sets a local numeric value:
 744 	0 - year value does not match the year value in date
 745 	1 - (default) year value matches the year value in date or one of the year values when date contains two years
 746 	2 - year value matches the year value in date when date is in the form YYYY-MM-DD and year is disambiguated (|year=YYYYx)
 747 
 748 the numeric value in <result> determines the 'output' if any from this function:
 749 	0 – adds error message to error_list sequence table
 750 	1 – adds maint cat
 751 	2 – does nothing
 752 
 753 ]]
 754 
 755 local function year_date_check (year_string, year_origin, date_string, date_origin, error_list)
 756 	local year;
 757 	local date1;
 758 	local date2;
 759 	local result = 1;															-- result of the test; assume that the test passes
 760 
 761 	year = year_string:match ('(%d%d%d%d?)');
 762 
 763 	if date_string:match ('%d%d%d%d%-%d%d%-%d%d') and year_string:match ('%d%d%d%d%a') then	--special case where both date and year are required YYYY-MM-DD and YYYYx
 764 		date1 = date_string:match ('(%d%d%d%d)');
 765 		year = year_string:match ('(%d%d%d%d)');
 766 		if year ~= date1 then
 767 			result = 0;															-- years don't match
 768 		else
 769 			result = 2;															-- years match; but because disambiguated, don't add to maint cat
 770 		end
 771 		
 772 	elseif date_string:match ("%d%d%d%d?.-%d%d%d%d?") then						-- any of the standard range formats of date with two three- or four-digit years
 773 		date1, date2 = date_string:match ("(%d%d%d%d?).-(%d%d%d%d?)");
 774 		if year ~= date1 and year ~= date2 then
 775 			result = 0;
 776 		end
 777 
 778 	elseif mw.ustring.match(date_string, "%d%d%d%d[%-–]%d%d") then				-- YYYY-YY date ranges
 779 		local century;
 780 		date1, century, date2 = mw.ustring.match(date_string, "((%d%d)%d%d)[%-–]+(%d%d)");
 781 		date2 = century..date2;													-- convert YY to YYYY
 782 		if year ~= date1 and year ~= date2 then
 783 			result = 0;
 784 		end
 785 
 786 	elseif date_string:match ("%d%d%d%d?") then									-- any of the standard formats of date with one year
 787 		date1 = date_string:match ("(%d%d%d%d?)");
 788 		if year ~= date1 then
 789 			result = 0;
 790 		end
 791 	else																		-- should never get here; this function called only when no other date errors
 792 		result = 0;																-- no recognizable year in date
 793 	end
 794 
 795 	if 0 == result then															-- year / date mismatch
 796 		table.insert (error_list, substitute (cfg.messages['mismatch'], {year_origin, date_origin}));	-- add error message to error_list sequence table
 797 	elseif 1 == result then														-- redundant year / date
 798 		set_message ('maint_date_year');										-- add a maint cat
 799 	end
 800 end
 801 
 802 
 803 --[[--------------------------< R E F O R M A T T E R >--------------------------------------------------------
 804 
 805 reformat 'date' into new format specified by format_param if pattern_idx (the current format of 'date') can be
 806 reformatted.  Does the grunt work for reformat_dates().
 807 
 808 The table re_formats maps pattern_idx (current format) and format_param (desired format) to a table that holds:
 809 	format string used by string.format()
 810 	identifier letters ('d', 'm', 'y', 'd2', 'm2', 'y2') that serve as indexes into a table t{} that holds captures
 811 		from mw.ustring.match() for the various date parts specified by  patterns[pattern_idx][1]
 812 
 813 Items in patterns{} have the general form:
 814 	['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'}, where:
 815 		['ymd'] is pattern_idx
 816 		patterns['ymd'][1] is the match pattern with captures for mw.ustring.match()
 817 		patterns['ymd'][2] is an indicator letter identifying the content of the first capture
 818 		patterns['ymd'][3] ... the second capture etc.
 819 
 820 when a pattern matches a date, the captures are loaded into table t{} in capture order using the identifier
 821 characters as indexes into t{}.  For the above, a ymd date is in t{} as:
 822 	t.y = first capture (year), t.m = second capture (month), t.d = third capture (day)
 823 
 824 To reformat, this function is called with the pattern_idx that matches the current format of the date and with
 825 format_param set to the desired format.  This function loads table t{} as described and then calls string.format()
 826 with the format string specified by re_formats[pattern_idx][format_param][1] using values taken from t{} according
 827 to the capture identifier letters specified by re_formats[pattern_idx][format_param][n] where n is 2..
 828 
 829 ]]
 830 
 831 local re_formats = {
 832 	['ymd'] = {																	-- date format is ymd; reformat to:
 833 		['mdy'] = {'%s %s, %s', 'm', 'd', 'y'},									-- |df=mdy
 834 		['dmy'] = {'%s %s %s', 'd', 'm', 'y'},									-- |df=dmy
 835 	--		['yMd'] = {'%s %s %s', 'y', 'm', 'd'},								-- |df=yMd; not supported at en.wiki
 836 		},
 837 	['Mdy'] = {																	-- date format is Mdy; reformat to:
 838 		['mdy'] = {'%s %s, %s', 'm', 'd', 'y'},									-- for long/short reformatting
 839 		['dmy'] = {'%s %s %s', 'd', 'm', 'y'},									-- |df=dmy
 840 		['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'},									-- |df=ymd
 841 	--		['yMd'] = {'%s %s %s', 'y', 'm', 'd'},								-- |df=yMd; not supported at en.wiki
 842 		},
 843 	['dMy'] = {																	-- date format is dMy; reformat to:
 844 		['dmy'] = {'%s %s %s', 'd', 'm', 'y'},									-- for long/short reformatting
 845 		['mdy'] = {'%s %s, %s', 'm', 'd', 'y'},									-- |df=mdy
 846 		['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'},									-- |df=ymd
 847 	--		['yMd'] = {'%s %s %s', 'y', 'm', 'd'},								-- |df=yMd; not supported at en.wiki
 848 		},
 849 	['Md-dy'] = {																-- date format is Md-dy; reformat to:
 850 		['mdy'] = {'%s %s–%s, %s', 'm', 'd', 'd2', 'y'},						-- for long/short reformatting
 851 		['dmy'] = {'%s–%s %s %s', 'd', 'd2', 'm', 'y'},							-- |df=dmy -> d-dMy 
 852 		},
 853 	['d-dMy'] = {																-- date format is d-d>y; reformat to:
 854 		['dmy'] = {'%s–%s %s %s', 'd', 'd2', 'm', 'y'},							-- for long/short reformatting
 855 		['mdy'] = {'%s %s–%s, %s', 'm', 'd', 'd2', 'y'},						-- |df=mdy -> Md-dy 
 856 		},
 857 	['dM-dMy'] = {																-- date format is dM-dMy; reformat to:
 858 		['dmy'] = {'%s %s – %s %s %s', 'd', 'm', 'd2', 'm2', 'y'},				-- for long/short reformatting
 859 		['mdy'] = {'%s %s – %s %s, %s', 'm', 'd', 'm2', 'd2', 'y'},				-- |df=mdy -> Md-Mdy 
 860 		},
 861 	['Md-Mdy'] = {																-- date format is Md-Mdy; reformat to:
 862 		['mdy'] = {'%s %s – %s %s, %s', 'm', 'd',  'm2', 'd2', 'y'},			-- for long/short reformatting
 863 		['dmy'] = {'%s %s – %s %s %s', 'd', 'm', 'd2', 'm2', 'y'},				-- |df=dmy -> dM-dMy 
 864 		},
 865 	['dMy-dMy'] = {																-- date format is dMy-dMy; reformat to:
 866 		['dmy'] = {'%s %s %s – %s %s %s', 'd', 'm', 'y', 'd2', 'm2', 'y2'},		-- for long/short reformatting
 867 		['mdy'] = {'%s %s, %s – %s %s, %s', 'm', 'd', 'y', 'm2', 'd2', 'y2'},	-- |df=mdy -> Mdy-Mdy 
 868 		},
 869 	['Mdy-Mdy'] = {																-- date format is Mdy-Mdy; reformat to:
 870 		['mdy'] = {'%s %s, %s – %s %s, %s', 'm', 'd', 'y', 'm2', 'd2', 'y2'},	-- for long/short reformatting
 871 		['dmy'] = {'%s %s %s – %s %s %s', 'd', 'm', 'y', 'd2', 'm2', 'y2'},		-- |df=dmy -> dMy-dMy 
 872 		},
 873 	['My-My'] = {																-- these for long/short reformatting
 874 		['any'] = {'%s %s – %s %s', 'm', 'y', 'm2', 'y2'},						-- dmy/mdy agnostic
 875 		},
 876 	['M-My'] = {																-- these for long/short reformatting
 877 		['any'] = {'%s–%s %s', 'm', 'm2', 'y'},									-- dmy/mdy agnostic
 878 		},
 879 	['My'] = {																	-- these for long/short reformatting
 880 		['any'] = {'%s %s', 'm', 'y'},											-- dmy/mdy agnostic
 881 		},
 882 	--	['yMd'] = {																-- not supported at en.wiki
 883 	--		['mdy'] = {'%s %s, %s', 'm', 'd', 'y'},								-- |df=mdy
 884 	--		['dmy'] = {'%s %s %s', 'd', 'm', 'y'},								-- |df=dmy
 885 	--		['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'},								-- |df=ymd
 886 	--		},
 887 	}
 888 
 889 
 890 local function reformatter (date, pattern_idx, format_param, mon_len)
 891 	if not in_array (pattern_idx, {'ymd', 'Mdy', 'Md-dy', 'dMy', 'yMd', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy', 'My-My', 'M-My', 'My'}) then
 892 		return;																	-- not in this set of date format patterns then not a reformattable date
 893 	end
 894 	
 895 	if 'ymd' == format_param and in_array (pattern_idx, {'ymd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy', 'My-My', 'M-My', 'My'}) then
 896 		return;																	-- ymd date ranges not supported at en.wiki; no point in reformatting ymd to ymd
 897 	end
 898 
 899 	if in_array (pattern_idx, {'My', 'M-My', 'My-My'}) then						-- these are not dmy/mdy so can't be 'reformatted' into either
 900 		format_param = 'any';													-- so format-agnostic 
 901 	end
 902 
 903 																				-- yMd is not supported at en.wiki; if yMd is supported at your wiki, uncomment the next line
 904 	--	if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then	-- these formats not convertable; yMd not supported at en.wiki
 905 	-- if yMd is supported at your wiki, remove or comment-out the next line
 906 	if 'yMd' == format_param then												-- yMd not supported at en.wiki
 907 		return;																	-- not a reformattable date
 908 	end
 909 	
 910 	local c1, c2, c3, c4, c5, c6, c7;											-- these hold the captures specified in patterns[pattern_idx][1]
 911 	c1, c2, c3, c4, c5, c6, c7 = mw.ustring.match (date, patterns[pattern_idx][1]);	-- get the captures
 912 
 913 	local t = {																	-- table that holds k/v pairs of date parts from the captures and patterns[pattern_idx][2..]
 914 		[patterns[pattern_idx][2]] = c1;										-- at minimum there is always one capture with a matching indicator letter
 915 		[patterns[pattern_idx][3] or 'x'] = c2;									-- patterns can have a variable number of captures; each capture requires an indicator letter;
 916 		[patterns[pattern_idx][4] or 'x'] = c3;									-- where there is no capture, there is no indicator letter so n in patterns[pattern_idx][n] will be nil;
 917 		[patterns[pattern_idx][5] or 'x'] = c4;									-- the 'x' here spoofs an indicator letter to prevent 'table index is nil' error
 918 		[patterns[pattern_idx][6] or 'x'] = c5;
 919 		[patterns[pattern_idx][7] or 'x'] = c6;
 920 		[patterns[pattern_idx][8] or 'x'] = c7;
 921 		};
 922 
 923 	if t.a then																	-- if this date has an anchor year capture
 924 		t.y = t.a;																-- use the anchor year capture when reassembling the date
 925 	end
 926 
 927 	if tonumber(t.m) then														-- if raw month is a number (converting from ymd)
 928 		if 's' == mon_len then													-- if we are to use abbreviated month names
 929 			t.m = cfg.date_names['inv_local_s'][tonumber(t.m)];					-- convert it to a month name
 930 		else
 931 			t.m = cfg.date_names['inv_local_l'][tonumber(t.m)];					-- convert it to a month name
 932 		end
 933 		t.d = t.d:gsub ('0(%d)', '%1');											-- strip leading '0' from day if present
 934 	elseif 'ymd' == format_param then											-- when converting to ymd
 935 		t.y = t.y:gsub ('%a', '');												-- strip CITREF disambiguator if present; anchor year already known so process can proceed
 936 		if 1582 > tonumber (t.y) then											-- ymd format dates not allowed before 1582
 937 			return;
 938 		end
 939 		t.m = string.format ('%02d', get_month_number (t.m));					-- make sure that month and day are two digits
 940 		t.d = string.format ('%02d', t.d);
 941 	elseif mon_len then															-- if mon_len is set to either 'short' or 'long'
 942 		for _, mon in ipairs ({'m', 'm2'}) do									-- because there can be two month names, check both 
 943 			if t[mon] then
 944 				t[mon] = get_month_number (t[mon]);								-- get the month number for this month (is length agnostic)
 945 				if 0 == t[mon] then return; end									-- seasons and named dates can't be converted
 946 				t[mon] = (('s' == mon_len) and cfg.date_names['inv_local_s'][t[mon]]) or cfg.date_names['inv_local_l'][t[mon]];	-- fetch month name according to length
 947 			end
 948 		end
 949 	end
 950 
 951 	local new_date = string.format (re_formats[pattern_idx][format_param][1],	-- format string
 952 		t[re_formats[pattern_idx][format_param][2]],							-- named captures from t{}
 953 		t[re_formats[pattern_idx][format_param][3]],
 954 		t[re_formats[pattern_idx][format_param][4]],
 955 		t[re_formats[pattern_idx][format_param][5]],
 956 		t[re_formats[pattern_idx][format_param][6]],
 957 		t[re_formats[pattern_idx][format_param][7]],
 958 		t[re_formats[pattern_idx][format_param][8]]
 959 		);
 960 
 961 	return new_date;
 962 end
 963 
 964 
 965 --[[-------------------------< R E F O R M A T _ D A T E S >--------------------------------------------------
 966 
 967 Reformats existing dates into the format specified by format.
 968 
 969 format is one of several manual keywords: dmy, dmy-all, mdy, mdy-all, ymd, ymd-all.  The -all version includes
 970 access- and archive-dates; otherwise these dates are not reformatted.
 971 
 972 This function allows automatic date formatting.  In ~/Configuration, the article source is searched for one of
 973 the {{use xxx dates}} templates.  If found, xxx becomes the global date format as xxx-all.  If |cs1-dates= in
 974 {{use xxx dates}} has legitimate value then that value determines how cs1|2 dates will be rendered.  Legitimate
 975 values for |cs1-dates= are:
 976 	l - all dates are rendered with long month names
 977 	ls - publication dates use long month names; access-/archive-dates use abbreviated month names
 978 	ly - publication dates use long month names; access-/archive-dates rendered in ymd format
 979 	s - all dates are rendered with abbreviated (short) month names
 980 	sy - publication dates use abbreviated month names; access-/archive-dates rendered in ymd format
 981 	y - all dates are rendered in ymd format
 982 
 983 the format argument for automatic date formatting will be the format specified by {{use xxx dates}} with the
 984 value supplied by |cs1-dates so one of: xxx-l, xxx-ls, xxx-ly, xxx-s, xxx-sy, xxx-y, or simply xxx (|cs1-dates=
 985 empty, omitted, or invalid) where xxx shall be either of dmy or mdy.
 986 
 987 dates are extracted from date_parameters_list, reformatted (if appropriate), and then written back into the
 988 list in the new format.  Dates in date_parameters_list are presumed here to be valid (no errors).  This function
 989 returns true when a date has been reformatted, false else.  Actual reformatting is done by reformatter().
 990 
 991 ]]
 992 
 993 local function reformat_dates (date_parameters_list, format)
 994 	local all = false;															-- set to false to skip access- and archive-dates
 995 	local len_p = 'l';															-- default publication date length shall be long
 996 	local len_a = 'l';															-- default access-/archive-date length shall be long
 997 	local result = false;
 998 	local new_date;																
 999 	
1000 	if format:match('%a+%-all') then											-- manual df keyword; auto df keyword when length not specified in {{use xxx dates}}; 
1001 		format = format:match('(%a+)%-all');									-- extract the format
1002 		all = true;																-- all dates are long format dates because this keyword doesn't specify length
1003 	elseif format:match('%a+%-[lsy][sy]?') then									-- auto df keywords; internal only
1004 		all = true;																-- auto df applies to all dates; use length specified by capture len_p for all dates
1005 		format, len_p, len_a = format:match('(%a+)%-([lsy])([sy]?)');			-- extract the format and length keywords
1006 		if 'y' == len_p then													-- because allowed by MOS:DATEUNIFY (sort of) range dates and My dates not reformatted
1007 			format = 'ymd';														-- override {{use xxx dates}}
1008 		elseif (not is_set(len_a)) or (len_p == len_a) then						-- no access-/archive-date length specified or same length as publication dates then
1009 			len_a = len_p;														-- in case len_a not set
1010 		end
1011 	end																			-- else only publication dates and they are long
1012 
1013 	for param_name, param_val in pairs (date_parameters_list) do				-- for each date-holding parameter in the list
1014 		if is_set (param_val.val) then											-- if the parameter has a value
1015 			if not (not all and in_array (param_name, {'access-date', 'archive-date'})) then	-- skip access- or archive-date unless format is xxx-all; yeah, ugly; TODO: find a better way
1016 				for pattern_idx, pattern in pairs (patterns) do
1017 					if mw.ustring.match (param_val.val, pattern[1]) then
1018 						if all and in_array (param_name, {'access-date', 'archive-date'}) then	-- if this date is an access- or archive-date
1019 							new_date = reformatter (param_val.val, pattern_idx, (('y' == len_a) and 'ymd') or format, len_a);	-- choose ymd or dmy/mdy according to len_a setting
1020 						else													-- all other dates
1021 							new_date = reformatter (param_val.val, pattern_idx, format, len_p);
1022 						end
1023 						
1024 						if new_date then										-- set when date was reformatted
1025 							date_parameters_list[param_name].val = new_date;	-- update date in date list
1026 							result = true;										-- and announce that changes have been made
1027 						end
1028 					end	-- if
1029 				end		-- for
1030 			end			-- if
1031 		end				-- if
1032 	end					-- for
1033 return result;																	-- declare boolean result and done
1034 end
1035 
1036 
1037 --[[--------------------------< D A T E _ H Y P H E N _ T O _ D A S H >----------------------------------------
1038 
1039 Loops through the list of date-holding parameters and converts any hyphen to an ndash.  Not called if the cs1|2
1040 template has any date errors.
1041 
1042 Modifies the date_parameters_list and returns true if hyphens are replaced, else returns false.
1043 
1044 ]]
1045 
-- Replace every hyphen with an ndash in each set date that is not in ymd form.  The list is modified in place.
-- Returns true when at least one hyphen was replaced, false otherwise.

local function date_hyphen_to_dash (date_parameters_list)
	local replaced_any = false;

	for param_name, param in pairs (date_parameters_list) do					-- visit every date-holding parameter
		local value = param.val;
		if is_set (value) and not mw.ustring.match (value, patterns.ymd[1]) then	-- ymd dates keep their hyphens (ustring because here digits may not be Western)
			local dashed, count = value:gsub ('%-', '–');						-- hyphen -> ndash; count is the number of replacements
			if 0 ~= count then
				date_parameters_list[param_name].val = dashed;					-- store the changed date
				replaced_any = true;
			end
		end
	end

	return replaced_any;														-- tells the caller whether anything changed
end
1061 
1062 
--[[--------------------------< E D T F _ T R A N S F O R M >--------------------------------------------------

Walks the list of date-holding parameters and rewrites any EDTF formatted date as a MOS compliant date.  Only
the YYYY-MM-XX form (patterns.ymx) is supported at this time; such a date becomes '<month name> <year>' as
rendered by os.date ('%B %Y').  Not called if the cs1|2 template has any date errors.

Must be done before reformat_dates() and before date_hyphen_to_dash().

The entries of date_parameters_list are modified in place.

Returns true when at least one date was transformed; false otherwise.

]]

local function edtf_transform (date_parameters_list)
	local transformed = false;

	for _, date_param in pairs (date_parameters_list) do						-- date_param is the list's own entry table, so writes to it update the list
		if is_set (date_param.val) then
			local year, month = date_param.val:match (patterns.ymx[1]);			-- nil, nil when the value is not an EDTF date

			if year then
				local timestamp = os.time ({year = year, month = month, day = 1});	-- day is required by os.time()
				date_param.val = mw.text.trim (os.date ('%B %Y', timestamp));	-- month name and year
				transformed = true;
			end
		end
	end

	return transformed;															-- tells the caller whether a transform was done
end
1088 
1089 
--[[-------------------------< D A T E _ N A M E _ X L A T E >------------------------------------------------

Attempts to translate English month names to local-language month names using names supplied by MediaWiki's
date parser function.  This is simple name-for-name replacement and may not work for all languages.

For each parameter value that is set, every run of letters in the value is looked up in cfg.date_names.en.long
and cfg.date_names.en.short.  A long English name is replaced with the long local name (format 'F'); a short
English name with the short local name (format 'M').  Words found in neither table (local-language month names,
English season names, ...) are left untouched.

If xlt_dig is true, this function will also translate Western (English) digits to the local language's digits
using the cfg.date_names.xlate_digits map.  This will also translate ymd dates.

The entries of date_parameters_list are modified in place.

Returns true when a month name was translated or when at least one digit was translated; false otherwise.
Digit translation is reported only when a digit was actually found in the value so that values without digits
are not announced as modified.

]]

local function date_name_xlate (date_parameters_list, xlt_dig)
	local modified = false;

	for param_name, param_val in pairs (date_parameters_list) do				-- for each date-holding parameter in the list
		if is_set (param_val.val) then											-- if the parameter has a value
			local date = param_val.val;											-- working copy; receives the translations

			for month in mw.ustring.gmatch (param_val.val, '%a+') do			-- iterate through all words in the original value (single date or date range)
				local mode;														-- 'F' long name, 'M' short name, nil when not an English month name
				if cfg.date_names.en.long[month] then
					mode = 'F';													-- English name is long so use long local name
				elseif cfg.date_names.en.short[month] then
					mode = 'M';													-- English name is short so use short local name
				end

				if mode then													-- nil here might be a season or a local-language month name
					local xlate = lang_object:formatDate (mode, '1' .. month);	-- translate the month name to this local language
					date = mw.ustring.gsub (date, month, xlate);				-- replace the English with the translation
					modified = true;
				end
			end

			if xlt_dig then														-- shall we also translate digits?
				local digit_count;
				date, digit_count = date:gsub ('%d', cfg.date_names.xlate_digits);	-- translate digits from Western to 'local digits'
				if 0 ~= digit_count then										-- only announce a change when there were digits to translate
					modified = true;
				end
			end

			date_parameters_list[param_name].val = date;						-- save the (possibly) translated date; unchanged text when nothing was translated
		end
	end

	return modified;
end
1136 
1137 
--[[--------------------------< S E T _ S E L E C T E D _ M O D U L E S >--------------------------------------

Points this module's imported function names and its cfg table at the same (live or sandbox) modules as those
used by the other modules.

	cfg_table_ptr – table imported from the selected Module:Citation/CS1/Configuration; stored in cfg
	utilities_page_ptr – table imported from the selected Module:Citation/CS1/Utilities; supplies add_prop_cat,
		is_set, in_array, set_message, substitute and wrap_style

Returns nothing.

]]

local function set_selected_modules (cfg_table_ptr, utilities_page_ptr)
	local utilities = utilities_page_ptr;										-- short alias for the selected Utilities module

	add_prop_cat, is_set, in_array, set_message, substitute, wrap_style =		-- import all six functions in one assignment
		utilities.add_prop_cat, utilities.is_set, utilities.in_array,
		utilities.set_message, utilities.substitute, utilities.wrap_style;

	cfg = cfg_table_ptr;														-- import tables from the selected Configuration module
end
1154 
1155 
--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------

The module's public interface.  Each key is the name that callers use; each value is the file-scope identifier
of the same name.

]]

return {																		-- keys listed alphabetically; order has no effect on the table
	['date_hyphen_to_dash'] = date_hyphen_to_dash,
	['date_name_xlate'] = date_name_xlate,
	['dates'] = dates,
	['edtf_transform'] = edtf_transform,
	['reformat_dates'] = reformat_dates,
	['set_selected_modules'] = set_selected_modules,
	['year_date_check'] = year_date_check,
	};