smallem

require('Module:No globals');
local getArgs = require ('Module:Arguments').getArgs;

local cfg = mw.loadData ('Module:Citation/CS1/Configuration');					-- load the configuration module


--[[-------------------------< S H A R E D   L I T E R A L S >-------------------------------------------------

NOTE(review): the copy of this module that was reviewed had its HTML tags stripped from string literals (error
messages began with a bare '"font-size: ..." class="error">' and line breaks were raw newlines inside quoted
strings, which is not valid Lua).  The markup below is a reconstruction; confirm against the live module.

]]

local default_group_size = 100;													-- codes per group in the human readable code list
local line_break = '<br />';
local error_open = '<span style="font-size: 100%; font-style: normal;" class="error">';
local error_close = '</span>';


--[[-------------------------< A D D _ T O _ L I S T >---------------------------------------------------------

adds a code / name pair to <list> as the key 'code:name' with value true; the name comes from <override_list>
when that table has an entry for <code>, else <name> is used

storing the pair as a key avoids duplicates so only unique code / name pairs end up in <list>

]]

local function add_to_list (list, override_list, code, name)
	list[code .. ':' .. (override_list[code] or name)] = true;					-- override name wins over the MediaWiki name
end


--[[-------------------------< L I S T _ F O R M A T >---------------------------------------------------------

formats each 'code:name' key of <list> into a find/replace string and appends it to the sequence <result>

each rendered string is a Python-style tuple of two raw strings followed by a comma and a tab:
	(r"(\{\{\s*cit[aeio][^\}]*\|\s*language\s*=\s*)NAME(\s*[\|\}])", r"\1CODE\2"),
where NAME is the language name with runs of spaces replaced by '\ ' and with parentheses, virgule, and double
quote backslash-escaped, and CODE is the language tag.  The leading template match keeps |language= in non-cs1
templates from being touched.

<result>: sequence table that receives the find/replace strings
<list>: table whose keys are 'code:name' strings

returns the number of strings appended to <result>

]]

local function list_format (result, list)
	local count = 0;

	for key in pairs (list) do
		local code, name = key:match ('([^:]+):(.+)');							-- split key into code / name pair
		if code then															-- keys that do not split (empty name) are ignored instead of raising an error
			name = name:gsub (' +', '\\ '):gsub ('[%(%)/"]', '\\%1');			-- escape whitespace, then parens, virgule, and double quote
			table.insert (result, string.format (
				'(r"(\\{\\{\\s*cit[aeio][^\\}]*\\|\\s*language\\s*=\\s*)%s(\\s*[\\|\\}])", r"\\1%s\\2"),\t',
				name, code));
			count = count + 1;
		end
	end

	return count;
end


--[[-------------------------< G R O U P _ S I Z E _ G E T >---------------------------------------------------

converts the value of |list= to the number of codes per group for the human readable code list

returns default_group_size when <requested> is not a number, is not a positive whole number, or is greater than
<total>.  Zero, negative, and fractional values are rejected because they would become a zero or negative loop
step or fractional table.concat() indexes.

]]

local function group_size_get (requested, total)
	local size = tonumber (requested);

	if not size or size < 1 or size > total or size ~= math.floor (size) then
		return default_group_size;
	end

	return size;
end


--[[-------------------------< C O D E _ L I S T _ R E N D E R >-----------------------------------------------

renders the sorted sequence <codes> as the human readable code list: 'CodeListBegin', then comma separated codes
in groups of |list= items with two line breaks between groups, then a line break and 'CodeListEnd'

]]

local function code_list_render (codes, requested)
	local total = #codes;
	local size = group_size_get (requested, total);
	local out = {'CodeListBegin'};												-- create initialized output table

	for first = 1, total, size do												-- for each group of codes (or whatever remains at the end)
		local last = math.min (first + size - 1, total);						-- clamp so that table.concat() never reads past the end
		table.insert (out, table.concat (codes, ', ', first, last));
		if last ~= total then													-- not yet got to the end
			table.insert (out, line_break .. line_break);						-- separate this group from the next
		end
	end

	table.insert (out, line_break .. 'CodeListEnd');							-- close the code list
	return table.concat (out);
end


--[[-------------------------< S K I P _ R E A S O N _ G E T >-------------------------------------------------

returns a reason string when the language <name> must not be rendered as a regex; nil else

reasons, later tests override earlier ones:
	name holds characters from the Unicode Gothic block (U+10330–U+1034A): 'gothic unicode block'
	name holds U+200B, U+200C, or U+200D: the matched character itself (mapped to text by reason_map)
	lowercase name is a known language tag other than <code>: 'Ga' is a language name but 'ga' is the tag for
		Irish; a name that is its own tag (tiv for Tiv) is ok

the two Unicode tests are made only when <name_not_ascii> is true

]]

local function skip_reason_get (name, code, name_not_ascii, en_ref_list)
	local skip_reason;

	if name_not_ascii then
		if mw.ustring.find (name, '[\240\144\140\176-\240\144\141\138]+') then	-- UTF-8 for U+10330–U+1034A
			skip_reason = 'gothic unicode block';
		else
			skip_reason = mw.ustring.match (name, '[\226\128\139-\226\128\141]');	-- UTF-8 for U+200B–U+200D
		end
	end

	local lower_name = name:lower();
	if en_ref_list[lower_name] and lower_name ~= code then
		skip_reason = 'language name matches another language\'s tag';
	end

	return skip_reason;
end


--[[-------------------------< L A N G _ L I S T E R >---------------------------------------------------------

Module entry point

	{{#invoke:<module name>|lang_lister|lang=code, code, ...}}
	{{#invoke:<module name>|lang_lister|list=<number>}}

parameters:
	|lang= comma separated list of language tags; for each tag the MediaWiki language names known in that
		language are rendered as find/replace regexes
	|list= when set, return only the sorted list of local interwiki prefixes that are language tags; a numeric
		value sets the group size of the human readable form (default 100)
	|plain=yes selects the machine readable form of either output

There is an issue with pasting Unicode Gothic block text into Windows cmd.exe.  Until a better solution arises
this function skips any language name that contains Unicode Gothic block text (U+10330–U+1034A)
	https://www.unicode.org/charts/PDF/U10330.pdf
	\240\144\140\176-\240\144\141\138 is the decimal equivalent of hex UTF-8 code units F0 90 8C B0 – F0 90 8D 8A

There is an issue with the processing outside of this module where some process converts U+200B zero width
space, U+200C zero width non-joiner, and U+200D zero width joiner to the text strings '<200b>', '<200c>', and
'<200d>'.  Names that hold these codepoints are skipped so that the succeeding process does not have the
opportunity to mangle the name in the regex.

This function emits an error message for each name that it skips.

]]

local reason_map = {															-- map unicode codepoints (skip_reason) to plain text for error messages
	['\226\128\139'] = 'U+200B zero width space',
	['\226\128\140'] = 'U+200C zero width non-joiner',
	['\226\128\141'] = 'U+200D zero width joiner',
	}

local function lang_lister (frame)
	local args = getArgs (frame);
	local plain = 'yes' == args.plain;
	local override = cfg.lang_tag_remap or {};									-- tolerate a configuration without this table
	local en_ref_list = mw.language.fetchLanguageNames ('en', 'all');			-- make a en.wiki language list
	local iw_map = mw.site.interwikiMap ('local');								-- get list of all local wikis

	local iw_codes = {};
	for _, iw in pairs (iw_map) do												-- look at each wiki in the iw map
		if en_ref_list[iw.prefix] then											-- if the prefix is a language code
			table.insert (iw_codes, iw.prefix);
		end
	end
	table.sort (iw_codes);														-- ascending sort

	if args.list then
		if plain then
			return 'CodeListBegin:' .. table.concat (iw_codes, ', ') .. ':CodeListEnd';
		end
		return code_list_render (iw_codes, args.list);
	end

	mw.log (table.concat (iw_codes, ', '));										-- put a copy of the language code list in the Lua log

	if not args.lang then
		return error_open .. 'missing or empty |lang=' .. error_close;
	end

	local lang_param = args.lang:gsub ('%s*,$', '');							-- strip trailing comma if present
	local requested = mw.text.split (lang_param, '%s*,%s*');					-- make a table of lang codes from comma separated list

	for i, code in ipairs (requested) do										-- error check and normalize each code
		code = code:lower();
		if not en_ref_list[code] then											-- codes from |lang= must be found in the English list of codes and names
			return error_open .. '|lang= has invalid code: ' .. code .. error_close;
		end
		requested[i] = code;													-- the validated lowercase form is what gets used below
	end

	local list = {};															-- unique code / name pairs are collected here
	local skipped = {};

	for _, lang_code in ipairs (requested) do									-- for each lang code in the list
		local source_list = mw.language.fetchLanguageNames (lang_code, 'all');	-- make a source list for that language

		for code, name in pairs (source_list) do
			local name_not_ascii = name ~= string.match (name, '[%w%p ]*');		-- true when name has anything other than ASCII letters, digits, punctuation, space
			if name_not_ascii then
				mw.log (name);
			end

			local skip_reason = skip_reason_get (name, code, name_not_ascii, en_ref_list);

			if skip_reason then
				table.insert (skipped, table.concat ({
					'skipped: ', code, ': ', name,
					'; from: ', lang_code, '.wiki [',
					reason_map[skip_reason] or skip_reason,						-- add the reason for skipping
					']'
					}));
			else
				if name_not_ascii then											-- character delete tests apply only to names that have non-ascii characters
					name = mw.ustring.gsub (name, '[\226\128\142-\226\128\143]', '');	-- remove spurious U+200E left-to-right and U+200F right-to-left marks
					name = mw.ustring.gsub (name, '\239\187\191', '');			-- remove spurious U+FEFF zero width no-break space
				end
				add_to_list (list, override, code, name);
			end
		end
	end

	local result = {};															-- rendered find/replace strings
	local out = {};																-- final output goes here
	local count = list_format (result, list);									-- debug to find out how many items are in result{}

	mw.logObject (count, 'count');

	if 0 ~= #skipped then														-- if we skipped any, report them at the top of the output
		table.insert (out, error_open .. table.concat (skipped, line_break) .. error_close .. line_break);
	end

	table.sort (result);

	if plain then																-- for machine readable version
		table.insert (result, 1, 'RegexListBegin:');							-- opening keyword at beginning of sequence table
		-- NOTE(review): this pattern cannot match the strings that list_format() renders (they end with a comma
		-- and a tab) so nothing is removed; left as-is because the machine readable output is parsed elsewhere.
		result[#result] = (result[#result]:gsub (', $', ''));
		table.insert (result, ':RegexListEnd');									-- closing keyword at end of sequence table
		table.insert (out, table.concat (result));
		table.insert (out, table.concat ({'Regex count: ', count, line_break}));	-- add count of regexes rendered
		return table.concat (out);
	end

	table.insert (result, 1, 'RegexListBegin');									-- for human readable version, make a bulleted list
	table.insert (out, table.concat (result, '\n*'));
	table.insert (out, line_break .. 'RegexListEnd');
	table.insert (out, table.concat ({'Regex count: ', count, line_break}));	-- add count of regexes rendered
	return table.concat (out, '\n');
end


--[[-------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------
]]

return {
	lang_lister = lang_lister,
	};
smallem

Tags:

🔥 Trending searches on Wiki Shqip: