Saltar para o conteúdo

Módulo:languages/data

Origem: Wikcionário, o dicionário livre.

A documentação para este módulo pode ser criada na página Módulo:languages/data/doc

local u = mw.ustring.char
local export = {}

--[=[
Here is a list of the language fields by order of frequency according to [[User:Erutuon/language_stuff]].
If the order changes, change the order here for potentially greater efficiency.

local fields = {
	"canonical_name",
	"wikidata_item",
	"family",
	"scripts",
	"other_names",
	"ancestors",
	"type",
	"translit",
	"entry_name",
	"sort_key",
	"override_translit",
	"wikimedia_codes",
	"standard_chars",
	"wikipedia_article",
	"link_tr",
}

--[=[
Insert the fields into the table with their values as their frequency ranking.
{export.most_common_field = 1, export.second_most_common_field = 2, ... }

for i, field in ipairs(fields) do
	export[field] = i
end
]=]
-- UTF-8 encoded strings for some commonly-used diacritics.
local c = {
	grave			= u(0x0300),
	acute			= u(0x0301),
	circ			= u(0x0302),
	tilde			= u(0x0303),
	macron			= u(0x0304),
	overline		= u(0x0305),
	breve			= u(0x0306),
	dotabove		= u(0x0307),
	diaer			= u(0x0308),
	ringabove		= u(0x030A),
	dacute			= u(0x030B),
	caron			= u(0x030C),
	lineabove		= u(0x030D),
	dgrave			= u(0x030F),
	invbreve		= u(0x0311),
	commaabove		= u(0x0313),
	revcommaabove	= u(0x0314),
	dotbelow		= u(0x0323),
	diaerbelow		= u(0x0324),
	ringbelow		= u(0x0325),
	cedilla			= u(0x0327),
	ogonek			= u(0x0328),
	brevebelow		= u(0x032E),
	macronbelow		= u(0x0331),
	perispomeni		= u(0x0342),
	ypogegrammeni	= u(0x0345),
	CGJ				= u(0x034F), -- combining grapheme joiner
	dbrevebelow		= u(0x035C),
	dinvbreve		= u(0x0361),
	kashida			= u(0x0640),
	fathatan		= u(0x064B),
	dammatan		= u(0x064C),
	kasratan		= u(0x064D),
	fatha			= u(0x064E),
	damma			= u(0x064F),
	kasra			= u(0x0650),
	shadda			= u(0x0651),
	sukun			= u(0x0652),
	nunghunna		= u(0x0658),
	smallv			= u(0x065A),
	superalef		= u(0x0670),
	psili			= u(0x1FBD),
	coronis			= u(0x1FBF),
	ZWNJ			= u(0x200C), -- zero width non-joiner
	ZWJ				= u(0x200D), -- zero width joiner
	RSQuo			= u(0x2019), -- right single quote
	VS01			= u(0xFE00), -- variation selector 1
	-- Punctuation to be used for standardChars field.
	punc			= " !#%&*+,-./:;<=>?@^_`|~\'()∅"
}
export.chars = c

local p = {}
for i = 1, 32 do
	p[i] = u(0xF000+i-1)
end
export.puaChars = p

local s = {}
-- These values are placed here to make it possible to synchronise a group of languages without the need for a dedicated function module.

s["cau-Cyrl-displaytext"] = {
	from = {"[IlІӀ]", "ᴴ"},
	to = {"ӏ", "ᵸ"}
}

s["cau-Cyrl-entryname"] = {
	remove_diacritics = c.grave .. c.acute .. c.macron,
	from = s["cau-Cyrl-displaytext"].from,
	to = s["cau-Cyrl-displaytext"].to
}

s["cau-Latn-entryname"] = {remove_diacritics = c.grave .. c.acute .. c.macron}

s["Grek-sortkey"] = {
	remove_diacritics = c.grave .. c.acute .. c.diaer .. c.caron .. c.commaabove .. c.revcommaabove .. c.diaerbelow .. c.brevebelow .. c.perispomeni .. c.ypogegrammeni,
	from = {"ς"},
	to = {"σ"}
}

local HaniChars = require("Module:scripts").getByCode("Hani"):getCharacters()
s["Kore-entryname"] = {
	from = {"([" .. HaniChars .. "])%(.-%)", "(.)%-(.)", "%([" .. HaniChars .. "]+%)", u(0x302E), u(0x302F)},
	to = {"%1", "%1%2"}
}

s["Mong-displaytext"] = {
	from = {"([ᠨ-ᡂᡸ])ᠶ([ᠨ-ᡂᡸ])", "([ᠠ-ᡂᡸ])ᠸ([^ᠠ-ᠧ])", "([ᠠ-ᡂᡸ])ᠸ$"},
	to = {"%1ᠢ%2", "%1ᠧ%2", "%1ᠧ"}
}

s["Mong-entryname"] = s["Mong-displaytext"]

s["no-sortkey"] = {
	from = {"æ", "ø", "å"},
	to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]}
}

s["roa-oil-sortkey"] = {
	remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove .. c.cedilla .. "'",
	from = {"æ", "œ"},
	to = {"ae", "oe"}
}

s["Tibt-displaytext"] = {
	from = {"ༀ", "༌", "།།", "༚༚", "༚༝", "༝༚", "༝༝", "ཷ", "ཹ", "ེེ", "ོོ"},
	to = {"ཨོཾ", "་", "༎", "༛", "༟", "࿎", "༞", "ྲཱྀ", "ླཱྀ", "ཻ", "ཽ"}
}

s["Tibt-entryname"] = s["Tibt-displaytext"]

-- Capture patterns used to prevent formatting from being disrupted while text is being processed.
export.patterns = {
	"((" .. u(0x007F) .. "'\"`UNIQ%-%-%l+%-%x+%-QINU`\"'" .. u(0x007F) .. "))", -- Strip marker
	"((<[^<>]+>))", -- HTML tag
	"('*(''').-'*('''))", -- Bold
	"('*('').-'*(''))", -- Italics
	"((%[%[)[^[%]]*(]]))", -- Internal link
	"((%[)[^[%]]*(]))", -- External link
	"((https?://%S+))" -- URL
}

export.shared = s

return export