Jump to content

Module:scripts/charToScript

ꯋꯤꯛꯁꯟꯅꯔꯤ ꯗꯒꯤ

Documentation for this module may be created at Module:scripts/charToScript/doc

local subexport = {}

local floor = math.floor

-- Copied from [[Module:Unicode data]].
-- Binary search for `codepoint` in `ranges`, an array whose entries hold a
-- first codepoint at [1] and a last codepoint at [2] (assumed to be sorted by
-- first codepoint, as a binary search requires).
-- Returns the matching entry and its index, or nil and the last index probed.
local function binaryRangeSearch(codepoint, ranges)
	local low, mid, high
	-- Use the table's explicit `length` field when it has one; otherwise ask
	-- [[Module:table]] for the length.
	low, high = 1, ranges.length or require("Module:table").length(ranges)
	while low <= high do
		mid = floor((low + high) / 2)
		local range = ranges[mid]
		if codepoint < range[1] then
			high = mid - 1
		elseif codepoint <= range[2] then
			return range, mid
		else
			low = mid + 1
		end
	end
	return nil, mid
end

-- Copied from [[Module:Unicode data]].
-- Linear scan for `codepoint` in `ranges`. Stops as soon as an entry starts
-- above `codepoint`, so it is only correct when entries are in ascending
-- order. Returns the matching entry, or nothing if there is none.
local function linearRangeSearch(codepoint, ranges)
	for _, range in ipairs(ranges) do
		if codepoint < range[1] then
			break
		elseif codepoint <= range[2] then
			return range
		end
	end
end

-- Ordering function for table.sort: ranges are ordered by first codepoint.
local function compareRanges(range1, range2)
	return range1[1] < range2[1]
end

-- Save previously used codepoint ranges in case another character is in the
-- same range. Kept sorted so that linearRangeSearch can stop early.
local rangesCache = {}

--[=[
	Takes a codepoint or a character and finds the script code (if any) that is
	appropriate for it based on the codepoint, using the data module
	[[Module:scripts/recognition data]]. The data module was generated from the
	patterns in [[Module:scripts/data]] using
	[[Module:User:Erutuon/script recognition]].

	Converts the character to a codepoint. Returns a script code if the
	codepoint is in the list of individual characters, or if it is in one of
	the defined ranges in the 4096-character block that it belongs to, else
	returns "None".

	Raises an error if the argument is neither a string nor a number, or if it
	is a string that does not contain exactly one character.
]=]
local charToScriptData

function subexport.charToScript(char)
	-- The recognition data is loaded lazily, on the first call only.
	charToScriptData = charToScriptData or mw.loadData("Module:scripts/recognition data")

	local t = type(char)
	local codepoint
	if t == "string" then
		local etc
		-- Ask for the first two codepoints: a second one means the string is
		-- too long, and no first one means the string is empty.
		codepoint, etc = mw.ustring.codepoint(char, 1, 2)
		if etc or not codepoint then
			error("bad argument #1 to 'charToScript' (expected a single character)")
		end
	elseif t == "number" then
		codepoint = char
	else
		error(("bad argument #1 to 'charToScript' (expected string or a number, got %s)")
			:format(t))
	end

	local individualMatch = charToScriptData.individual[codepoint]
	if individualMatch then
		return individualMatch
	end

	local range
	-- Try ranges found by earlier calls before touching the data module.
	if rangesCache[1] then
		range = linearRangeSearch(codepoint, rangesCache)
		if range then
			return range[3]
		end
	end

	-- Index of the 0x1000-codepoint (4096-character) block holding the codepoint.
	local index = floor(codepoint / 0x1000)

	-- `blocks` is searched by block index; only if that fails are the
	-- per-block ranges for this index consulted.
	range = linearRangeSearch(index, charToScriptData.blocks)
	if not range and charToScriptData[index] then
		range = binaryRangeSearch(codepoint, charToScriptData[index])
		if range then
			table.insert(rangesCache, range)
			table.sort(rangesCache, compareRanges)
		end
	end

	return range and range[3] or "None"
end

-- Returns the script code that charToScript reports for the largest number of
-- characters in `text` (a UTF-8 string), or nil if `text` yields no
-- characters. "None" is counted like any other result. When several scripts
-- share the highest count, the one that occurs first in `text` is returned,
-- so the result does not depend on table traversal order.
function subexport.findBestScriptWithoutLang(text)
	local counts = {}
	-- Script codes in order of first appearance, used for deterministic ties.
	local seenOrder = {}

	-- The pattern matches one UTF-8 sequence: a lead byte followed by any
	-- continuation bytes.
	for character in text:gmatch("[%z\1-\127\194-\244][\128-\191]*") do
		local script = subexport.charToScript(character)
		local count = counts[script]
		if not count then
			count = 0
			seenOrder[#seenOrder + 1] = script
		end
		counts[script] = count + 1
	end

	local bestScript
	local greatestCount = 0
	for _, script in ipairs(seenOrder) do
		local count = counts[script]
		-- Strict comparison keeps the earliest script when counts are equal.
		if count > greatestCount then
			bestScript = script
			greatestCount = count
		end
	end

	return bestScript
end

return subexport