پودمان:Text

    از ویکی‌نور

    توضیحات این پودمان می‌تواند در پودمان:Text/توضیحات قرار گیرد.

    local yesNo = require("Module:Yesno")
    -- Library table; carries a version stamp and the suite name, and the
    -- Text.* functions defined further down are attached to it.
    local Text = { serial = "2022-07-21",
                   suite  = "Text" }
    --[=[
    Text utilities
    ]=]
    
    
    
    -- local globals
    -- File-level caches. Each one starts as false and is meant to be
    -- filled on first use by the function that needs it.
    -- PatternCJK        presumably the CJK character class (Text.containsCJK)
    -- PatternCombined   character class of combining marks
    -- PatternLatin      anchored pattern built from RangesLatin
    -- PatternTerminated pattern matching a terminated sentence end
    -- QuoteLang         language code -> quote style key
    -- QuoteType         quote style key -> codepoint pairs
    -- RangesLatin       sequence of { first, last } codepoint ranges
    -- SeekQuote         string made of all known quotation marks
    local PatternCJK        = false
    local PatternCombined   = false
    local PatternLatin      = false
    local PatternTerminated = false
    local QuoteLang         = false
    local QuoteType         = false
    local RangesLatin       = false
    local SeekQuote         = false
    
    local function initLatinData()
        -- Fill RangesLatin and PatternLatin on first use
        --     RangesLatin   -- sequence of { first, last } codepoints
        --     PatternLatin  -- "^[a-bc-d...]*$" built from those ranges
        RangesLatin = RangesLatin or { {    7,  687 },
                                       { 7531, 7578 },
                                       { 7680, 7935 },
                                       { 8194, 8250 } }
        if PatternLatin then
            return
        end
        local pieces = { "^[" }
        for _, span in ipairs( RangesLatin ) do
            -- 45 is the codepoint of "-", giving "first-last"
            pieces[ #pieces + 1 ] = mw.ustring.char( span[ 1 ], 45, span[ 2 ] )
        end
        pieces[ #pieces + 1 ] = "]*$"
        PatternLatin = table.concat( pieces )
    end
    
    local function initQuoteData()
        -- Create the quote definitions on first use; tables that already
        -- exist are left untouched.

        -- QuoteLang: language code -> key into QuoteType
        QuoteLang = QuoteLang or {
            af        = "bd",
            ar        = "la",
            be        = "labd",
            bg        = "bd",
            ca        = "la",
            cs        = "bd",
            da        = "bd",
            de        = "bd",
            dsb       = "bd",
            et        = "bd",
            el        = "lald",
            en        = "ld",
            es        = "la",
            eu        = "la",
        --  fa        = "la",
            fi        = "rd",
            fr        = "laSPC",
            ga        = "ld",
            he        = "ldla",
            hr        = "bd",
            hsb       = "bd",
            hu        = "bd",
            hy        = "labd",
            id        = "rd",
            is        = "bd",
            it        = "ld",
            ja        = "x300C",
            ka        = "bd",
            ko        = "ld",
            lt        = "bd",
            lv        = "bd",
            nl        = "ld",
            nn        = "la",
            no        = "la",
            pl        = "bdla",
            pt        = "lald",
            ro        = "bdla",
            ru        = "labd",
            sk        = "bd",
            sl        = "bd",
            sq        = "la",
            sr        = "bx",
            sv        = "rd",
            th        = "ld",
            tr        = "ld",
            uk        = "la",
            zh        = "ld",
            ["de-ch"] = "la",
            ["en-gb"] = "lsld",
            ["en-us"] = "ld",
            ["fr-ch"] = "la",
            ["it-ch"] = "la",
            ["pt-br"] = "ldla",
            ["zh-tw"] = "x300C",
            ["zh-cn"] = "ld",
        }

        -- QuoteType: key -> { level 1 pair, level 2 pair [, true] }
        -- Each pair is { opening codepoint, closing codepoint }.
        -- Codepoints: 0xAB laquo, 0xBB raquo, 0x2018 lsquo, 0x2019 rsquo,
        -- 0x201A sbquo, 0x201C ldquo, 0x201D rdquo, 0x201E bdquo,
        -- 0x2039 lsaquo, 0x203A rsaquo, 0x300C-0x300F CJK brackets.
        -- A third element of true makes fiatQuote() pad the text with spaces.
        QuoteType = QuoteType or {
            bd    = { { 0x201E, 0x201C }, { 0x201A, 0x2019 } },
            bdla  = { { 0x201E, 0x201C }, { 0xAB,   0xBB   } },
            bx    = { { 0x201E, 0x201D }, { 0x201A, 0x2019 } },
            la    = { { 0xAB,   0xBB   }, { 0x2039, 0x203A } },
            laSPC = { { 0xAB,   0xBB   }, { 0x2039, 0x203A }, true },
            labd  = { { 0xAB,   0xBB   }, { 0x201E, 0x201C } },
            lald  = { { 0xAB,   0xBB   }, { 0x201C, 0x201D } },
            ld    = { { 0x201C, 0x201D }, { 0x2018, 0x2019 } },
            ldla  = { { 0x201C, 0x201D }, { 0xAB,   0xBB   } },
            lsld  = { { 0x2018, 0x2019 }, { 0x201C, 0x201D } },
            rd    = { { 0x201D, 0x201D }, { 0x2019, 0x2019 } },
            x300C = { { 0x300C, 0x300D }, { 0x300E, 0x300F } },
        }
    end -- initQuoteData()
    
    
    
    local function fiatQuote( apply, alien, advance )
        -- Wrap text in the quotation marks defined for a language
        -- Parameter:
        --     apply    -- string, with text; other values are converted
        --                 by tostring(), nil and false become ""
        --     alien    -- string, with language code; defaults to "en"
        --     advance  -- number, with level 1 or 2
        -- Returns: string; the quoted text, or the unquoted text if no
        --          quote pair is defined for the requested level
        local r = apply and tostring( apply ) or ""
        alien   = alien or "en"
        advance = tonumber( advance ) or 0
        initQuoteData()
        -- Lookup order: full code ("de-ch"), leading subtag ("de"), "en"
        local slang = alien:match( "^(%l+)-" )
        local suite = QuoteLang[ alien ]
                      or slang and QuoteLang[ slang ]
                      or QuoteLang[ "en" ]
        if suite then
            local quotes = QuoteType[ suite ]
            if quotes then
                -- A third element flags styles padded inside the marks
                local space = quotes[ 3 ] and " " or ""
                quotes = quotes[ advance ]
                if quotes then
                    -- Format the normalised text r, not the raw argument,
                    -- so that nil or non-string input cannot break format()
                    r = mw.ustring.format( "%s%s%s%s%s",
                                           mw.ustring.char( quotes[ 1 ] ),
                                           space,
                                           r,
                                           space,
                                           mw.ustring.char( quotes[ 2 ] ) )
                end
            else
                mw.log( "fiatQuote() " .. suite )
            end
        end
        return r
    end -- fiatQuote()
    
    
    
    Text.char = function ( apply, again, accept )
        -- Create string from codepoints
        -- Parameter:
        --     apply   -- table (sequence) with numerical codepoints, or nil
        --     again   -- number of repetitions, or nil
        --     accept  -- true, if no error messages to be appended
        -- Returns: string; "" if there is nothing to produce. If any item
        --          is rejected, the result is an error <span> listing the
        --          rejected items, or "" when accept is set.
        apply = type( apply ) == "table" and apply or { }
        again = math.floor( tonumber( again ) or 1 )
        if again < 1 then
            return ""
        end
        local bad   = { }
        local codes = { }
        for _, v in ipairs( apply ) do
            local n = tonumber( v )
            -- Accept TAB, LF and everything from 32 up to the last Unicode
            -- codepoint. Larger values would make mw.ustring.char() raise
            -- a script error; written positively so that NaN is rejected.
            local fine = n
                         and ( n >= 32 or n == 9 or n == 10 )
                         and n <= 0x10FFFF
            if fine then
                table.insert( codes, math.floor( n ) )
            else
                table.insert( bad, tostring( v ) )
            end
        end
        if #bad > 0 then
            if accept then
                return ""
            end
            return tostring( mw.html.create( "span" )
                                    :addClass( "error" )
                                    :wikitext( "bad codepoints: "
                                               .. table.concat( bad, " " ) ) )
        end
        if #codes == 0 then
            return ""
        end
        local r = mw.ustring.char( unpack( codes ) )
        if again > 1 then
            r = r:rep( again )
        end
        return r
    end -- Text.char()
    
    local function trimAndFormat(args, fmt)
        -- Trim the items of a sequence, drop those that end up empty and
        -- optionally run the remaining ones through a format string
        -- Parameter:
        --     args  -- table (sequence); any other value is treated as a
        --              sequence holding just that value
        --     fmt   -- string (optional); format including "%s"
        -- Returns: table (sequence) of strings
        local items = args
        if type(items) ~= 'table' then
            items = { items }
        end
        local kept = {}
        for _, item in ipairs(items) do
            local text = mw.text.trim(tostring(item))
            if text ~= "" then
                if fmt then
                    text = mw.ustring.format(fmt, text)
                end
                table.insert(kept, text)
            end
        end
        return kept
    end
    
    Text.concatParams = function ( args, apply, adapt )
        -- Concat list items into one string
        --     Items are trimmed; empty items are skipped
        -- Parameter:
        --     args   -- table (sequence) with numKey=string
        --     apply  -- string (optional); separator (default: "|")
        --     adapt  -- string (optional); format including "%s"
        -- Returns: string
        return table.concat( trimAndFormat( args, adapt ), apply or "|" )
    end -- Text.concatParams()
    
    
    
    Text.containsCJK = function ( s )
        -- Is any CJK code within?
        -- Parameter:
        --     s  -- string; other values are converted by tostring()
        -- Returns: true, if CJK detected
        s = s and tostring( s ) or ""
        -- Cache the character class in the file-level local PatternCJK
        -- instead of leaking a global variable
        if not PatternCJK then
            -- Builds "[a-bc-d...]": 91 is "[", 45 is "-", 93 is "]"
            PatternCJK = mw.ustring.char( 91,
                                            4352, 45,   4607,
                                           11904, 45,  42191,
                                           43072, 45,  43135,
                                           44032, 45,  55215,
                                           63744, 45,  64255,
                                           65072, 45,  65103,
                                           65381, 45,  65500,
                                          131072, 45, 196607,
                                          93 )
        end
        return mw.ustring.find( s, PatternCJK ) ~= nil
    end -- Text.containsCJK()
    
    Text.removeDelimited = function (s, prefix, suffix)
        -- Remove all text in s delimited by prefix and suffix (inclusive)
        -- Arguments:
        --    s = string to process
        --    prefix = initial delimiter (literal text, not a pattern)
        --    suffix = ending delimiter (literal text, not a pattern)
        -- Returns: stripped string; a prefix without a following suffix
        --          removes everything up to the end of the string
        s = s and tostring(s) or ""
        prefix = prefix and tostring(prefix) or ""
        suffix = suffix and tostring(suffix) or ""
        -- string.find and string.sub below work on bytes, so the delimiter
        -- lengths must be byte lengths, not character counts
        local prefixLen = #prefix
        local suffixLen = #suffix
        if prefixLen == 0 or suffixLen == 0 then
            return s
        end
        local r = s
        local i = r:find(prefix, 1, true)
        while i do
            -- Plain search: a delimiter such as "-->" contains pattern
            -- magic characters and would otherwise match a lone ">"
            local j = r:find(suffix, i + prefixLen, true)
            if j then
                r = r:sub(1, i - 1) .. r:sub(j + suffixLen)
            else
                r = r:sub(1, i - 1)
            end
            -- Restart from the beginning: joining the two remainders may
            -- have formed a new prefix
            i = r:find(prefix, 1, true)
        end
        return r
    end
    
    Text.getPlain = function ( adjust )
        -- Remove wikisyntax from string, except templates
        --     Drops HTML comments, tags, bold/italic apostrophes and
        --     replaces &nbsp; by a space
        -- Parameter:
        --     adjust  -- string
        -- Returns: string
        local plain = Text.removeDelimited( adjust, "<!--", "-->" )
        -- Each assignment keeps only the first result of gsub()
        plain = plain:gsub( "(</?%l[^>]*>)", "" )
        plain = plain:gsub( "'''", "" )
        plain = plain:gsub( "''", "" )
        plain = plain:gsub( "&nbsp;", " " )
        return plain
    end -- Text.getPlain()
    
    Text.isLatinRange = function (s)
        -- Does the string consist only of codepoints from the latin ranges?
        -- Arguments:
        --  s = string to analyze; other values are converted by tostring()
        -- Returns: true, if valid for latin only
        local subject = ""
        if s then
            subject = tostring(s)
        end
        initLatinData()
        local hit = mw.ustring.match(subject, PatternLatin)
        return hit ~= nil
    end -- Text.isLatinRange()
    
    
    
    Text.isQuote = function ( s )
        -- Is this character any quotation mark?
        -- Parameter:
        --     s = single character to analyze
        -- Returns: true, if s is quotation mark
        local glyph = s and tostring(s) or ""
        if glyph == "" then
            return false
        end
        if not SeekQuote then
            local marks = { 34,        -- "
                            39,        -- '
                            171,       -- laquo
                            187,       -- raquo
                            8216,      -- lsquo
                            8217,      -- rsquo
                            8218,      -- sbquo
                            8220,      -- ldquo
                            8221,      -- rdquo
                            8222,      -- bdquo
                            8249,      -- lsaquo
                            8250,      -- rsaquo
                            0x300C,    -- CJK
                            0x300D,    -- CJK
                            0x300E,    -- CJK
                            0x300F }   -- CJK
            SeekQuote = mw.ustring.char( unpack( marks ) )
        end
        -- Plain search of glyph inside the list of known marks
        local found = mw.ustring.find( SeekQuote, glyph, 1, true )
        return found ~= nil
    end -- Text.isQuote()
    
    
    
    Text.listToText = function ( args, adapt )
        -- Join list items the way mw.text.listToText() does
        --     Items are trimmed; empty items are skipped
        -- Parameter:
        --     args   -- table (sequence) with numKey=string
        --     adapt  -- string (optional); format including "%s"
        -- Returns: string
        local items = trimAndFormat( args, adapt )
        return mw.text.listToText( items )
    end -- Text.listToText()
    
    
    
    Text.quote = function ( apply, alien, advance )
        -- Quote text
        -- Parameter:
        --     apply    -- string, with text
        --     alien    -- string, with language code, or nil
        --     advance  -- number, with level 1 or 2, or nil
        -- Returns: quoted string
        local text = apply and tostring( apply ) or ""
        local slang
        if type( alien ) == "string" then
            slang = mw.text.trim( alien ):lower()
        else
            -- TODO FIXME: Introduction expected 2017-04
            -- NOTE(review): confirm that title objects really offer a
            -- pageLanguage field; otherwise the content language is
            -- always the one used here.
            slang = mw.title.getCurrentTitle().pageLanguage
                    or mw.language.getContentLanguage():getCode()
        end
        -- Any level other than 2 is treated as level 1
        local level = ( advance == 2 ) and 2 or 1
        return fiatQuote( mw.text.trim( text ), slang, level )
    end -- Text.quote()
    
    
    
    Text.quoteUnquoted = function ( apply, alien, advance )
        -- Quote text, if not yet quoted and not empty
        --     Text counts as quoted when its first or its last character
        --     is a quotation mark
        -- Parameter:
        --     apply    -- string, with text
        --     alien    -- string, with language code, or nil
        --     advance  -- number, with level 1 or 2, or nil
        -- Returns: string; possibly quoted
        local r     = mw.text.trim( apply and tostring( apply ) or "" )
        local first = mw.ustring.sub( r, 1, 1 )
        if first ~= ""  and  not Text.isQuote( first ) then
            -- Last character: sub( r, -1 ). An end index of 1 would yield
            -- an empty string for any text longer than one character.
            local last = mw.ustring.sub( r, -1 )
            if not Text.isQuote( last ) then
                r = Text.quote( r, alien, advance )
            end
        end
        return r
    end -- Text.quoteUnquoted()
    
    
    
    Text.removeDiacritics = function ( adjust )
        -- Remove all diacritics
        --     Decomposes to NFD, deletes the combining marks and
        --     recomposes to NFC
        -- Parameter:
        --     adjust  -- string
        -- Returns: string; all latin letters should be ASCII
        --                  or basic greek or cyrillic or symbols etc.
        -- Character class of combining marks: 91 "[", 45 "-", 93 "]"
        PatternCombined = PatternCombined
                          or mw.ustring.char( 91,
                                              0x0300, 45, 0x036F,
                                              0x1AB0, 45, 0x1AFF,
                                              0x1DC0, 45, 0x1DFF,
                                              0xFE20, 45, 0xFE2F,
                                              93 )
        local source = adjust and tostring( adjust ) or ""
        local split  = mw.ustring.toNFD( source )
        -- Only the first result of gsub() (the string) is kept
        local bare   = mw.ustring.gsub( split, PatternCombined, "" )
        return mw.ustring.toNFC( bare )
    end -- Text.removeDiacritics()
    
    
    
    Text.sentenceTerminated = function ( analyse )
        -- Is string terminated by dot, question or exclamation mark?
        --     Quotation, link termination and so on granted
        -- Parameter:
        --     analyse  -- string; other values are converted by
        --                 tostring(), nil and false count as ""
        -- Returns: true, if sentence terminated
        -- Coerce the input like the other Text functions do, so that nil
        -- yields false instead of a script error
        analyse = analyse and tostring( analyse ) or ""
        if not PatternTerminated then
            -- "[" followed by four codepoints (12290, 65281, 65294,
            -- 65311), then the ASCII marks and the ellipsis; after the
            -- class any run of quotes and "]" up to the end of the string
            PatternTerminated = mw.ustring.char( 91,
                                                 12290,
                                                 65281,
                                                 65294,
                                                 65311 )
                                .. "!%.%?…][\"'%]‹›«»‘’“”]*$"
        end
        return mw.ustring.find( analyse, PatternTerminated ) ~= nil
    end -- Text.sentenceTerminated()
    
    
    
    Text.ucfirstAll = function ( adjust)
        -- Capitalize all words
        -- Arguments:
        --     adjust = string to adjust
        -- Returns: string with all first letters in upper case
        local source  = adjust and tostring(adjust) or ""
        local decoded = mw.text.decode(source, true)
        -- Remember whether entities were decoded, to encode again later
        local recode  = (decoded ~= source)
        -- Leading space lets the very first letter match "%W%l" as well
        local r  = " " .. decoded
        local at = mw.ustring.find( r, "%W%l", 1 )
        while at do
            local letter = mw.ustring.sub( r, at + 1, at + 1 )
            r = string.format( "%s%s%s",
                               mw.ustring.sub( r, 1, at ),
                               mw.ustring.upper( letter ),
                               mw.ustring.sub( r, at + 2 ) )
            -- Continue at the letter that has just been raised
            at = mw.ustring.find( r, "%W%l", at + 1 )
        end
        -- Drop the leading space again
        r = r:sub( 2 )
        if recode then
            r = mw.text.encode(r)
        end
        return r
    end -- Text.ucfirstAll()
    
    
    Text.uprightNonlatin = function ( adjust )
        -- Ensure non-italics for non-latin text parts
        --     One single greek letter might be granted
        --     Walks through the codepoints of the string, collects runs
        --     that start with a non-latin character and wraps each run in
        --     <span dir='auto' style='font-style:normal'>
        -- Precondition:
        --     adjust  -- string
        -- Returns: string with non-latin parts enclosed in <span>
        local r
        initLatinData()
        if mw.ustring.match( adjust, PatternLatin ) then
            -- latin only, horizontal dashes, quotes
            r = adjust
        else
            local c    -- current codepoint; reused as a character below
            local j    = false    -- start index of the open run, or false
            local k    = 1        -- first index not yet copied into r
            local m    = false    -- index at which a latin character would
                                  -- show that the run was one greek letter
            local n    = mw.ustring.len( adjust )
            local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>"
            local flat = function ( a )
                      -- isLatin
                      -- true if codepoint a lies within one of RangesLatin;
                      -- returns nothing otherwise
                      local range
                      for i = 1, #RangesLatin do
                          range = RangesLatin[ i ]
                          if a >= range[ 1 ]  and  a <= range[ 2 ] then
                              return true
                          end
                      end    -- for i
                  end -- flat()
            local focus = function ( a )
                      -- char is not ambivalent
                      -- true for codepoints above 64 except 8192..8212,
                      -- and for '&' and '<'
                      local r = ( a > 64 )
                      if r then
                          r = ( a < 8192  or  a > 8212 )
                      else
                          r = ( a == 38  or  a == 60 )    -- '&' '<'
                      end
                      return r
                  end -- focus()
            local form = function ( a )
                    -- Result so far, plus the pending text k..j-1,
                    -- plus the run j..a wrapped in the span
                    return string.format( span,
                                          r,
                                          mw.ustring.sub( adjust, k, j - 1 ),
                                          mw.ustring.sub( adjust, j, a ) )
                  end -- form()
            r = ""
            for i = 1, n do
                c = mw.ustring.codepoint( adjust, i, i )
                if focus( c ) then
                    if flat( c ) then
                        -- latin character: closes an open run, if any
                        if j then
                            if m then
                                if i == m then
                                    -- single greek letter.
                                    -- only ambivalent characters followed
                                    -- it, so the run is discarded
                                    j = false
                                end
                                m = false
                            end
                            if j then
                                -- Walk back over trailing spaces and "("
                                -- so that they stay outside the span
                                local nx = i - 1
                                local s  = ""
                                for ix = nx, 1, -1 do
                                    c = mw.ustring.sub( adjust, ix, ix )
                                    if c == " "  or  c == "(" then
                                        nx = nx - 1
                                        s  = c .. s
                                    else
                                        break -- for ix
                                    end
                                end -- for ix
                                r = form( nx ) .. s
                                j = false
                                k = i
                            end
                        end
                    elseif not j then
                        -- non-latin character opens a new run
                        j = i
                        if c >= 880  and  c <= 1023 then
                            -- single greek letter?
                            m = i + 1
                        else
                            m = false
                        end
                    end
                elseif m then
                    -- ambivalent character: shift the index at which the
                    -- single greek letter test is made
                    m = m + 1
                end
            end    -- for i
            if j  and  ( not m  or  m < n ) then
                -- run still open at the end of the string: wrap up to n
                r = form( n )
            else
                -- append whatever has not been copied yet
                r = r .. mw.ustring.sub( adjust, k )
            end
        end
        return r
    end -- Text.uprightNonlatin()
    
    
    Text.test = function ( about )
        -- Expose internal data for inspection
        -- Parameter:
        --     about  -- string; only "quote" is known
        -- Returns: table with QuoteLang and QuoteType, or nil
        if about ~= "quote" then
            return nil
        end
        initQuoteData()
        return { QuoteLang = QuoteLang,
                 QuoteType = QuoteType }
    end -- Text.test()
    
    
    
    -- Export
    local p = { }

    do
        -- Yes/no functions: the first #invoke parameter is tested;
        -- "1" is returned for true, the empty string for false
        local flags = { 'containsCJK',
                        'isLatinRange',
                        'isQuote',
                        'sentenceTerminated' }
        for i = 1, #flags do
            local name = flags[ i ]
            p[ name ] = function ( frame )
                if Text[ name ]( frame.args[ 1 ] or "" ) then
                    return "1"
                end
                return ""
            end
        end

        -- Converting functions: the first #invoke parameter is passed
        -- through and the result returned as it is
        local filters = { 'getPlain',
                          'removeDiacritics',
                          'ucfirstAll',
                          'uprightNonlatin' }
        for i = 1, #filters do
            local name = filters[ i ]
            p[ name ] = function ( frame )
                return Text[ name ]( frame.args[ 1 ] or "" )
            end
        end
    end
    
    function p.char( frame )
        -- Template/#invoke access to Text.char()
        --     1       -- codepoints separated by whitespace; a leading "x"
        --                marks a hexadecimal number
        --     *       -- number of repetitions
        --     errors  -- a "no" value suppresses the error message
        -- The parameters of the calling template win; those of the
        -- #invoke are used if the template supplies no parameter 1.
        local params = frame:getParent().args
        local story  = params[ 1 ]
        if not story then
            params = frame.args
            story  = params[ 1 ]
        end
        local codes, lenient, multiple
        local items = story and mw.text.split( mw.text.trim( story ), "%s+" )
        if items and #items > 0 then
            lenient  = ( yesNo( params.errors ) == false )
            codes    = { }
            multiple = tonumber( params[ "*" ] )
            for _, item in ipairs( items ) do
                local literal = item
                if item:sub( 1, 1 ) == "x" then
                    -- "x41" becomes "0x41", which tonumber() understands
                    literal = "0" .. item
                end
                -- Unparseable items are kept so Text.char() can report them
                table.insert( codes, tonumber( literal ) or item )
            end
        end
        return Text.char( codes, multiple, lenient )
    end
    
    function p.concatParams( frame )
        -- #invoke access to Text.concatParams()
        --     template   -- "1": join the parameters of the calling
        --                   template rather than those of the #invoke
        --     separator  -- passed on as separator
        --     format     -- passed on as format string
        local own       = frame.args
        local useParent = own.template
        if type( useParent ) == "string" then
            useParent = ( mw.text.trim( useParent ) == "1" )
        end
        local source
        if useParent then
            source = frame:getParent().args
        else
            source = own
        end
        return Text.concatParams( source, own.separator, own.format )
    end
    
    
    function p.listToFormat(frame)
        -- Apply a format string row by row to several parallel lists
        -- Parameter (frame.args):
        --     1, 2, ...  -- lists; the n-th "%s" of the format receives the
        --                   item of the n-th list
        --     format     -- string containing "%s" placeholders
        --     sep        -- item separator, used as pattern by
        --                   mw.text.split(); default ";"
        -- Returns: string; all formatted rows concatenated, "" if no
        --          format was given. Items missing in shorter lists are
        --          treated as empty.
        local pformat = frame.args["format"]
        if not pformat then
            -- Nothing to fill in; avoids calling gsub() on nil
            return ""
        end
        local sep = frame.args["sep"] or ";"

        -- Parse parameters: collect the numbered ones as lists
        local lists = {}
        for k, v in pairs(frame.args) do
            local knum = tonumber(k)
            if knum then lists[knum] = v end
        end

        -- Split the lists and determine the longest one
        local maxListLen = 0
        for i = 1, #lists do
            lists[i] = mw.text.split(lists[i], sep)
            if #lists[i] > maxListLen then maxListLen = #lists[i] end
        end

        -- Generate the result, one formatted row per item index
        local rows = {}
        for i = 1, maxListLen do
            local line = pformat
            for j = 1, #lists do
                local item = lists[j][i] or ""
                -- A function replacement inserts the item literally, so
                -- that "%" inside the item is not read as capture reference
                line = mw.ustring.gsub(line, "%%s", function () return item end, 1)
            end
            rows[i] = line
        end

        return table.concat(rows)
    end
    
    
    
    function p.listToText( frame )
        -- #invoke access to Text.listToText()
        --     template  -- "1": use the parameters of the calling
        --                  template rather than those of the #invoke
        --     format    -- passed on as format string
        local own       = frame.args
        local useParent = own.template
        if type( useParent ) == "string" then
            useParent = ( mw.text.trim( useParent ) == "1" )
        end
        local source
        if useParent then
            source = frame:getParent().args
        else
            source = own
        end
        return Text.listToText( source, own.format )
    end
    
    
    
    function p.quote( frame )
        -- #invoke access to Text.quote()
        --     1  -- text
        --     2  -- language code; blank counts as not given
        --     3  -- level
        local args  = frame.args
        local slang = args[ 2 ]
        if type( slang ) == "string" then
            local code = mw.text.trim( slang )
            -- false for a blank code, the trimmed code otherwise
            slang = ( code ~= "" ) and code
        end
        local level = tonumber( args[ 3 ] )
        return Text.quote( args[ 1 ] or "", slang, level )
    end
    
    
    
    function p.quoteUnquoted( frame )
        -- #invoke access to Text.quoteUnquoted()
        --     1  -- text
        --     2  -- language code; blank counts as not given
        --     3  -- level
        local args  = frame.args
        local slang = args[ 2 ]
        if type( slang ) == "string" then
            local code = mw.text.trim( slang )
            -- false for a blank code, the trimmed code otherwise
            slang = ( code ~= "" ) and code
        end
        local level = tonumber( args[ 3 ] )
        return Text.quoteUnquoted( args[ 1 ] or "", slang, level )
    end
    
    
    function p.zip(frame)
        -- Interleave several lists item by item
        -- Parameter (frame.args):
        --     1, 2, ...       -- lists
        --     sep             -- default separator for splitting a list
        --     sep1, sep2 ...  -- separator for the list of that number
        --     isep            -- inserted between the items of one row
        --     osep            -- inserted between rows
        -- Returns: string
        local args       = frame.args
        local defaultsep = args["sep"] or ""
        local innersep   = args["isep"] or ""
        local outersep   = args["osep"] or ""

        -- Parse parameters: numbered ones are lists, "sepN" are separators
        local lists = {}
        local seps  = {}
        for k, v in pairs(args) do
            local knum = tonumber(k)
            if knum then
                lists[knum] = v
            elseif string.sub(k, 1, 3) == "sep" then
                local sepnum = tonumber(string.sub(k, 4))
                if sepnum then
                    seps[sepnum] = v
                end
            end
        end
        -- Use the default separator where no explicit one was given
        for i = 1, math.max(#seps, #lists) do
            seps[i] = seps[i] or defaultsep
        end

        -- Split the lists and determine the longest one
        local maxListLen = 0
        for i = 1, #lists do
            local parts = mw.text.split(lists[i], seps[i])
            lists[i] = parts
            if #parts > maxListLen then
                maxListLen = #parts
            end
        end

        -- One row per item index; missing items count as empty
        local rows = {}
        for i = 1, maxListLen do
            local cells = {}
            for j = 1, #lists do
                cells[j] = lists[j][i] or ""
            end
            rows[i] = table.concat(cells, innersep)
        end
        return table.concat(rows, outersep)
    end
    
    
    
    p.failsafe = function ()
        -- Returns: the version stamp stored in Text.serial
        return Text.serial
    end
    
    
    
    function p.Text()
        -- Library access for other modules
        -- Returns: the Text table with all library functions
        return Text
    end -- p.Text
    
    return p