Module:Strings

Documentation for this module may be created at Module:Strings/doc
-- Export table: every function attached to `p` below takes a frame object
-- and is invoked from wikitext as {{#invoke:Strings|<function name>|...}}.
local p = {}

--[[
Upper-cases the first unnamed argument using mw.ustring.upper.
A missing argument is treated as the empty string.
Usage on a wiki page: {{#invoke:Strings|to_upper|text to convert}}
]]
function p.to_upper(frame)
	local input = frame.args[1]
	if not input then
		-- No argument supplied: operate on an empty string instead of nil.
		input = ''
	end
	return mw.ustring.upper(input)
end

--[[
Lower-cases the first unnamed argument using mw.ustring.lower.
A missing argument is treated as the empty string.
Usage on a wiki page: {{#invoke:Strings|to_lower|text to convert}}
]]
function p.to_lower(frame)
	local input = frame.args[1]
	if not input then
		-- No argument supplied: operate on an empty string instead of nil.
		input = ''
	end
	return mw.ustring.lower(input)
end

--[[
Returns the first item of a comma-separated list stored in a
MediaWiki: message page.
Usage: {{#invoke:Strings|getFirst|message-name}}

The message text is obtained through the {{int:}} parser function.
Returns '' when the message name is missing or blank; otherwise returns
the text before the first comma with surrounding whitespace removed.
]]
function p.getFirst(frame)
    -- Unnamed #invoke arguments are passed through untrimmed, and a blank
    -- argument arrives as '' (which is truthy in Lua), so normalise the
    -- name before deciding whether one was actually supplied.
    local messageName = mw.text.trim(frame.args[1] or '')
    if messageName == '' then
        return '' -- Nothing to look up
    end

    -- Use the parser to get the internationalized content of the message
    -- (e.g. for a name such as 'pf-values-sex').
    local messageContent = frame:callParserFunction('int', messageName)

    -- Split on commas, swallowing the whitespace on either side of each one.
    local parts = mw.text.split(messageContent, '%s*,%s*')

    -- The split pattern only eats whitespace adjacent to a comma, so trim
    -- the first piece to drop any whitespace at the very start of the message
    -- (or at its end, when the message contains no comma at all).
    return mw.text.trim(parts[1] or '')
end

--[[
Takes a comma-separated string of wikitext items, sorts the items and
returns them joined with ', '.
Usage: {{#invoke:Strings|tokenSort|[[C]], [[A]], [[B]]}}
       -> [[A]], [[B]], [[C]]

Each item is trimmed. Blank items (produced by doubled, leading or trailing
commas) are dropped so they cannot surface as stray separators in the output.
Ordering is table.sort's default string comparison applied to the raw
wikitext, so it is case-sensitive and markup such as '[[' takes part in it.
Returns '' when the argument is missing or contains no non-blank item.
]]
function p.tokenSort(frame)
	local inputString = frame.args[1] or ''

	local items = {}

	-- Walk over every run of non-comma characters.
	for piece in mw.ustring.gmatch(inputString, '[^,]+') do
		-- Strip surrounding whitespace. The extra parentheses discard the
		-- substitution count that gsub returns as a second value.
		local item = (mw.ustring.gsub(piece, '^%s*(.-)%s*$', '%1'))
		-- Skip items that were nothing but whitespace.
		if item ~= '' then
			items[#items + 1] = item
		end
	end

	-- Covers empty input, whitespace-only input and input made only of commas.
	if #items == 0 then
		return ''
	end

	table.sort(items)

	return table.concat(items, ', ')
end

--[[
Decodes a URL-encoded string.
Usage on a wiki page: {{#invoke:Strings|decode|Some+text%3A+more}}

Every '+' becomes a space and every %XX escape (two hexadecimal digits, either
case) becomes the byte with that value. Anything else, including a '%' that is
not followed by two hex digits, is left unchanged.
A missing argument is treated as the empty string.
]]
function p.decode(frame)
    local s = frame.args[1] or ''

    -- 1. Turn '+' into a space. This has to happen before step 2 so that a
    --    literal plus sign that was encoded as %2B survives as '+'.
    --    '+' is a magic character in Lua patterns, so it is escaped as '%+'
    --    rather than relying on how a lone quantifier happens to be parsed.
    s = string.gsub(s, '%+', ' ')

    -- 2. Replace each percent escape with the byte it encodes.
    --    '%%' matches a literal '%'; '(%x%x)' captures two hex digits.
    --    tonumber(h, 16) reads the capture as base 16 and string.char turns
    --    that number into a single byte.
    s = string.gsub(s, '%%(%x%x)', function(h)
        return string.char(tonumber(h, 16))
    end)

    return s
end

--[[
Returns the text inside the first "(...)" group of the first unnamed argument.
When the trimmed argument contains no such group, the trimmed argument itself
is returned. A missing argument is treated as the empty string.
]]
function p.parenthetical(frame)
    local trimmed = mw.text.trim(frame.args[1] or '')

    -- Lazy capture: stops at the first ')' that follows the first '('.
    local inner = trimmed:match("%((.-)%)")
    if inner then
        return inner
    end
    return trimmed
end

--[[
Resolves a value of the form 'Display Title (Canonical Name)' to the
canonical page name.

Resolution order:
  1. No argument at all                         -> ''
  2. The whole (trimmed) value is an existing
     page, e.g. "My Page (Project A)"           -> the value unchanged
  3. The value does not end in ')'              -> the value unchanged
  4. The value ends in a balanced "(...)" group
     whose trimmed content is a valid title     -> that content
  5. Anything else (unbalanced parentheses,
     invalid title inside them)                 -> the value unchanged
]]
function p.getCanonicalName(frame)
    local tokenValue = frame.args[1]
    if not tokenValue then
        return '' -- No value provided
    end
    tokenValue = mw.text.trim(tokenValue)

    -- The full string takes priority: page names may legitimately contain
    -- parentheses. mw.title.new returns nil for an invalid title, hence the
    -- nil guard. Note that .exists is a moderately expensive check.
    local fullTitleObj = mw.title.new(tokenValue)
    if fullTitleObj and fullTitleObj.exists then
        return tokenValue
    end

    -- Only values ending in ')' can be in the 'Display (Canonical)' format;
    -- anything else is returned untouched (probably a red link).
    if tokenValue:sub(-1) ~= ')' then
        return tokenValue
    end

    -- Scan backwards from the character before the final ')' to find the '('
    -- that balances it. `level` counts nested ")" that still need their own
    -- "(" before the outer one can be matched.
    local len = #tokenValue
    local level = 0
    local split_pos
    for i = len - 1, 1, -1 do
        local char = tokenValue:sub(i, i)
        if char == ')' then
            level = level + 1
        elseif char == '(' then
            if level == 0 then
                split_pos = i
                break
            end
            level = level - 1
        end
    end

    -- Final ')' without a matching '(': malformed, keep the original value.
    if not split_pos then
        return tokenValue
    end

    -- Text strictly between the matched parentheses.
    local canonicalName = mw.text.trim(tokenValue:sub(split_pos + 1, len - 1))

    -- Accept it only if it is a syntactically valid title; the page does not
    -- have to exist (mw.title.new still returns an object for a red link).
    if mw.title.new(canonicalName) then
        return canonicalName
    end

    -- e.g. "Page (invalid:char)": fall back to the original value.
    return tokenValue
end

-- Hand the export table back to whatever loaded this module.
return p