Current File : //usr/share/texlive/texmf-dist/tex/luatex/lualibs/lualibs-util-prs.lua |
if not modules then modules = { } end modules ['util-prs'] = {
version = 1.001,
comment = "companion to luat-lib.mkiv",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
copyright = "PRAGMA ADE / ConTeXt Development Team",
license = "see context related readme files"
}
local lpeg, table, string = lpeg, table, string
local P, R, V, S, C, Ct, Cs, Carg, Cc, Cg, Cf, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cg, lpeg.Cf, lpeg.Cp
local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
local concat, gmatch, find = table.concat, string.gmatch, string.find
local tonumber, tostring, type, next, rawset = tonumber, tostring, type, next, rawset
local mod, div = math.mod, math.div
utilities = utilities or {}
local parsers = utilities.parsers or { }
utilities.parsers = parsers
local patterns = parsers.patterns or { }
parsers.patterns = patterns
local setmetatableindex = table.setmetatableindex
local sortedhash = table.sortedhash
local sortedkeys = table.sortedkeys
local tohash = table.tohash
local hashes = { }
parsers.hashes = hashes
-- we share some patterns
local digit = R("09")
local space = P(' ')
local equal = P("=")
local colon = P(":")
local comma = P(",")
local lbrace = P("{")
local rbrace = P("}")
local lparent = P("(")
local rparent = P(")")
local lbracket = P("[")
local rbracket = P("]")
local period = S(".")
local punctuation = S(".,:;")
local spacer = lpegpatterns.spacer
local whitespace = lpegpatterns.whitespace
local newline = lpegpatterns.newline
local anything = lpegpatterns.anything
local endofstring = lpegpatterns.endofstring
local nobrace = 1 - (lbrace + rbrace )
local noparent = 1 - (lparent + rparent)
local nobracket = 1 - (lbracket + rbracket)
-- we could use a Cf Cg construct
local escape, left, right = P("\\"), P('{'), P('}')
lpegpatterns.balanced = P {
[1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
[2] = left * V(1) * right
}
local nestedbraces = P { lbrace * (nobrace + V(1))^0 * rbrace }
local nestedparents = P { lparent * (noparent + V(1))^0 * rparent }
local nestedbrackets = P { lbracket * (nobracket + V(1))^0 * rbracket }
local spaces = space^0
local argument = Cs((lbrace/"") * ((nobrace + nestedbraces)^0) * (rbrace/""))
local content = (1-endofstring)^0
lpegpatterns.nestedbraces = nestedbraces -- no capture
lpegpatterns.nestedparents = nestedparents -- no capture
lpegpatterns.nested = nestedbraces -- no capture
lpegpatterns.argument = argument -- argument after e.g. =
lpegpatterns.content = content -- rest after e.g =
local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace
+ C((nestedbraces + (1-comma))^0)
local key = C((1-equal-comma)^1)
local pattern_a = (space+comma)^0 * (key * equal * value + key * C(""))
local pattern_c = (space+comma)^0 * (key * equal * value)
local pattern_d = (space+comma)^0 * (key * (equal+colon) * value + key * C(""))
local key = C((1-space-equal-comma)^1)
local pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + C("")))
-- "a=1, b=2, c=3, d={a{b,c}d}, e=12345, f=xx{a{b,c}d}xx, g={}" : outer {} removes, leading spaces ignored
local hash = { }
local function set(key,value)
hash[key] = value
end
local pattern_a_s = (pattern_a/set)^1
local pattern_b_s = (pattern_b/set)^1
local pattern_c_s = (pattern_c/set)^1
local pattern_d_s = (pattern_d/set)^1
patterns.settings_to_hash_a = pattern_a_s
patterns.settings_to_hash_b = pattern_b_s
patterns.settings_to_hash_c = pattern_c_s
patterns.settings_to_hash_d = pattern_d_s
function parsers.make_settings_to_hash_pattern(set,how)
if how == "strict" then
return (pattern_c/set)^1
elseif how == "tolerant" then
return (pattern_b/set)^1
else
return (pattern_a/set)^1
end
end
function parsers.settings_to_hash(str,existing)
if not str or str == "" then
return { }
elseif type(str) == "table" then
if existing then
for k, v in next, str do
existing[k] = v
end
return exiting
else
return str
end
else
hash = existing or { }
lpegmatch(pattern_a_s,str)
return hash
end
end
function parsers.settings_to_hash_colon_too(str)
if not str or str == "" then
return { }
elseif type(str) == "table" then
return str
else
hash = { }
lpegmatch(pattern_d_s,str)
return hash
end
end
function parsers.settings_to_hash_tolerant(str,existing)
if not str or str == "" then
return { }
elseif type(str) == "table" then
if existing then
for k, v in next, str do
existing[k] = v
end
return exiting
else
return str
end
else
hash = existing or { }
lpegmatch(pattern_b_s,str)
return hash
end
end
function parsers.settings_to_hash_strict(str,existing)
if not str or str == "" then
return nil
elseif type(str) == "table" then
if existing then
for k, v in next, str do
existing[k] = v
end
return exiting
else
return str
end
elseif str and str ~= "" then
hash = existing or { }
lpegmatch(pattern_c_s,str)
return next(hash) and hash
end
end
local separator = comma * space^0
local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace
+ C((nestedbraces + (1-comma))^0)
local pattern = spaces * Ct(value*(separator*value)^0)
-- "aap, {noot}, mies" : outer {} removed, leading spaces ignored
patterns.settings_to_array = pattern
-- we could use a weak table as cache
function parsers.settings_to_array(str,strict)
if not str or str == "" then
return { }
elseif type(str) == "table" then
return str
elseif strict then
if find(str,"{",1,true) then
return lpegmatch(pattern,str)
else
return { str }
end
elseif find(str,",",1,true) then
return lpegmatch(pattern,str)
else
return { str }
end
end
function parsers.settings_to_numbers(str)
if not str or str == "" then
return { }
end
if type(str) == "table" then
-- fall through
elseif find(str,",",1,true) then
str = lpegmatch(pattern,str)
else
return { tonumber(str) }
end
for i=1,#str do
str[i] = tonumber(str[i])
end
return str
end
local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace
+ C((nestedbraces + nestedbrackets + nestedparents + (1-comma))^0)
local pattern = spaces * Ct(value*(separator*value)^0)
function parsers.settings_to_array_obey_fences(str)
return lpegmatch(pattern,str)
end
-- inspect(parsers.settings_to_array_obey_fences("url(http://a,b.c)"))
-- this one also strips end spaces before separators
--
-- "{123} , 456 " -> "123" "456"
-- local separator = space^0 * comma * space^0
-- local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
-- + C((nestedbraces + (1-(space^0*(comma+P(-1)))))^0)
-- local withvalue = Carg(1) * value / function(f,s) return f(s) end
-- local pattern_a = spaces * Ct(value*(separator*value)^0)
-- local pattern_b = spaces * withvalue * (separator*withvalue)^0
local cache_a = { }
local cache_b = { }
function parsers.groupedsplitat(symbol,withaction)
if not symbol then
symbol = ","
end
local pattern = (withaction and cache_b or cache_a)[symbol]
if not pattern then
local symbols = S(symbol)
local separator = space^0 * symbols * space^0
local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace
+ C((nestedbraces + (1-(space^0*(symbols+P(-1)))))^0)
if withaction then
local withvalue = Carg(1) * value / function(f,s) return f(s) end
pattern = spaces * withvalue * (separator*withvalue)^0
cache_b[symbol] = pattern
else
pattern = spaces * Ct(value*(separator*value)^0)
cache_a[symbol] = pattern
end
end
return pattern
end
local pattern_a = parsers.groupedsplitat(",",false)
local pattern_b = parsers.groupedsplitat(",",true)
function parsers.stripped_settings_to_array(str)
if not str or str == "" then
return { }
else
return lpegmatch(pattern_a,str)
end
end
function parsers.process_stripped_settings(str,action)
if not str or str == "" then
return { }
else
return lpegmatch(pattern_b,str,1,action)
end
end
-- parsers.process_stripped_settings("{123} , 456 ",function(s) print("["..s.."]") end)
-- parsers.process_stripped_settings("123 , 456 ",function(s) print("["..s.."]") end)
local function set(t,v)
t[#t+1] = v
end
local value = P(Carg(1)*value) / set
local pattern = value*(separator*value)^0 * Carg(1)
function parsers.add_settings_to_array(t,str)
return lpegmatch(pattern,str,nil,t)
end
function parsers.hash_to_string(h,separator,yes,no,strict,omit)
if h then
local t = { }
local tn = 0
local s = sortedkeys(h)
omit = omit and tohash(omit)
for i=1,#s do
local key = s[i]
if not omit or not omit[key] then
local value = h[key]
if type(value) == "boolean" then
if yes and no then
if value then
tn = tn + 1
t[tn] = key .. '=' .. yes
elseif not strict then
tn = tn + 1
t[tn] = key .. '=' .. no
end
elseif value or not strict then
tn = tn + 1
t[tn] = key .. '=' .. tostring(value)
end
else
tn = tn + 1
t[tn] = key .. '=' .. value
end
end
end
return concat(t,separator or ",")
else
return ""
end
end
function parsers.array_to_string(a,separator)
if a then
return concat(a,separator or ",")
else
return ""
end
end
-- function parsers.settings_to_set(str,t) -- tohash? -- todo: lpeg -- duplicate anyway
-- if str then
-- t = t or { }
-- for s in gmatch(str,"[^, ]+") do -- space added
-- t[s] = true
-- end
-- return t
-- else
-- return { }
-- end
-- end
local pattern = Cf(Ct("") * Cg(C((1-S(", "))^1) * S(", ")^0 * Cc(true))^1,rawset)
function parsers.settings_to_set(str)
return str and lpegmatch(pattern,str) or { }
end
hashes.settings_to_set = table.setmetatableindex(function(t,k) -- experiment, not public
local v = k and lpegmatch(pattern,k) or { }
t[k] = v
return v
end)
-- as we use a next, we are not sure when the gc kicks in
getmetatable(hashes.settings_to_set).__mode = "kv" -- could be an option (maybe sharing makes sense)
function parsers.simple_hash_to_string(h, separator)
local t = { }
local tn = 0
for k, v in sortedhash(h) do
if v then
tn = tn + 1
t[tn] = k
end
end
return concat(t,separator or ",")
end
-- for mtx-context etc: aaaa bbbb cccc=dddd eeee=ffff
local str = Cs(lpegpatterns.unquoted) + C((1-whitespace-equal)^1)
local setting = Cf( Carg(1) * (whitespace^0 * Cg(str * whitespace^0 * (equal * whitespace^0 * str + Cc(""))))^1,rawset)
local splitter = setting^1
function parsers.options_to_hash(str,target)
return str and lpegmatch(splitter,str,1,target or { }) or { }
end
local splitter = lpeg.tsplitat(" ")
function parsers.options_to_array(str)
return str and lpegmatch(splitter,str) or { }
end
-- for chem (currently one level)
local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
+ C(digit^1 * lparent * (noparent + nestedparents)^1 * rparent)
+ C((nestedbraces + (1-comma))^1)
local pattern_a = spaces * Ct(value*(separator*value)^0)
local function repeater(n,str)
if not n then
return str
else
local s = lpegmatch(pattern_a,str)
if n == 1 then
return unpack(s)
else
local t = { }
local tn = 0
for i=1,n do
for j=1,#s do
tn = tn + 1
t[tn] = s[j]
end
end
return unpack(t)
end
end
end
local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
+ (C(digit^1)/tonumber * lparent * Cs((noparent + nestedparents)^1) * rparent) / repeater
+ C((nestedbraces + (1-comma))^1)
local pattern_b = spaces * Ct(value*(separator*value)^0)
function parsers.settings_to_array_with_repeat(str,expand) -- beware: "" => { }
if expand then
return lpegmatch(pattern_b,str) or { }
else
return lpegmatch(pattern_a,str) or { }
end
end
--
local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace
local pattern = Ct((space + value)^0)
function parsers.arguments_to_table(str)
return lpegmatch(pattern,str)
end
-- temporary here (unoptimized)
function parsers.getparameters(self,class,parentclass,settings)
local sc = self[class]
if not sc then
sc = { }
self[class] = sc
if parentclass then
local sp = self[parentclass]
if not sp then
sp = { }
self[parentclass] = sp
end
setmetatableindex(sc,sp)
end
end
parsers.settings_to_hash(settings,sc)
end
function parsers.listitem(str)
return gmatch(str,"[^, ]+")
end
--
local pattern = Cs { "start",
start = V("one") + V("two") + V("three"),
rest = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0,
thousand = digit * digit * digit,
one = digit * V("rest"),
two = digit * digit * V("rest"),
three = V("thousand") * V("rest"),
}
lpegpatterns.splitthousands = pattern -- maybe better in the parsers namespace ?
function parsers.splitthousands(str)
return lpegmatch(pattern,str) or str
end
-- print(parsers.splitthousands("11111111111.11"))
local optionalwhitespace = whitespace^0
lpegpatterns.words = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1)
lpegpatterns.sentences = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1)
lpegpatterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1)
-- local str = " Word1 word2. \n Word3 word4. \n\n Word5 word6.\n "
-- inspect(lpegmatch(lpegpatterns.paragraphs,str))
-- inspect(lpegmatch(lpegpatterns.sentences,str))
-- inspect(lpegmatch(lpegpatterns.words,str))
-- handy for k="v" [, ] k="v"
local dquote = P('"')
local equal = P('=')
local escape = P('\\')
local separator = S(' ,')
local key = C((1-equal)^1)
local value = dquote * C((1-dquote-escape*dquote)^0) * dquote
----- pattern = Cf(Ct("") * Cg(key * equal * value) * separator^0,rawset)^0 * P(-1) -- was wrong
local pattern = Cf(Ct("") * (Cg(key * equal * value) * separator^0)^1,rawset)^0 * P(-1)
function parsers.keq_to_hash(str)
if str and str ~= "" then
return lpegmatch(pattern,str)
else
return { }
end
end
-- inspect(lpeg.match(pattern,[[key="value" foo="bar"]]))
local defaultspecification = { separator = ",", quote = '"' }
-- this version accepts multiple separators and quotes as used in the
-- database module
function parsers.csvsplitter(specification)
specification = specification and setmetatableindex(specification,defaultspecification) or defaultspecification
local separator = specification.separator
local quotechar = specification.quote
local numbers = specification.numbers
local separator = S(separator ~= "" and separator or ",")
local whatever = C((1 - separator - newline)^0)
if quotechar and quotechar ~= "" then
local quotedata = nil
for chr in gmatch(quotechar,".") do
local quotechar = P(chr)
local quoteitem = (1 - quotechar)^0
local quoteword = quotechar * (numbers and (quoteitem/tonumber) or C(quoteitem)) * quotechar
if quotedata then
quotedata = quotedata + quoteword
else
quotedata = quoteword
end
end
whatever = quotedata + whatever
end
local parser = Ct((Ct(whatever * (separator * whatever)^0) * S("\n\r")^1)^0 )
return function(data)
return lpegmatch(parser,data)
end
end
-- local crap = [[
-- first,second,third,fourth
-- "1","2","3","4"
-- "5","6","7","8"
-- ]]
--
-- local mycsvsplitter = parsers.csvsplitter { numbers = true }
--
-- local list = mycsvsplitter(crap) inspect(list)
-- and this is a slightly patched version of a version posted by Philipp Gesang
function parsers.rfc4180splitter(specification)
specification = specification and setmetatableindex(specification,defaultspecification) or defaultspecification
local numbers = specification.numbers
local separator = specification.separator --> rfc: COMMA
local quotechar = P(specification.quote) --> DQUOTE
local dquotechar = quotechar * quotechar --> 2DQUOTE
/ specification.quote
local separator = S(separator ~= "" and separator or ",")
local whatever = (dquotechar + (1 - quotechar))^0
local escaped = quotechar
* (numbers and (whatever/tonumber) or Cs(whatever))
* quotechar
local non_escaped = C((1 - quotechar - newline - separator)^1)
local field = escaped + non_escaped + Cc("")
local record = Ct(field * (separator * field)^1)
local headerline = record * Cp()
local morerecords = (newline^(specification.strict and -1 or 1) * record)^0
local headeryes = Ct(morerecords)
local headernop = Ct(record * morerecords)
return function(data,getheader)
if getheader then
local header, position = lpegmatch(headerline,data)
local data = lpegmatch(headeryes,data,position)
return data, header
else
return lpegmatch(headernop,data)
end
end
end
-- local mycsvsplitter = parsers.rfc4180splitter { numbers = true }
--
-- local crap = [[
-- first,second,third,fourth
-- "1","2","3","4"
-- "a","b","c","d"
-- "foo","bar""baz","boogie","xyzzy"
-- ]]
--
-- local list, names = mycsvsplitter(crap,true) inspect(list) inspect(names)
-- local list, names = mycsvsplitter(crap) inspect(list) inspect(names)
-- parsers.stepper("1,7-",9,function(i) print(">>>",i) end)
-- parsers.stepper("1-3,7,8,9")
-- parsers.stepper("1-3,6,7",function(i) print(">>>",i) end)
-- parsers.stepper(" 1 : 3, ,7 ")
-- parsers.stepper("1:4,9:13,24:*",30)
local function ranger(first,last,n,action)
if not first then
-- forget about it
elseif last == true then
for i=first,n or first do
action(i)
end
elseif last then
for i=first,last do
action(i)
end
else
action(first)
end
end
local cardinal = lpegpatterns.cardinal / tonumber
local spacers = lpegpatterns.spacer^0
local endofstring = lpegpatterns.endofstring
local stepper = spacers * ( cardinal * ( spacers * S(":-") * spacers * ( cardinal + Cc(true) ) + Cc(false) )
* Carg(1) * Carg(2) / ranger * S(", ")^0 )^1
local stepper = spacers * ( cardinal * ( spacers * S(":-") * spacers * ( cardinal + (P("*") + endofstring) * Cc(true) ) + Cc(false) )
* Carg(1) * Carg(2) / ranger * S(", ")^0 )^1 * endofstring -- we're sort of strict (could do without endofstring)
function parsers.stepper(str,n,action)
if type(n) == "function" then
lpegmatch(stepper,str,1,false,n or print)
else
lpegmatch(stepper,str,1,n,action or print)
end
end
--
local pattern_math = Cs((P("%")/"\\percent " + P("^") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)
local pattern_text = Cs((P("%")/"\\percent " + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)
patterns.unittotex = pattern
function parsers.unittotex(str,textmode)
return lpegmatch(textmode and pattern_text or pattern_math,str)
end
local pattern = Cs((P("^") / "<sup>" * lpegpatterns.integer * Cc("</sup>") + anything)^0)
function parsers.unittoxml(str)
return lpegmatch(pattern,str)
end
-- print(parsers.unittotex("10^-32 %"),utilities.parsers.unittoxml("10^32 %"))
local cache = { }
local spaces = lpegpatterns.space^0
local dummy = function() end
setmetatableindex(cache,function(t,k)
local separator = P(k)
local value = (1-separator)^0
local pattern = spaces * C(value) * separator^0 * Cp()
t[k] = pattern
return pattern
end)
local commalistiterator = cache[","]
function parsers.iterator(str,separator)
local n = #str
if n == 0 then
return dummy
else
local pattern = separator and cache[separator] or commalistiterator
local p = 1
return function()
if p <= n then
local s, e = lpegmatch(pattern,str,p)
if e then
p = e
return s
end
end
end
end
end
-- for s in parsers.iterator("a b c,b,c") do
-- print(s)
-- end
local function initialize(t,name)
local source = t[name]
if source then
local result = { }
for k, v in next, t[name] do
result[k] = v
end
return result
else
return { }
end
end
local function fetch(t,name)
return t[name] or { }
end
local function process(result,more)
for k, v in next, more do
result[k] = v
end
return result
end
local name = C((1-S(", "))^1)
local parser = (Carg(1) * name / initialize) * (S(", ")^1 * (Carg(1) * name / fetch))^0
local merge = Cf(parser,process)
function parsers.mergehashes(hash,list)
return lpegmatch(merge,list,1,hash)
end
-- local t = {
-- aa = { alpha = 1, beta = 2, gamma = 3, },
-- bb = { alpha = 4, beta = 5, delta = 6, },
-- cc = { epsilon = 3 },
-- }
--
-- inspect(parsers.mergehashes(t,"aa, bb, cc"))
function parsers.runtime(time)
if not time then
time = os.runtime()
end
local days = div(time,24*60*60)
time = mod(time,24*60*60)
local hours = div(time,60*60)
time = mod(time,60*60)
local minutes = div(time,60)
local seconds = mod(time,60)
return days, hours, minutes, seconds
end
--
local spacing = whitespace^0
local apply = P("->")
local method = C((1-apply)^1)
local token = lbrace * C((1-rbrace)^1) * rbrace + C(anything^1)
local pattern = spacing * (method * spacing * apply + Carg(1)) * spacing * token
function parsers.splitmethod(str,default)
if str then
return lpegmatch(pattern,str,1,default or false)
else
return default or false, ""
end
end
-- print(parsers.splitmethod(" foo -> {bar} "))
-- print(parsers.splitmethod("foo->{bar}"))
-- print(parsers.splitmethod("foo->bar"))
-- print(parsers.splitmethod("foo"))
-- print(parsers.splitmethod("{foo}"))
-- print(parsers.splitmethod())
local p_year = lpegpatterns.digit^4 / tonumber
local pattern = Cf( Ct("") *
(
( Cg(Cc("year") * p_year)
* S("-/") * Cg(Cc("month") * cardinal)
* S("-/") * Cg(Cc("day") * cardinal)
) +
( Cg(Cc("day") * cardinal)
* S("-/") * Cg(Cc("month") * cardinal)
* S("-/") * Cg(Cc("year") * p_year)
)
)
* P(" ") * Cg(Cc("hour") * cardinal)
* P(":") * Cg(Cc("min") * cardinal)
* (P(":") * Cg(Cc("sec") * cardinal))^-1
, rawset)
lpegpatterns.splittime = pattern
function parsers.totime(str)
return lpegmatch(pattern,str)
end
-- print(os.time(parsers.totime("2019-03-05 12:12:12")))
-- print(os.time(parsers.totime("2019/03/05 12:12:12")))
-- print(os.time(parsers.totime("05-03-2019 12:12:12")))
-- print(os.time(parsers.totime("05/03/2019 12:12:12")))