# AUTOGENERATED FILE! PLEASE DON'T EDIT HERE. EDIT THE SOURCE NOTEBOOKS INSTEAD
"""
This module is for Lua's serpent module, which serializes object into a string
similar to json. But maddingly, no one has actually wrote a serialization/deserialization
code in Python before, and I desperately need it for a factorio project. Because
this was written in a rush, I'm in no way guaranteeing that it will work on all
serialized objects out there, but from my testing, it seems pretty robust. This
is exposed automatically with::
from k1lib.imports import *
serpent.loads(...) # exposed
"""
import k1lib, json, re, io; from collections import deque
__all__ = ["deconstruct", "listCorrection", "loads_monolith", "loads_fragments", "loads", "dumps"]
def _deconstruct(s:str, i:int, meta, stack, ctx, autoInc): # _deconstruct
x = s.find("{", i); y = s.find("}", i) # _deconstruct
if y < 0: return len(s), ctx # no closing brace left, stop the scan # _deconstruct
if x < y and x >= 0: # opens a new context and saves parent context to stack # _deconstruct
if ctx[3] is None: ctx[3] = "=" in s[ctx[1]:x] # not yet determined whether it's a list or dict # _deconstruct
stack.append(ctx); ctx = [autoInc(), x, None, None, ctx[0]]; return x+1, ctx # _deconstruct
else: # closes the current context and go back to the parent context # _deconstruct
if ctx[3] is None: ctx[3] = "=" in s[ctx[1]:y] # _deconstruct
ctx[2] = y; meta.append(ctx); ctx = stack.pop(); return y+1, ctx # _deconstruct
def deconstruct(s:str) -> "list[list[5]]": # deconstruct
"""Not intended for the end user. Deconstructs and grab metadata of some
lua objects. Example::
a = '{1,2,3,{a=3,b={c=6,d={5,6,7}}},{b=3}}'
serpent.deconstruct(a)
That returns::
[['_v3', 21, 27, False, '_v2'],
['_v2', 14, 28, True, '_v1'],
['_v1', 7, 29, True, '_v0'],
['_v4', 31, 35, True, '_v0'],
['_v0', 0, 36, False, 'root']]
The columns are: [unique index of bracket, start byte, end byte, is it a dictionary?, parent index]
This is a crucial step within :meth:`listCorrection`""" # deconstruct
autoInc = k1lib.AutoIncrement(prefix="_v"); meta = []; stack = deque() # deconstruct
i = 0; n = len(s); ctx = ["root", None, None, None, None] # [idx, start byte, end byte, isDict?, parent idx] # deconstruct
while i < n: i, ctx = _deconstruct(s, i, meta, stack, ctx, autoInc) # deconstruct
return meta # deconstruct
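# A quick sketch (mine, not part of the library) of how deconstruct()'s metadata maps back
# onto the source string - each row's [start byte, end byte] range is one {} block:
#   s = '{1,2,3,{a=3,b={c=6,d={5,6,7}}},{b=3}}'
#   for idx, sB, eB, isDict, pIdx in deconstruct(s):
#       print(idx, s[sB:eB+1], isDict) # prints "_v3 {5,6,7} False" for the innermost list, etc.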
def listCorrection(s:str) -> str: # listCorrection
"""Not intended for the end user. Corrects for lists in Lua.
Example::
a = '{1,2,3,{a=3,b={c=6,d={5,6,7}}},{b=3}}'
serpent.listCorrection(a) # returns '[1,2,3,{a=3,b={c=6,d=[5,6,7]}},{b=3}]'
See how some pointy brackets have been replaced with square brackets?
This is because there are no list or tuple types in lua, and there are also no sets
in json, so kinda have to roll my own solution""" # listCorrection
b = bytearray(s.encode()) # listCorrection
for idx, sB, eB, isDict, pIdx in deconstruct(s): # listCorrection
if not isDict: b[sB] = b"["[0]; b[eB] = b"]"[0] # listCorrection
return bytes(b).decode() # listCorrection
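# Note that patching a bytearray with offsets computed on the str only lines up when every
# character before the patch point is ASCII (1 byte each); serialized factorio data mostly is.
# A tiny illustration of the byte-patching trick itself:
#   b = bytearray(b"{5,6,7}"); b[0] = b"["[0]; b[-1] = b"]"[0]
#   bytes(b) # b'[5,6,7]'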
p1 = re.compile(r"[a-zA-Z0-9_\-\"\[\]]+[ ]*=") # matches lua keys like 'a =' or '["b-c"] =' # listCorrection
def loads_monolith(lua:str) -> object: # loads_monolith
"""Not intended for the end user. Core loading mechanism. See :meth:`loads`""" # loads_monolith
lua = listCorrection(lua); matches = sorted(re.findall(p1, lua), key=lambda x: -len(x)) # loads_monolith
replacements = [m.rstrip("= ").strip("[]\"'") for m in matches] # loads_monolith
for x, y in zip(matches, replacements): lua = lua.replace(x, f'"{y}": ') # loads_monolith
return json.loads(lua) # loads_monolith
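# Rough illustration (my own trace, not library output) of what loads_monolith() does:
#   lua = '{1,2,{a=3}}'
#   listCorrection(lua)  # '[1,2,{a=3}]', since the outer block has no '='
#   # p1 then finds the key 'a=', which gets rewritten to '"a": ', giving '[1,2,{"a": 3}]',
#   # and json.loads() finishes the job. Matches are replaced longest-first so that a short
#   # key doesn't clobber a longer key that contains it.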
def loads_fragments(lua:str) -> object: # loads_fragments
"""Not intended for the end user. See :meth:`loads`. Deserializes lua objects,
breaking up the work into multiple fragments. So here's the general gist::
s = "{1, 2, 3, {4, 5, 6}, {7, 8, 9}}"
# then, we grab the fragments, which are the top level {} blocks, assigning a unique key (the character \ue000 and autoInc index)
fragments = {"\ue0000": {4, 5, 6}, "\ue0001": {7, 8, 9}}
# then, we replace the fragments with their keys
s = "{1, 2, 3, "\ue0000", "\ue0001"}"
# then we load s, it will run fast since the fragments are just simple strings
s = serpent.loads_monolith(s)
# then we patch s, replacing the keys with actual parsed objects
s = {1, 2, 3, {4, 5, 6}, {7, 8, 9}}
Why so convoluted? Well turns out, loads_monolith is pretty slow. It has a for loop there,
and there's a .replace() within, which is a hidden for loop that copies the entire
string over and over again, which slows it down. Haven't done extensive testing, but
feels like O(n^2) time complexity while I was working with it.
So this optimization assumes that the top level {} blocks are small, but there're many
of them, thus this assigns less work (shorter string, hence faster .replace()) to each
loads_monolith() calls. So if there're 10k fragments, this can potentially be 10k faster.
This assumption of course is not that great and not very general, and you can easily find
ways around it. But it's just enough for my use case right now, which is to analyze factorio.
The correct way would be to dive deeper and benchmark everything more clearly, but I don't
have time for that.""" # loads_fragments
# splits into fragments # loads_fragments
a = [[f"\ue000{i}",loads_monolith(lua[sB:eB+1]), sB, eB] for i,[idx,sB,eB,d,pIdx] in enumerate(deconstruct(lua)) if pIdx == "_v0"] # loads_fragments
# loads_fragments
# fast way of replacing the fragments with the pua unicodes # loads_fragments
s = io.StringIO(); lastSeek = 0; d = {pua:o for pua, o, sB, eB in a} # loads_fragments
for pua,obj,sB,eB in a: s.write(lua[lastSeek:sB]); s.write(f'"{pua}"'); lastSeek = eB+1 # loads_fragments
s.write(lua[lastSeek:]); s.seek(0); c = loads_monolith(s.read()) # loads_fragments
# loads_fragments
# injecting # loads_fragments
if isinstance(c, dict): return {k:d.get(v, v) for k,v in c.items()} # loads_fragments
elif isinstance(c, list): return [d.get(v, v) for v in c] # loads_fragments
else: return c # top level object isn't a table, nothing to patch # loads_fragments
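# Sanity check sketch (mine): both load paths should agree on inputs like this:
#   s = "{1,2,3,{a=3},{b=4}}"
#   assert loads_fragments(s) == loads_monolith(s) == [1, 2, 3, {'a': 3}, {'b': 4}]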
def preprocess(lua:str): return "".join([x.split("--")[0] for x in lua.split("\n")]).replace(" ", "").replace("\\", "\\\\") # strips lua '--' comments and spaces, escapes backslashes # preprocess
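# e.g. preprocess('{ a = 1, -- comment\n b = 2 }') returns '{a=1,b=2}'. Note that this also
# strips spaces inside string values, a known tradeoff of this quick approach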
def loads(lua:str): # loads
"""Deserialize lua objects from string.
Example::
# returns [1, 2, 3, {'a': 3, 'b': {'c': 6, 'd': [5, 6, 7]}}, {'b': 3}]
loads("{ 1, 2, 3, { a = 3, b = { c = 6, d = {5, 6, 7} } }, { b = 3 } }")
See also: :meth:`dumps`
What's the relative speed here? Because everything is written in Python, I
expect it to be slower than json, but by how much? Here're some benchmark results:
| | lua | json | binary |
| ----------- | ---- | ---- | ------ |
| from python | 21us | 11us | 184us |
| to python | 92us | 10us | 5.8us |
The "lua" column uses "serpent.loads()", "json" uses "json.loads()", and
"binary" uses "dill.loads()"
""" # loads
lua = preprocess(lua) # loads
return loads_fragments(lua) if len(lua) > 1000 else loads_monolith(lua) # loads
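# How numbers like the docstring table can be reproduced (a sketch; figures will vary by machine):
#   import timeit
#   s = dumps([1, 2, 3, {'a': 3, 'b': {'c': 6, 'd': [5, 6, 7]}}, {'b': 3}])
#   timeit.timeit(lambda: loads(s), number=10000) / 10000 # seconds per loads() call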
def _dumps_dict(obj): # _dumps_dict
for k,v in obj.items(): k = f'["{k}"]' if "-" in k else k; yield f"{k}={dumps(v)}" # keys with dashes aren't valid lua identifiers, so wrap them in ["..."] # _dumps_dict
def dumps(obj) -> str: # dumps
"""Serialize Python objects into lua string.
Example::
# returns '{1,2,3,{a=3,b={c=6,d={5,6,7}}},{b=3}}'
serpent.dumps([1, 2, 3, {'a': 3, 'b': {'c': 6, 'd': [5, 6, 7]}}, {'b': 3}])
See also: :meth:`loads`""" # dumps
if isinstance(obj, dict): return f"{{{','.join(_dumps_dict(obj))}}}" # dumps
if isinstance(obj, (list, tuple)): return f"{{{','.join([dumps(e) for e in obj])}}}" # dumps
return json.dumps(obj) # dumps
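# Round-trip sketch (my own check; caveat: tuples come back as lists, and non-string
# dict keys aren't handled by dumps()):
#   obj = [1, 2, 3, {'a': 3, 'b': {'c': 6, 'd': [5, 6, 7]}}, {'b': 3}]
#   assert loads(dumps(obj)) == obj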