Source code for k1lib.serpent

# AUTOGENERATED FILE! PLEASE DON'T EDIT HERE. EDIT THE SOURCE NOTEBOOKS INSTEAD
"""
This module is for Lua's serpent module, which serializes object into a string
similar to json. But maddingly, no one has actually wrote a serialization/deserialization
code in Python before, and I desperately need it for a factorio project. Because
this was written in a rush, I'm in no way guaranteeing that it will work on all
serialized objects out there, but from my testing, it seems pretty robust. This
is exposed automatically with::

   from k1lib.imports import *
   serpent.loads(...) # exposed
"""
import k1lib, json, re, io; from collections import deque
__all__ = ["deconstruct", "listCorrection", "loads_monolith", "loads_fragments", "loads", "dumps"]
def _deconstruct(s:str, i:int, meta, stack, ctx, autoInc):                       # _deconstruct
    x = s.find("{", i); y = s.find("}", i)                                       # _deconstruct
    if y < 0: return -1, None                                                    # _deconstruct
    if x < y and x >= 0: # opens a new context and saves parent context to stack # _deconstruct
        if ctx[3] is None: ctx[3] = "=" in s[ctx[1]:x] # not yet determined whether it's a list or dict # _deconstruct
        stack.append(ctx); ctx = [autoInc(), x, None, None, ctx[0]]; return x+1, ctx # _deconstruct
    else: # closes the current context and go back to the parent context         # _deconstruct
        if ctx[3] is None: ctx[3] = "=" in s[ctx[1]:y]                           # _deconstruct
        ctx[2] = y; meta.append(ctx); ctx = stack.pop(); return y+1, ctx         # _deconstruct
def deconstruct(s:str) -> "list[list[5]]":                                       # deconstruct
    """Not intended for the end user. Deconstructs and grabs metadata of some lua objects. Example::

        a = '{1,2,3,{a=3,b={c=6,d={5,6,7}}},{b=3}}'
        serpent.deconstruct(a)

    That returns::

        [['_v3', 21, 27, False, '_v2'],
         ['_v2', 14, 28, True, '_v1'],
         ['_v1', 7, 29, True, '_v0'],
         ['_v4', 31, 35, True, '_v0'],
         ['_v0', 0, 36, False, 'root']]

    The columns are: [unique index of bracket, start byte, end byte, is it a dictionary?, parent index].
    This is a crucial step within :meth:`listCorrection`"""                      # deconstruct
    autoInc = k1lib.AutoIncrement(prefix="_v"); meta = []; stack = deque()       # deconstruct
    i = 0; n = len(s); ctx = ["root", None, None, None, None] # [idx, sB, eB, isDict?, parentIdx] # deconstruct
    while i < n: i, ctx = _deconstruct(s, i, meta, stack, ctx, autoInc)          # deconstruct
    return meta                                                                  # deconstruct
def listCorrection(s:str) -> str:                                                # listCorrection
    """Not intended for the end user. Corrects for lists in Lua. Example::

        a = '{1,2,3,{a=3,b={c=6,d={5,6,7}}},{b=3}}'
        serpent.listCorrection(a) # returns '[1,2,3,{a=3,b={c=6,d=[5,6,7]}},{b=3}]'

    See how some curly brackets have been replaced with square brackets? This is
    because there are no list or tuple types in lua, and there are also no sets
    in json, so I kinda have to roll my own solution"""                          # listCorrection
    b = bytearray(s.encode())                                                    # listCorrection
    for idx, sB, eB, isDict, pIdx in deconstruct(s):                             # listCorrection
        if not isDict: b[sB] = b"["[0]; b[eB] = b"]"[0]                          # listCorrection
    return bytes(b).decode()                                                     # listCorrection
# def _loads(lua):
#     matches = re.findall("[a-zA-Z_\-\"\[\]]+[ ]*=", lua); replacements = [m.rstrip("= ").strip("[]\"'") for m in matches]
#     for x, y in zip(matches, replacements): lua = lua.replace(x, f'"{y}": ')
#     return json.loads(lua.replace(" ", ""))
p1 = re.compile(r"[a-zA-Z0-9_\-\"\[\]]+[ ]*=") # matches lua-style keys, up to and including the equal sign
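# A quick sanity check of what p1 matches, both bare identifiers and ["..."] keys (not part of the module):
#     re.findall(p1, '{a=3,["b-c"]=4}') == ['a=', '["b-c"]=']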
def loads_monolith(lua:str) -> object:                                           # loads_monolith
    """Not intended for the end user. Core loading mechanism. See :meth:`loads`""" # loads_monolith
    lua = listCorrection(lua); matches = sorted(re.findall(p1, lua), key=lambda x: -len(x)) # loads_monolith
    replacements = [m.rstrip("= ").strip("[]\"'") for m in matches]              # loads_monolith
    for x, y in zip(matches, replacements): lua = lua.replace(x, f'"{y}": ')     # loads_monolith
    return json.loads(lua)                                                       # loads_monolith
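# To illustrate the intermediate strings (a sketch, not part of the module),
# loads_monolith('{a=3,b={1,2}}') goes through:
#     listCorrection  -> '{a=3,b=[1,2]}'       (inner {} has no "=", so it's a list)
#     key replacement -> '{"a": 3,"b": [1,2]}'
#     json.loads      -> {'a': 3, 'b': [1, 2]}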
def loads_fragments(lua:str) -> object:                                          # loads_fragments
    """Not intended for the end user. See :meth:`loads`. Deserializes lua objects,
    breaking up the work into multiple fragments. So here's the general gist::

        s = "{1, 2, 3, {4, 5, 6}, {7, 8, 9}}"
        # then, we grab the fragments, which are the top level {} blocks, assigning each a unique key (the character \ue000 plus an autoInc index)
        fragments = {"\ue0000": {4, 5, 6}, "\ue0001": {7, 8, 9}}
        # then, we replace the fragments with their keys
        s = '{1, 2, 3, "\ue0000", "\ue0001"}'
        # then we load s; this runs fast since the fragments are now just short strings
        s = serpent.loads_monolith(s)
        # then we patch s, replacing the keys with the actual parsed objects
        s = {1, 2, 3, {4, 5, 6}, {7, 8, 9}}

    Why so convoluted? Well, it turns out loads_monolith is pretty slow. It has a for
    loop, and there's a .replace() within it, which is a hidden for loop that copies the
    entire string over and over again, which slows it down. I haven't done extensive
    testing, but it felt like O(n^2) time complexity while I was working with it. So this
    optimization assumes that the top level {} blocks are small, but that there are many
    of them, thus assigning less work (shorter strings, hence faster .replace()) to each
    loads_monolith() call. So if there are 10k fragments, this can potentially be 10k
    times faster. This assumption of course is not that great and not very general, and
    you can easily find ways around it. But it's just enough for my use case right now,
    which is to analyze factorio. The correct way would be to dive deeper and benchmark
    everything more carefully, but I don't have time for that."""                # loads_fragments
    # splits into fragments                                                      # loads_fragments
    a = [[f"\ue000{i}", loads_monolith(lua[sB:eB+1]), sB, eB] for i,[idx,sB,eB,d,pIdx] in enumerate(deconstruct(lua)) if pIdx == "_v0"] # loads_fragments
    # fast way of replacing the fragments with the PUA (private use area) unicode keys # loads_fragments
    s = io.StringIO(); lastSeek = 0; d = {pua:o for pua, o, sB, eB in a}         # loads_fragments
    for pua,obj,sB,eB in a: s.write(lua[lastSeek:sB]); s.write(f'"{pua}"'); lastSeek = eB+1 # loads_fragments
    s.write(lua[lastSeek:]); s.seek(0); c = loads_monolith(s.read())             # loads_fragments
    # injecting the parsed fragments back in                                     # loads_fragments
    if isinstance(c, dict): return {k:d.get(v, v) for k,v in c.items()}          # loads_fragments
    elif isinstance(c, list): return [d.get(v, v) for v in c]                    # loads_fragments
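# A rough timing sketch for the many-small-fragments case described above (not part
# of the module; numbers will vary):
#     import time
#     lua = "{" + ",".join("{a=%d,b=%d}" % (i, i+1) for i in range(2000)) + "}"
#     t = time.time(); loads_fragments(lua); print("fragments:", time.time() - t)
#     t = time.time(); loads_monolith(lua); print("monolith: ", time.time() - t)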
def preprocess(lua:str): return "".join([x.split("--")[0] for x in lua.split("\n")]).replace(" ", "").replace("\\", "\\\\") # strips lua comments ("--" to end of line), removes all spaces and escapes backslashes # preprocess
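# For example (not part of the module):
#     preprocess('{a = 3, -- comment\n b = 4}') == '{a=3,b=4}'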
def loads(lua:str):                                                              # loads
    """Deserialize lua objects from string. Example::

        # returns [1, 2, 3, {'a': 3, 'b': {'c': 6, 'd': [5, 6, 7]}}, {'b': 3}]
        loads("{ 1, 2, 3, { a = 3, b = { c = 6, d = {5, 6, 7} } }, { b = 3 } }")

    See also: :meth:`dumps`

    What's the relative speed here? Because everything is written in Python, I expect
    it to be slower than json, but by how much? Here're some benchmark results:

    |             | lua  | json | binary |
    | ----------- | ---- | ---- | ------ |
    | from python | 21us | 11us | 184us  |
    | to python   | 92us | 10us | 5.8us  |

    The "lua" column uses serpent, "json" uses json, and "binary" uses dill"""   # loads
    lua = preprocess(lua)                                                        # loads
    return loads_fragments(lua) if len(lua) > 1000 else loads_monolith(lua)      # loads
def _dumps_dict(obj):                                                            # _dumps_dict
    # keys containing "-" aren't valid bare lua identifiers, so wrap them in ["..."] # _dumps_dict
    for k,v in obj.items(): k = f'["{k}"]' if "-" in k else k; yield f"{k}={dumps(v)}" # _dumps_dict
def dumps(obj) -> str:                                                           # dumps
    """Serialize Python objects into lua string. Example::

        # returns '{1,2,3,{a=3,b={c=6,d={5,6,7}}},{b=3}}'
        serpent.dumps([1, 2, 3, {'a': 3, 'b': {'c': 6, 'd': [5, 6, 7]}}, {'b': 3}])

    See also: :meth:`loads`"""                                                   # dumps
    if isinstance(obj, dict): return f"{{{','.join(_dumps_dict(obj))}}}"         # dumps
    if isinstance(obj, (list, tuple)): return f"{{{','.join([dumps(e) for e in obj])}}}" # dumps
    return json.dumps(obj)                                                       # dumps
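# Round-trip sanity check (a sketch, not part of the module; assumes only json-style
# scalars, lists and string-keyed dicts):
#     obj = [1, 2, 3, {"a": 3, "b": [5, 6]}]
#     assert loads(dumps(obj)) == obj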