Source code for k1lib.cli.ktree

# AUTOGENERATED FILE! PLEASE DON'T EDIT HERE. EDIT THE SOURCE NOTEBOOKS INSTEAD
import k1lib as k1; import k1lib.cli as cli
from k1lib.cli import BaseCli
import os; from collections import deque, defaultdict
__all__ = ["create", "apply", "filt", "select", "flatten", "pretty", "display", "search", "walk"]
settings = k1.settings.cli
kSett = k1.Settings().add("tok", "\ue000", "Special token for internal processing")
settings.add("ktree", kSett, "cli.ktree module settings");
def groupBy2(x, depth=2): # stripped down, much faster version                   # groupBy2
    d = {} # for some reason doing this instead of default dict is faster        # groupBy2
    for e in x: d[e[0]] = []                                                     # groupBy2
    for e in x: d[e[0]].append(e[1:])                                            # groupBy2
    if depth > 0:                                                                # groupBy2
        return [[k,groupBy2(v, depth-1)] for k,v in d.items()]                   # groupBy2
    else: return list(d.items())                                                 # groupBy2
    return [[k,groupBy2(v, depth-1)] for k,v in d.items()] if depth > 0 else list(d.items()) # groupBy2
def _mktree(depth):                                                              # _mktree
    if depth <= 0: return cli.iden()                                             # _mktree
    return cli.groupBy(0, True) | cli.apply(_mktree(depth-1) | cli.aS(list), 1)  # _mktree
class mktree1(BaseCli):                                                          # mktree1
    def __ror__(self, it):                                                       # mktree1
        return groupBy2(it, len(it[0])-1)                                        # mktree1
        it = it | cli.deref(2); return (it | _mktree(len(it[0])-1))# if len(it) > 0 else [] # mktree1
def prune1(x, depth=2): # eliminating empty elements. For some reason, _mktree() creates an empty element at each leaf. No idea why and too lazy to dig in further # prune1
    for elem in x:                                                               # prune1
        if len(elem) == 0: continue                                              # prune1
        s, children = elem                                                       # prune1
        if depth > 0: children = list(prune1(children, depth-1))                 # prune1
        yield [s, children]                                                      # prune1
def prune2(x, depth=2) -> "[children, hasToken]": # trying to grab whether there is a branch that ends at the parent or not # prune2
    hasToken = False; ans = []                                                   # prune2
    for s, children in x:                                                        # prune2
        hT = False; value = None                                                 # prune2
        if depth > 0:                                                            # prune2
            children, hT = prune2(children, depth-1)                             # prune2
            hasToken = hasToken or s == kSett.tok                                # prune2
            if len(children) == 0: hT = 1                                        # prune2
        else:                                                                    # prune2
            hasToken = hasToken or s == kSett.tok; hT = 1                        # prune2
            value = children[0][0]; children = []                                # prune2
        ans.append([s, children, hT+0, value])                                   # prune2
    return ans, hasToken                                                         # prune2
def migrate(x, depth=0, depthSinceTok=0, cache=None): # trying to migrate the detached nested value fields to the right position and depth # migrate
    if cache is None: cache = []                                                 # migrate
    for elem in x:                                                               # migrate
        s,ch,hT,v = elem; isTok = s == kSett.tok                                 # migrate
        if isTok: cache.append([depth, depthSinceTok+1, v])                      # migrate
        ch = list(migrate(ch, depth+1, depthSinceTok+isTok, cache))              # migrate
        if cache:                                                                # migrate
            d,dt,value = cache[-1]                                               # migrate
            if d-dt==depth: cache.pop(); v = value                               # migrate
        yield s,ch,hT,v                                                          # migrate
def prune3(x, depth=0): # trying to eliminate the "virtual" elements tagged with the token # prune3
    ans = []                                                                     # prune3
    for s, children, num, value in x:                                            # prune3
        if s == kSett.tok: continue                                              # prune3
        ans.append([s, prune3(children, depth+1), num, depth, value])            # prune3
    return ans                                                                   # prune3
def prune(x, depth): return prune3(migrate(prune2(prune1(x, depth), depth)[0]))  # prune
def prune_makeup(x):                                                             # prune_makeup
    for elem in x:                                                               # prune_makeup
        yield [elem[0], prune_makeup(elem[1]), 0] if len(elem) > 1 else [elem[0], [], 0] # last column for extra metadata slot # prune_makeup
[docs]class create(BaseCli): # create
[docs] def __init__(self, n=0): # create """Creates a tree structure from a flat table. Example:: flat = [["a", "b"], ["a", "b", "2"], ["a", "b", "3"], ["a", "c", "4"], ["a", "c", "5"], ["a", "d"]] flat | ktree.create() That will return this:: [['a', [ ['b', [ ['2', [], 1, 2, None], ['3', [], 1, 2, None] ], 1, 1, None], ['c', [ ['4', [], 1, 2, None], ['5', [], 1, 2, None] ], 0, 1, None], ['d', [], 1, 1, None] ], 0, 0, None]] Each element in the array is represented by a 5-tuple: ``(name, children, ends here?, depth, value)``: - name: name of this element - children: a list of elements with similar structure to this element - ends here?: whether there is a row in the flat table that exists for this particular element. Most has it, like "a/b/2" and "a/b/3", but some don't, like "a/c", so "ends here?" of "c" is 0. - depth: depth of this element inside the tree - value: value associated with this element, explained more below In some datasets, you're not just interested in the tree structure, but you also want to manipulate some data structures associated with the element in the tree. You can specify how much value at the end you want to capture by using the param ``.n``. It's a little hard to explain with words, so here's an example:: flat1 = [["a", "$0"], ["a", "b", "$1"], ["a", "b", "2", "$2"], ["a", "b", "3", "$3"], ["a", "c", "4", "$4"], ["a", "c", "5", "$5"], ["a", "d", "$6"]] flat1 | ktree.create(1) That will return this:: [['a', [ ['b', [ ['2', [], 0, 2, '$2'], ['3', [], 0, 2, '$3'] ], 1, 1, '$1'], ['c', [ ['4', [], 0, 2, '$4'], ['5', [], 0, 2, '$5'] ], 0, 1, None], ['d', [], 1, 1, '$6'] ], 1, 0, '$0']] This looks pretty much identical to the previous result, but with the None values replaced with the last column of the flattened tree. You can specify the number of columns to take from the end of the flattened tree. So this is possible:: flat2 = [["a", *"$0"], ["a", "b", *"$1"], ["a", "b", "2", *"$2"], ["a", "b", "3", *"$3"], ["a", "c", "4", *"$4"], ["a", "c", "5", *"$5"], ["a", "d", *"$6"]] flat2 | ktree.create(2) That will return this:: [['a', [ ['b', [ ['2', [], 0, 2, ('$', '2')], ['3', [], 0, 2, ('$', '3')] ], 1, 1, ('$', '1')], ['c', [ ['4', [], 0, 2, ('$', '4')], ['5', [], 0, 2, ('$', '5')] ], 0, 1, None], ['d', [], 1, 1, ('$', '6')] ], 1, 0, ('$', '0')]] It'll just bunch up the values into a tuple. About performance, this implementation isn't the best. If the depth is not too crazy (say, <3) then a throughput of 442k rows/s can be achieved. But if the depth is big (say ~20), then it can be as slow as 6k rows/s. .. warning:: Internally, this makes use of the special character "\\ue000", configurable at ``settings.cli.ktree.tok`` to create the tree. This is in Unicode's private use area, so it shouldn't appear anywhere in normal text. However, if your application uses this character, it's best to change it to something else """ # create self.n = n # create
[docs] def __ror__(self, it): it = _preprocess(it, self.n); return prune(it | mktree1(), depth=len(it[0])-2) # create
def _preprocess(it, n): # _preprocess try: it[:]; len(it) # _preprocess except: it = it | cli.deref(2) # _preprocess if n == 0: it = [[*row, None] for row in it] # _preprocess elif n == 1: it = [[*row[:-1], row[-1]] for row in it] # _preprocess else: it = [[*row[:-n], tuple(row[-n:])] for row in it] # _preprocess values = [row[-1] for row in it] # _preprocess return [[*row, v] for row, v in zip(it | cli.apply(lambda row: row[:-1]) | cli.transpose(fill=kSett.tok) | cli.transpose(), values)] # _preprocess def _applyRet(x, f) -> "ch": # _applyRet ans = [] # _applyRet for s, ch, *other in x: ans.append(f(s, _applyRet(ch, f), *other)) # _applyRet return ans # _applyRet
[docs]class apply(BaseCli): # apply
[docs] def __init__(self, f): # apply """Transforms the tree recursively. Example:: a = flat | ktree.create() | ktree.apply(lambda s,ch,e,d,v: [s,ch,(s | (tryout(0) | aS(int))) + (ch | cut(2) | toSum())]) That returns this:: [['a', [['b', [['2', [], 2], ['3', [], 3]], 5], ['c', [['4', [], 4], ['5', [], 5]], 9], ['d', [], 0]], 14]] Extracting the final result:: a | cut(2) | item() # returns 14 Here, I'm trying to add up all elements that can be converted into an integer, but do that at every node in the tree. The ``s | (tryout(0) | aS(int))`` part is trying to convert it into an integer, returning 0 if it can't do it. Then, the ``ch | cut(2) | toSum()`` tries to add up the result of all children underneath it. The results naturally trickle upwards and you get the result "14" at the top level, at which point it's trivial to extract. If you're using a lot of cli tools inside the lambda function and the performance is terrible, you can try to initialize the cli tools outside the lambda, and then just use it inside, to eliminate cli initialization cost, like this:: def getVal(x): try: return int(x) except: return 0 sumF = cut(2) | toSum() flat | ktree.create() | ktree.apply(lambda s,ch,e,d: [s,ch,getVal(s) + sumF(ch)]) This should give you enough performance for your purpose, but if you want even more perf, don't use cli tools inside the lambda at all.""" # apply self.f = f # apply
[docs] def __ror__(self, it): return _applyRet(it, self.f) # apply
def _filter(x, f) -> "ch": # _filter ans = [] # _filter for s, ch, *other in x: # _filter ch = _filter(ch, f) # _filter res = f(s, ch, *other) # _filter if res: ans.append([s, ch, *other]) # _filter return ans # _filter
[docs]class filt(BaseCli): # filt
[docs] def __init__(self, f): # filt """Filters out elements that don't pass a certain predicate. Example:: # returns [['a', [['b', [], 1, 1], ['c', [], 0, 1], ['d', [], 1, 1]], 0, 0]] flat | ktree.create() | ktree.filt(lambda s,ch,e,d: d<2) Essentially, this filters out any elements that are of depth 2 or more.""" # filt self.f = f # filt
[docs] def __ror__(self, it): return _filter(it, self.f) # filt
[docs]class select(BaseCli): # select
[docs] def __init__(self, term:str, depth:int, exact:bool=True): # select """Selects for a single element at a particular depth. Example:: # returns [['a', [['b', [['2', [], 1, 2], ['3', [], 1, 2]], 1, 1]], 0, 0]]. It selects for "/*/b" flat | ktree.create() | ktree.select("b", 1) # returns [['a', [['b', [], 1, 1], ['c', [], 0, 1], ['d', [], 1, 1]], 0, 0]]. It selects for "/*/*/b", which does not exist. But it still returns all elements in depth 1, as depth 1 is not selected for flat | ktree.create() | ktree.select("b", 2) # returns [['a', [['b', [['2', [], 1, 2], ['3', [], 1, 2]], 1, 1]], 0, 0]]. It selects for "/a/b" flat | ktree.create() | ktree.select("a", 0) | ktree.select("b", 1) # returns []. It selects for "/c/b", which does not exist flat | ktree.create() | ktree.select("c", 0) | ktree.select("b", 1) :param term: term to select :param depth: depth at which to select term :param exact: if True, select for term exactly, else select for elements that contains term""" # select self.term = term; self.depth = depth; self.exact = exact # select
[docs] def __ror__(self, it): # select term = self.term; depth = self.depth; exact = self.exact # select if exact: return it | filt(lambda s,ch,e,d,v: (d != depth) or (d == depth and term == s)) # select else: return it | filt(lambda s,ch,e,d,v: (d != depth) or (d == depth and term in s)) # select
def _flatten(x, joinF, prefix=""): # _flatten for s,ch,*other in x: pre = joinF(prefix, s); yield [pre,*other]; yield from _flatten(ch, joinF, prefix=pre) # _flatten
[docs]class flatten(BaseCli): # flatten
[docs] def __init__(self, joinF=None, delim="/"): # flatten """Flattens the tree. Example:: flat | ktree.create() | ktree.flatten() | deref() This returns:: [['/a', 0, 0], ['/a/b', 1, 1], ['/a/b/2', 1, 2], ['/a/b/3', 1, 2], ['/a/c', 0, 1], ['/a/c/4', 1, 2], ['/a/c/5', 1, 2], ['/a/d', 1, 1]] :param joinF: a function that takes in 2 parameters: prefix (like "/a/b") and element name (like "2"), to form a new prefix ("/a/b/2"). Defaulted to ``lambda p,s: f"{p}/{s}"`` :param delim: if use the default joinF, this specifies the delimiter to use""" # flatten self.joinF = joinF or (lambda p,s: f"{p}{delim}{s}") # flatten
[docs] def __ror__(self, it): return _flatten(it, self.joinF) # flatten
def _prettyDepth(x, depth=0): # _prettyDepth for s,ch,*other in x: # _prettyDepth yield f"{depth}".ljust(3) + f" "*(depth+1) + ([s, *other] | cli.join(' - ')) # _prettyDepth yield from _prettyDepth(ch, depth+1) # _prettyDepth def _prettyNoDepth(x, depth=0): # _prettyNoDepth for s,ch,*other in x: # _prettyNoDepth yield f" "*depth + f"{s} - {other | cli.join(' - ')}" # _prettyNoDepth yield from _prettyNoDepth(ch, depth+1) # _prettyNoDepth
[docs]class pretty(BaseCli): # pretty
[docs] def __init__(self, depth=True): # pretty """Converts tree into a string iterator, indented with the correct level of depth. See also: :class:`display` :param depth: whether to display the depth at the start of each line or not""" # pretty self.depth = depth # pretty
[docs] def __ror__(self, it): return _prettyDepth(it) if self.depth else _prettyNoDepth(it) # pretty
[docs]class display(BaseCli): # display
[docs] def __init__(self, lines=10, depth=True): # display """Displays the tree. Example:: flat | ktree.create() | ktree.display() # equivalent to the above flat | ktree.create() | ktree.pretty() | stdout() :param lines: line limit. Put "None" to display all :param depth: whether to display the depth at the start of each line or not""" # display self.lines = lines; self.depth = depth # display
[docs] def __ror__(self, it): it | pretty(self.depth) | cli.head(self.lines) | cli.stdout() # display
_walk = lambda f: cli.aS(lambda fn: os.walk(os.path.expanduser(fn)) | cli.cut(0, 2) | cli.ungroup() | cli.join(os.sep).all() | f | cli.op().split(os.sep).all() | create()) # search
[docs]def walk(fn=None, f=None): # walk """Walks the file system and generates a tree of all files and folders. Example:: "/home/ubuntu" | ktree.walk() # returns a tree ktree.walk("/home/ubundu") # same as above ktree.walk(".") # can also do relative paths "/home/ubuntu" | ktree.walk(f=~grep("git")) # returns a tree with no hidden git-related files ktree.walk("/home/ubuntu", ~grep("git")) # same as above :param fn: folder name :param f: optional function if you want to filter out paths (Iterator[str])""" # walk f = f or cli.iden(); return _walk(f) if fn is None else fn | _walk(f) # walk