Source code for k1lib.cli.ktree

# AUTOGENERATED FILE! PLEASE DON'T EDIT HERE. EDIT THE SOURCE NOTEBOOKS INSTEAD
import k1lib as k1; import k1lib.cli as cli
from k1lib.cli import BaseCli
import os; from collections import deque, defaultdict
__all__ = ["create", "apply", "filt", "select", "flatten", "pretty", "display", "search", "walk"]
settings = k1.settings.cli
kSett = k1.Settings().add("tok", "\ue000", "Special token for internal processing")
settings.add("ktree", kSett, "cli.ktree module settings");
def groupBy2(x, depth=2): # stripped down, much faster version                   # groupBy2
    d = {} # for some reason doing this instead of default dict is faster        # groupBy2
    for e in x: d[e[0]] = []                                                     # groupBy2
    for e in x: d[e[0]].append(e[1:])                                            # groupBy2
    if depth > 0:                                                                # groupBy2
        return [[k,groupBy2(v, depth-1)] for k,v in d.items()]                   # groupBy2
    else: return list(d.items())                                                 # groupBy2
    return [[k,groupBy2(v, depth-1)] for k,v in d.items()] if depth > 0 else list(d.items()) # groupBy2
def _mktree(depth):                                                              # _mktree
    if depth <= 0: return cli.iden()                                             # _mktree
    return cli.groupBy(0, True) | cli.apply(_mktree(depth-1) | cli.aS(list), 1)  # _mktree
class mktree1(BaseCli):                                                          # mktree1
    def __ror__(self, it):                                                       # mktree1
        return groupBy2(it, len(it[0])-1)                                        # mktree1
        it = it | cli.deref(2); return (it | _mktree(len(it[0])-1))# if len(it) > 0 else [] # mktree1
def prune1(x, depth=2): # eliminating empty elements. For some reason, _mktree() creates an empty element at each leaf. No idea why and too lazy to dig in further # prune1
    for elem in x:                                                               # prune1
        if len(elem) == 0: continue                                              # prune1
        s, children = elem                                                       # prune1
        if depth > 0: children = list(prune1(children, depth-1))                 # prune1
        yield [s, children]                                                      # prune1
def prune2(x, depth=2) -> "[children, hasToken]": # trying to grab whether there is a branch that ends at the parent or not # prune2
    hasToken = False; ans = []                                                   # prune2
    for s, children in x:                                                        # prune2
        hT = False; value = None                                                 # prune2
        if depth > 0:                                                            # prune2
            children, hT = prune2(children, depth-1)                             # prune2
            hasToken = hasToken or s == kSett.tok                                # prune2
            if len(children) == 0: hT = 1                                        # prune2
        else:                                                                    # prune2
            hasToken = hasToken or s == kSett.tok; hT = 1                        # prune2
            value = children[0][0]; children = []                                # prune2
        ans.append([s, children, hT+0, value])                                   # prune2
    return ans, hasToken                                                         # prune2
def migrate(x, depth=0, depthSinceTok=0, cache=None): # trying to migrate the detached nested value fields to the right position and depth # migrate
    if cache is None: cache = []                                                 # migrate
    for elem in x:                                                               # migrate
        s,ch,hT,v = elem; isTok = s == kSett.tok                                 # migrate
        if isTok: cache.append([depth, depthSinceTok+1, v])                      # migrate
        ch = list(migrate(ch, depth+1, depthSinceTok+isTok, cache))              # migrate
        if cache:                                                                # migrate
            d,dt,value = cache[-1]                                               # migrate
            if d-dt==depth: cache.pop(); v = value                               # migrate
        yield s,ch,hT,v                                                          # migrate
def prune3(x, depth=0): # trying to eliminate the "virtual" elements tagged with the token # prune3
    ans = []                                                                     # prune3
    for s, children, num, value in x:                                            # prune3
        if s == kSett.tok: continue                                              # prune3
        ans.append([s, prune3(children, depth+1), num, depth, value])            # prune3
    return ans                                                                   # prune3
def prune(x, depth): return prune3(migrate(prune2(prune1(x, depth), depth)[0]))  # prune
def prune_makeup(x):                                                             # prune_makeup
    for elem in x:                                                               # prune_makeup
        yield [elem[0], prune_makeup(elem[1]), 0] if len(elem) > 1 else [elem[0], [], 0] # last column for extra metadata slot # prune_makeup

[docs]
class create(BaseCli):                                                           # create

[docs]
    def __init__(self, n=0):                                                     # create
        """Creates a tree structure from a flat table.
Example::

    flat = [["a", "b"],
            ["a", "b", "2"],
            ["a", "b", "3"],
            ["a", "c", "4"],
            ["a", "c", "5"],
            ["a", "d"]]
    flat | ktree.create()

That will return this::

    [['a', [
        ['b', [
            ['2', [], 1, 2, None],
            ['3', [], 1, 2, None]
        ], 1, 1, None],
        ['c', [
            ['4', [], 1, 2, None],
            ['5', [], 1, 2, None]
        ], 0, 1, None],
        ['d', [], 1, 1, None]
    ], 0, 0, None]]

Each element in the array is represented by a 5-tuple: ``(name, children, ends here?, depth, value)``:
- name: name of this element
- children: a list of elements with similar structure to this element
- ends here?: whether there is a row in the flat table that exists for this
particular element. Most has it, like "a/b/2" and "a/b/3", but some don't,
like "a/c", so "ends here?" of "c" is 0.
- depth: depth of this element inside the tree
- value: value associated with this element, explained more below

In some datasets, you're not just interested in the tree structure, but you also want to manipulate some data
structures associated with the element in the tree. You can specify how much value at the end you want to
capture by using the param ``.n``. It's a little hard to explain with words, so here's an example::

    flat1 = [["a", "$0"],
             ["a", "b", "$1"],
             ["a", "b", "2", "$2"],
             ["a", "b", "3", "$3"],
             ["a", "c", "4", "$4"],
             ["a", "c", "5", "$5"],
             ["a", "d", "$6"]]
    flat1 | ktree.create(1)

That will return this::

    [['a', [
        ['b', [
            ['2', [], 0, 2, '$2'],
            ['3', [], 0, 2, '$3']
        ], 1, 1, '$1'],
        ['c', [
            ['4', [], 0, 2, '$4'],
            ['5', [], 0, 2, '$5']
        ], 0, 1, None],
        ['d', [], 1, 1, '$6']
    ], 1, 0, '$0']]

This looks pretty much identical to the previous result, but with the None values replaced with
the last column of the flattened tree. You can specify the number of columns to take from the
end of the flattened tree. So this is possible::

    flat2 = [["a", *"$0"],
             ["a", "b", *"$1"],
             ["a", "b", "2", *"$2"],
             ["a", "b", "3", *"$3"],
             ["a", "c", "4", *"$4"],
             ["a", "c", "5", *"$5"],
             ["a", "d", *"$6"]]
    flat2 | ktree.create(2)

That will return this::

    [['a', [
        ['b', [
            ['2', [], 0, 2, ('$', '2')],
            ['3', [], 0, 2, ('$', '3')]
        ], 1, 1, ('$', '1')],
        ['c', [
            ['4', [], 0, 2, ('$', '4')],
            ['5', [], 0, 2, ('$', '5')]
        ], 0, 1, None],
        ['d', [], 1, 1, ('$', '6')]
    ], 1, 0, ('$', '0')]]

It'll just bunch up the values into a tuple.

About performance, this implementation isn't the best. If the depth is not too crazy (say, <3)
then a throughput of 442k rows/s can be achieved. But if the depth is big (say ~20),
then it can be as slow as 6k rows/s.

.. warning::

    Internally, this makes use of the special character "\\ue000", configurable
    at ``settings.cli.ktree.tok`` to create the tree. This is in Unicode's private
    use area, so it shouldn't appear anywhere in normal text. However, if your
    application uses this character, it's best to change it to something else
"""                                                                              # create
        self.n = n                                                               # create


[docs]
    def __ror__(self, it): it = _preprocess(it, self.n); return prune(it | mktree1(), depth=len(it[0])-2) # create


def _preprocess(it, n):                                                          # _preprocess
    try: it[:]; len(it)                                                          # _preprocess
    except: it = it | cli.deref(2)                                               # _preprocess
    if n == 0: it = [[*row, None] for row in it]                                 # _preprocess
    elif n == 1: it = [[*row[:-1], row[-1]] for row in it]                       # _preprocess
    else: it = [[*row[:-n], tuple(row[-n:])] for row in it]                      # _preprocess
    values = [row[-1] for row in it]                                             # _preprocess
    return [[*row, v] for row, v in zip(it | cli.apply(lambda row: row[:-1]) | cli.transpose(fill=kSett.tok) | cli.transpose(), values)] # _preprocess
def _applyRet(x, f) -> "ch":                                                     # _applyRet
    ans = []                                                                     # _applyRet
    for s, ch, *other in x: ans.append(f(s, _applyRet(ch, f), *other))           # _applyRet
    return ans                                                                   # _applyRet

[docs]
class apply(BaseCli):                                                            # apply

[docs]
    def __init__(self, f):                                                       # apply
        """Transforms the tree recursively.
Example::

    a = flat | ktree.create() | ktree.apply(lambda s,ch,e,d,v: [s,ch,(s | (tryout(0) | aS(int))) + (ch | cut(2) | toSum())])

That returns this::

    [['a',
      [['b', [['2', [], 2], ['3', [], 3]], 5],
       ['c', [['4', [], 4], ['5', [], 5]], 9],
       ['d', [], 0]],
      14]]

Extracting the final result::

    a | cut(2) | item() # returns 14

Here, I'm trying to add up all elements that can be converted into an
integer, but do that at every node in the tree. The ``s | (tryout(0) | aS(int))``
part is trying to convert it into an integer, returning 0 if it can't do it.
Then, the ``ch | cut(2) | toSum()`` tries to add up the result of all children
underneath it. The results naturally trickle upwards and you get the result "14"
at the top level, at which point it's trivial to extract.

If you're using a lot of cli tools inside the lambda function and the performance
is terrible, you can try to initialize the cli tools outside the lambda, and then
just use it inside, to eliminate cli initialization cost, like this::

    def getVal(x):
        try: return int(x)
        except: return 0
    sumF = cut(2) | toSum()
    flat | ktree.create() | ktree.apply(lambda s,ch,e,d: [s,ch,getVal(s) + sumF(ch)])

This should give you enough performance for your purpose, but if you want even more
perf, don't use cli tools inside the lambda at all."""                           # apply
        self.f = f                                                               # apply


[docs]
    def __ror__(self, it): return _applyRet(it, self.f)                          # apply


def _filter(x, f) -> "ch":                                                       # _filter
    ans = []                                                                     # _filter
    for s, ch, *other in x:                                                      # _filter
        ch = _filter(ch, f)                                                      # _filter
        res = f(s, ch, *other)                                                   # _filter
        if res: ans.append([s, ch, *other])                                      # _filter
    return ans                                                                   # _filter

[docs]
class filt(BaseCli):                                                             # filt

[docs]
    def __init__(self, f):                                                       # filt
        """Filters out elements that don't pass a certain predicate.
Example::

    # returns [['a', [['b', [], 1, 1], ['c', [], 0, 1], ['d', [], 1, 1]], 0, 0]]
    flat | ktree.create() | ktree.filt(lambda s,ch,e,d: d<2)

Essentially, this filters out any elements that are of depth 2 or more."""       # filt
        self.f = f                                                               # filt


[docs]
    def __ror__(self, it): return _filter(it, self.f)                            # filt



[docs]
class select(BaseCli):                                                           # select

[docs]
    def __init__(self, term:str, depth:int, exact:bool=True):                    # select
        """Selects for a single element at a particular depth.
Example::

    # returns [['a', [['b', [['2', [], 1, 2], ['3', [], 1, 2]], 1, 1]], 0, 0]]. It selects for "/*/b"
    flat | ktree.create() | ktree.select("b", 1)
    # returns [['a', [['b', [], 1, 1], ['c', [], 0, 1], ['d', [], 1, 1]], 0, 0]]. It selects for "/*/*/b", which does not exist. But it still returns all elements in depth 1, as depth 1 is not selected for
    flat | ktree.create() | ktree.select("b", 2)
    # returns [['a', [['b', [['2', [], 1, 2], ['3', [], 1, 2]], 1, 1]], 0, 0]]. It selects for "/a/b"
    flat | ktree.create() | ktree.select("a", 0) | ktree.select("b", 1)
    # returns []. It selects for "/c/b", which does not exist
    flat | ktree.create() | ktree.select("c", 0) | ktree.select("b", 1)

:param term: term to select
:param depth: depth at which to select term
:param exact: if True, select for term exactly, else select for elements that contains term""" # select
        self.term = term; self.depth = depth; self.exact = exact                 # select


[docs]
    def __ror__(self, it):                                                       # select
        term = self.term; depth = self.depth; exact = self.exact                 # select
        if exact: return it | filt(lambda s,ch,e,d,v: (d != depth) or (d == depth and term == s)) # select
        else:     return it | filt(lambda s,ch,e,d,v: (d != depth) or (d == depth and term in s)) # select


def _flatten(x, joinF, prefix=""):                                               # _flatten
    for s,ch,*other in x: pre = joinF(prefix, s); yield [pre,*other]; yield from _flatten(ch, joinF, prefix=pre) # _flatten

[docs]
class flatten(BaseCli):                                                          # flatten

[docs]
    def __init__(self, joinF=None, delim="/"):                                   # flatten
        """Flattens the tree.
Example::

    flat | ktree.create() | ktree.flatten() | deref()

This returns::

    [['/a', 0, 0],
     ['/a/b', 1, 1],
     ['/a/b/2', 1, 2],
     ['/a/b/3', 1, 2],
     ['/a/c', 0, 1],
     ['/a/c/4', 1, 2],
     ['/a/c/5', 1, 2],
     ['/a/d', 1, 1]]

:param joinF: a function that takes in 2 parameters: prefix (like "/a/b") and element
    name (like "2"), to form a new prefix ("/a/b/2"). Defaulted to ``lambda p,s: f"{p}/{s}"``
:param delim: if use the default joinF, this specifies the delimiter to use"""   # flatten
        self.joinF = joinF or (lambda p,s: f"{p}{delim}{s}")                     # flatten


[docs]
    def __ror__(self, it): return _flatten(it, self.joinF)                       # flatten


def _prettyDepth(x, depth=0):                                                    # _prettyDepth
    for s,ch,*other in x:                                                        # _prettyDepth
        yield f"{depth}".ljust(3) + f"  "*(depth+1) + ([s, *other] | cli.join(' - ')) # _prettyDepth
        yield from _prettyDepth(ch, depth+1)                                     # _prettyDepth
def _prettyNoDepth(x, depth=0):                                                  # _prettyNoDepth
    for s,ch,*other in x:                                                        # _prettyNoDepth
        yield f"  "*depth + f"{s} - {other | cli.join(' - ')}"                   # _prettyNoDepth
        yield from _prettyNoDepth(ch, depth+1)                                   # _prettyNoDepth

[docs]
class pretty(BaseCli):                                                           # pretty

[docs]
    def __init__(self, depth=True):                                              # pretty
        """Converts tree into a string iterator, indented
with the correct level of depth. See also: :class:`display`

:param depth: whether to display the depth at the start of each line or not"""   # pretty
        self.depth = depth                                                       # pretty


[docs]
    def __ror__(self, it): return _prettyDepth(it) if self.depth else _prettyNoDepth(it) # pretty



[docs]
class display(BaseCli):                                                          # display

[docs]
    def __init__(self, lines=10, depth=True):                                    # display
        """Displays the tree.
Example::

    flat | ktree.create() | ktree.display()
    # equivalent to the above
    flat | ktree.create() | ktree.pretty() | stdout()

:param lines: line limit. Put "None" to display all
:param depth: whether to display the depth at the start of each line or not"""   # display
        self.lines = lines; self.depth = depth                                   # display


[docs]
    def __ror__(self, it): it | pretty(self.depth) | cli.head(self.lines) | cli.stdout() # display



[docs]
def search(term:str, exact=False):                                               # search
    """Searches for a specific term in the name of all elements.
Example::

    # returns [['a', [['b', [['2', [], 1, 2]], 1, 1]], 0, 0]]
    flat | ktree.create() | ktree.search("2")

:param term: term to search for
:param exact: if True, searches for that exact term, else searches
    for the term inside the string"""                                            # search
    if exact: st1 = apply(lambda s,ch,*o: [s,ch,(term == s) or any(c[2] for c in ch),*o]) # search
    else:     st1 = apply(lambda s,ch,*o: [s,ch,(term in s) or any(c[2] for c in ch),*o]) # search
    return st1 | filt(lambda s,ch,e,*o: e) | apply(lambda s,ch,e,*o: [s,ch,*o])  # search

_walk = lambda f: cli.aS(lambda fn: os.walk(os.path.expanduser(fn)) | cli.cut(0, 2) | cli.ungroup() | cli.join(os.sep).all() | f | cli.op().split(os.sep).all() | create()) # search

[docs]
def walk(fn=None, f=None):                                                       # walk
    """Walks the file system and generates a tree of all files and folders.
Example::

    "/home/ubuntu" | ktree.walk() # returns a tree
    ktree.walk("/home/ubundu")    # same as above
    ktree.walk(".")               # can also do relative paths
    "/home/ubuntu" | ktree.walk(f=~grep("git")) # returns a tree with no hidden git-related files
    ktree.walk("/home/ubuntu", ~grep("git"))    # same as above

:param fn: folder name
:param f: optional function if you want to filter out paths (Iterator[str])"""   # walk
    f = f or cli.iden(); return _walk(f) if fn is None else fn | _walk(f)        # walk