# AUTOGENERATED FILE! PLEASE DON'T EDIT HERE. EDIT THE SOURCE NOTEBOOKS INSTEAD
import k1lib as k1; import k1lib.cli as cli
from k1lib.cli import BaseCli
import os; from collections import deque, defaultdict
__all__ = ["create", "apply", "filt", "select", "flatten", "pretty", "display", "search", "walk"]
settings = k1.settings.cli
kSett = k1.Settings().add("tok", "\ue000", "Special token for internal processing")
settings.add("ktree", kSett, "cli.ktree module settings");
def groupBy2(x, depth=2): # stripped down, much faster version # groupBy2
d = {} # for some reason doing this instead of default dict is faster # groupBy2
for e in x: d[e[0]] = [] # groupBy2
for e in x: d[e[0]].append(e[1:]) # groupBy2
if depth > 0: # groupBy2
return [[k,groupBy2(v, depth-1)] for k,v in d.items()] # groupBy2
else: return list(d.items()) # groupBy2
return [[k,groupBy2(v, depth-1)] for k,v in d.items()] if depth > 0 else list(d.items()) # groupBy2
def _mktree(depth): # _mktree
if depth <= 0: return cli.iden() # _mktree
return cli.groupBy(0, True) | cli.apply(_mktree(depth-1) | cli.aS(list), 1) # _mktree
class mktree1(BaseCli): # mktree1
def __ror__(self, it): # mktree1
return groupBy2(it, len(it[0])-1) # mktree1
it = it | cli.deref(2); return (it | _mktree(len(it[0])-1))# if len(it) > 0 else [] # mktree1
def prune1(x, depth=2): # eliminating empty elements. For some reason, _mktree() creates an empty element at each leaf. No idea why and too lazy to dig in further # prune1
for elem in x: # prune1
if len(elem) == 0: continue # prune1
s, children = elem # prune1
if depth > 0: children = list(prune1(children, depth-1)) # prune1
yield [s, children] # prune1
def prune2(x, depth=2) -> "[children, hasToken]": # trying to grab whether there is a branch that ends at the parent or not # prune2
hasToken = False; ans = [] # prune2
for s, children in x: # prune2
hT = False; value = None # prune2
if depth > 0: # prune2
children, hT = prune2(children, depth-1) # prune2
hasToken = hasToken or s == kSett.tok # prune2
if len(children) == 0: hT = 1 # prune2
else: # prune2
hasToken = hasToken or s == kSett.tok; hT = 1 # prune2
value = children[0][0]; children = [] # prune2
ans.append([s, children, hT+0, value]) # prune2
return ans, hasToken # prune2
def migrate(x, depth=0, depthSinceTok=0, cache=None): # trying to migrate the detached nested value fields to the right position and depth # migrate
if cache is None: cache = [] # migrate
for elem in x: # migrate
s,ch,hT,v = elem; isTok = s == kSett.tok # migrate
if isTok: cache.append([depth, depthSinceTok+1, v]) # migrate
ch = list(migrate(ch, depth+1, depthSinceTok+isTok, cache)) # migrate
if cache: # migrate
d,dt,value = cache[-1] # migrate
if d-dt==depth: cache.pop(); v = value # migrate
yield s,ch,hT,v # migrate
def prune3(x, depth=0): # trying to eliminate the "virtual" elements tagged with the token # prune3
ans = [] # prune3
for s, children, num, value in x: # prune3
if s == kSett.tok: continue # prune3
ans.append([s, prune3(children, depth+1), num, depth, value]) # prune3
return ans # prune3
def prune(x, depth): return prune3(migrate(prune2(prune1(x, depth), depth)[0])) # prune
def prune_makeup(x): # prune_makeup
for elem in x: # prune_makeup
yield [elem[0], prune_makeup(elem[1]), 0] if len(elem) > 1 else [elem[0], [], 0] # last column for extra metadata slot # prune_makeup
[docs]class create(BaseCli): # create
[docs] def __init__(self, n=0): # create
"""Creates a tree structure from a flat table.
Example::
flat = [["a", "b"],
["a", "b", "2"],
["a", "b", "3"],
["a", "c", "4"],
["a", "c", "5"],
["a", "d"]]
flat | ktree.create()
That will return this::
[['a', [
['b', [
['2', [], 1, 2, None],
['3', [], 1, 2, None]
], 1, 1, None],
['c', [
['4', [], 1, 2, None],
['5', [], 1, 2, None]
], 0, 1, None],
['d', [], 1, 1, None]
], 0, 0, None]]
Each element in the array is represented by a 5-tuple: ``(name, children, ends here?, depth, value)``:
- name: name of this element
- children: a list of elements with similar structure to this element
- ends here?: whether there is a row in the flat table that exists for this
particular element. Most has it, like "a/b/2" and "a/b/3", but some don't,
like "a/c", so "ends here?" of "c" is 0.
- depth: depth of this element inside the tree
- value: value associated with this element, explained more below
In some datasets, you're not just interested in the tree structure, but you also want to manipulate some data
structures associated with the element in the tree. You can specify how much value at the end you want to
capture by using the param ``.n``. It's a little hard to explain with words, so here's an example::
flat1 = [["a", "$0"],
["a", "b", "$1"],
["a", "b", "2", "$2"],
["a", "b", "3", "$3"],
["a", "c", "4", "$4"],
["a", "c", "5", "$5"],
["a", "d", "$6"]]
flat1 | ktree.create(1)
That will return this::
[['a', [
['b', [
['2', [], 0, 2, '$2'],
['3', [], 0, 2, '$3']
], 1, 1, '$1'],
['c', [
['4', [], 0, 2, '$4'],
['5', [], 0, 2, '$5']
], 0, 1, None],
['d', [], 1, 1, '$6']
], 1, 0, '$0']]
This looks pretty much identical to the previous result, but with the None values replaced with
the last column of the flattened tree. You can specify the number of columns to take from the
end of the flattened tree. So this is possible::
flat2 = [["a", *"$0"],
["a", "b", *"$1"],
["a", "b", "2", *"$2"],
["a", "b", "3", *"$3"],
["a", "c", "4", *"$4"],
["a", "c", "5", *"$5"],
["a", "d", *"$6"]]
flat2 | ktree.create(2)
That will return this::
[['a', [
['b', [
['2', [], 0, 2, ('$', '2')],
['3', [], 0, 2, ('$', '3')]
], 1, 1, ('$', '1')],
['c', [
['4', [], 0, 2, ('$', '4')],
['5', [], 0, 2, ('$', '5')]
], 0, 1, None],
['d', [], 1, 1, ('$', '6')]
], 1, 0, ('$', '0')]]
It'll just bunch up the values into a tuple.
About performance, this implementation isn't the best. If the depth is not too crazy (say, <3)
then a throughput of 442k rows/s can be achieved. But if the depth is big (say ~20),
then it can be as slow as 6k rows/s.
.. warning::
Internally, this makes use of the special character "\\ue000", configurable
at ``settings.cli.ktree.tok`` to create the tree. This is in Unicode's private
use area, so it shouldn't appear anywhere in normal text. However, if your
application uses this character, it's best to change it to something else
""" # create
self.n = n # create
[docs] def __ror__(self, it): it = _preprocess(it, self.n); return prune(it | mktree1(), depth=len(it[0])-2) # create
def _preprocess(it, n): # _preprocess
try: it[:]; len(it) # _preprocess
except: it = it | cli.deref(2) # _preprocess
if n == 0: it = [[*row, None] for row in it] # _preprocess
elif n == 1: it = [[*row[:-1], row[-1]] for row in it] # _preprocess
else: it = [[*row[:-n], tuple(row[-n:])] for row in it] # _preprocess
values = [row[-1] for row in it] # _preprocess
return [[*row, v] for row, v in zip(it | cli.apply(lambda row: row[:-1]) | cli.transpose(fill=kSett.tok) | cli.transpose(), values)] # _preprocess
def _applyRet(x, f) -> "ch": # _applyRet
ans = [] # _applyRet
for s, ch, *other in x: ans.append(f(s, _applyRet(ch, f), *other)) # _applyRet
return ans # _applyRet
[docs]class apply(BaseCli): # apply
[docs] def __init__(self, f): # apply
"""Transforms the tree recursively.
Example::
a = flat | ktree.create() | ktree.apply(lambda s,ch,e,d,v: [s,ch,(s | (tryout(0) | aS(int))) + (ch | cut(2) | toSum())])
That returns this::
[['a',
[['b', [['2', [], 2], ['3', [], 3]], 5],
['c', [['4', [], 4], ['5', [], 5]], 9],
['d', [], 0]],
14]]
Extracting the final result::
a | cut(2) | item() # returns 14
Here, I'm trying to add up all elements that can be converted into an
integer, but do that at every node in the tree. The ``s | (tryout(0) | aS(int))``
part is trying to convert it into an integer, returning 0 if it can't do it.
Then, the ``ch | cut(2) | toSum()`` tries to add up the result of all children
underneath it. The results naturally trickle upwards and you get the result "14"
at the top level, at which point it's trivial to extract.
If you're using a lot of cli tools inside the lambda function and the performance
is terrible, you can try to initialize the cli tools outside the lambda, and then
just use it inside, to eliminate cli initialization cost, like this::
def getVal(x):
try: return int(x)
except: return 0
sumF = cut(2) | toSum()
flat | ktree.create() | ktree.apply(lambda s,ch,e,d: [s,ch,getVal(s) + sumF(ch)])
This should give you enough performance for your purpose, but if you want even more
perf, don't use cli tools inside the lambda at all.""" # apply
self.f = f # apply
[docs] def __ror__(self, it): return _applyRet(it, self.f) # apply
def _filter(x, f) -> "ch": # _filter
ans = [] # _filter
for s, ch, *other in x: # _filter
ch = _filter(ch, f) # _filter
res = f(s, ch, *other) # _filter
if res: ans.append([s, ch, *other]) # _filter
return ans # _filter
[docs]class filt(BaseCli): # filt
[docs] def __init__(self, f): # filt
"""Filters out elements that don't pass a certain predicate.
Example::
# returns [['a', [['b', [], 1, 1], ['c', [], 0, 1], ['d', [], 1, 1]], 0, 0]]
flat | ktree.create() | ktree.filt(lambda s,ch,e,d: d<2)
Essentially, this filters out any elements that are of depth 2 or more.""" # filt
self.f = f # filt
[docs] def __ror__(self, it): return _filter(it, self.f) # filt
[docs]class select(BaseCli): # select
[docs] def __init__(self, term:str, depth:int, exact:bool=True): # select
"""Selects for a single element at a particular depth.
Example::
# returns [['a', [['b', [['2', [], 1, 2], ['3', [], 1, 2]], 1, 1]], 0, 0]]. It selects for "/*/b"
flat | ktree.create() | ktree.select("b", 1)
# returns [['a', [['b', [], 1, 1], ['c', [], 0, 1], ['d', [], 1, 1]], 0, 0]]. It selects for "/*/*/b", which does not exist. But it still returns all elements in depth 1, as depth 1 is not selected for
flat | ktree.create() | ktree.select("b", 2)
# returns [['a', [['b', [['2', [], 1, 2], ['3', [], 1, 2]], 1, 1]], 0, 0]]. It selects for "/a/b"
flat | ktree.create() | ktree.select("a", 0) | ktree.select("b", 1)
# returns []. It selects for "/c/b", which does not exist
flat | ktree.create() | ktree.select("c", 0) | ktree.select("b", 1)
:param term: term to select
:param depth: depth at which to select term
:param exact: if True, select for term exactly, else select for elements that contains term""" # select
self.term = term; self.depth = depth; self.exact = exact # select
[docs] def __ror__(self, it): # select
term = self.term; depth = self.depth; exact = self.exact # select
if exact: return it | filt(lambda s,ch,e,d,v: (d != depth) or (d == depth and term == s)) # select
else: return it | filt(lambda s,ch,e,d,v: (d != depth) or (d == depth and term in s)) # select
def _flatten(x, joinF, prefix=""): # _flatten
for s,ch,*other in x: pre = joinF(prefix, s); yield [pre,*other]; yield from _flatten(ch, joinF, prefix=pre) # _flatten
[docs]class flatten(BaseCli): # flatten
[docs] def __init__(self, joinF=None, delim="/"): # flatten
"""Flattens the tree.
Example::
flat | ktree.create() | ktree.flatten() | deref()
This returns::
[['/a', 0, 0],
['/a/b', 1, 1],
['/a/b/2', 1, 2],
['/a/b/3', 1, 2],
['/a/c', 0, 1],
['/a/c/4', 1, 2],
['/a/c/5', 1, 2],
['/a/d', 1, 1]]
:param joinF: a function that takes in 2 parameters: prefix (like "/a/b") and element
name (like "2"), to form a new prefix ("/a/b/2"). Defaulted to ``lambda p,s: f"{p}/{s}"``
:param delim: if use the default joinF, this specifies the delimiter to use""" # flatten
self.joinF = joinF or (lambda p,s: f"{p}{delim}{s}") # flatten
[docs] def __ror__(self, it): return _flatten(it, self.joinF) # flatten
def _prettyDepth(x, depth=0): # _prettyDepth
for s,ch,*other in x: # _prettyDepth
yield f"{depth}".ljust(3) + f" "*(depth+1) + ([s, *other] | cli.join(' - ')) # _prettyDepth
yield from _prettyDepth(ch, depth+1) # _prettyDepth
def _prettyNoDepth(x, depth=0): # _prettyNoDepth
for s,ch,*other in x: # _prettyNoDepth
yield f" "*depth + f"{s} - {other | cli.join(' - ')}" # _prettyNoDepth
yield from _prettyNoDepth(ch, depth+1) # _prettyNoDepth
[docs]class pretty(BaseCli): # pretty
[docs] def __init__(self, depth=True): # pretty
"""Converts tree into a string iterator, indented
with the correct level of depth. See also: :class:`display`
:param depth: whether to display the depth at the start of each line or not""" # pretty
self.depth = depth # pretty
[docs] def __ror__(self, it): return _prettyDepth(it) if self.depth else _prettyNoDepth(it) # pretty
[docs]class display(BaseCli): # display
[docs] def __init__(self, lines=10, depth=True): # display
"""Displays the tree.
Example::
flat | ktree.create() | ktree.display()
# equivalent to the above
flat | ktree.create() | ktree.pretty() | stdout()
:param lines: line limit. Put "None" to display all
:param depth: whether to display the depth at the start of each line or not""" # display
self.lines = lines; self.depth = depth # display
[docs] def __ror__(self, it): it | pretty(self.depth) | cli.head(self.lines) | cli.stdout() # display
[docs]def search(term:str, exact=False): # search
"""Searches for a specific term in the name of all elements.
Example::
# returns [['a', [['b', [['2', [], 1, 2]], 1, 1]], 0, 0]]
flat | ktree.create() | ktree.search("2")
:param term: term to search for
:param exact: if True, searches for that exact term, else searches
for the term inside the string""" # search
if exact: st1 = apply(lambda s,ch,*o: [s,ch,(term == s) or any(c[2] for c in ch),*o]) # search
else: st1 = apply(lambda s,ch,*o: [s,ch,(term in s) or any(c[2] for c in ch),*o]) # search
return st1 | filt(lambda s,ch,e,*o: e) | apply(lambda s,ch,e,*o: [s,ch,*o]) # search
_walk = lambda f: cli.aS(lambda fn: os.walk(os.path.expanduser(fn)) | cli.cut(0, 2) | cli.ungroup() | cli.join(os.sep).all() | f | cli.op().split(os.sep).all() | create()) # search
[docs]def walk(fn=None, f=None): # walk
"""Walks the file system and generates a tree of all files and folders.
Example::
"/home/ubuntu" | ktree.walk() # returns a tree
ktree.walk("/home/ubundu") # same as above
ktree.walk(".") # can also do relative paths
"/home/ubuntu" | ktree.walk(f=~grep("git")) # returns a tree with no hidden git-related files
ktree.walk("/home/ubuntu", ~grep("git")) # same as above
:param fn: folder name
:param f: optional function if you want to filter out paths (Iterator[str])""" # walk
f = f or cli.iden(); return _walk(f) if fn is None else fn | _walk(f) # walk