# AUTOGENERATED FILE! PLEASE DON'T EDIT HERE. EDIT THE SOURCE NOTEBOOKS INSTEAD
"""All tools related to cif file format that describes protein structures.
Expected to be used under the "cif" module name, like this::
from k1lib.imports import *
cif.cat("abc.cif")
"""
import k1lib
import k1lib.cli as cli;
from k1lib.cli.init import BaseCli, yieldT
from typing import Iterator, List
from k1lib.cli.typehint import *
import k1lib._k1a as k1a
__all__ = ["tables", "records"]
def hasTable():
    """Build a filter cli that keeps only the blocks containing a ``loop_`` line.

    A block passes when grepping it for ``"loop_"`` produces more than zero
    results. The returned cli can be negated with ``~`` to keep the blocks
    that have no such line instead.
    """
    loopLineCount = cli.grep("loop_") | cli.shape(0)
    return cli.filt(loopLineCount | (cli.op() > 0))
def toBlocks():
    """Build a cli that reads a file and groups its lines into blocks.

    The input is read with ``cli.cat()`` and the resulting lines are grouped
    by ``cli.grep("^#", sep=True).till()``, i.e. the lines matching ``^#``
    act as the block delimiters.
    """
    reader = cli.cat()
    splitAtHashLines = cli.grep("^#", sep=True).till()
    return reader | splitAtHashLines
def collect(l):
    """Merge the tokens of semicolon-delimited text blocks into single values.

    A token that starts with ``;`` toggles "text block" mode. Tokens seen
    outside of a block are yielded unchanged. Tokens seen inside a block
    (including the opening ``;...`` token) are buffered; when the closing
    ``;`` token arrives, the buffered tokens are concatenated without a
    separator, the leading ``;`` is dropped, and the result is yielded as one
    value. The closing token itself is not yielded.

    If the input ends while a block is still open, the buffered tokens are
    not yielded.

    :param l: iterable of string tokens
    :return: generator of tokens, with every text block collapsed into one string
    """
    inBlock = False
    tmp = []
    for e in l:
        if e.startswith(";"):
            inBlock = not inBlock
            if not inBlock:
                # Closing delimiter: emit the block without its leading ";".
                yield "".join(tmp)[1:]
                # Clear the buffer so the next block does not repeat this one's tokens.
                tmp = []
                continue
        if not inBlock:
            yield e
        else:
            tmp.append(e)
def tables(name=None, dikt=True):
    """Loads table info.

    Dictionary mode::

        # both return output below
        "1z7z.cif" | cif.tables() | op()["_audit_author"]
        "1z7z.cif" | cif.tables("_audit_author")

    Potential output::

        {'name': ("'Xiao, C.'",
          "'Bator-Kelly, C.M.'",
          "'Rieder, E.'",
          "'Chipman, P.R.'",
          "'Craig, A.'",
          "'Kuhn, R.J.'",
          "'Wimmer, E.'",
          "'Rossmann, M.G.'"),
         'pdbx_ordinal': ('1', '2', '3', '4', '5', '6', '7', '8')}

    Result is a dictionary of ``table name -> dict()``. That inner dictionary maps from
    column name to a list of elements. All columns should have the same number of elements.

    Table mode::

        # both return output below
        "1z7z.cif" | cif.tables("_audit_author", dikt=False)
        "1z7z.cif" | cif.tables(dikt=False) | op()["_audit_author"]

    Potential output::

        [['name', 'pdbx_ordinal'],
         ["'Xiao, C.'", '1'],
         ["'Bator-Kelly, C.M.'", '2'],
         ["'Rieder, E.'", '3'],
         ["'Chipman, P.R.'", '4'],
         ["'Craig, A.'", '5'],
         ["'Kuhn, R.J.'", '6'],
         ["'Wimmer, E.'", '7'],
         ["'Rossmann, M.G.'", '8']]

    Result is a dictionary of ``table name -> List[List[str]]``. So basically you're
    getting the table directly.

    :param name: if specified, only grabs the specified table, else returns every table
    :param dikt: whether to return a dict or table for each table"""
    def inner(url):
        # Preprocessing: split the file into blocks, keep the ones that hold a
        # table, skip the first two lines of each block and strip the rest.
        blocks = url | toBlocks() | hasTable() | cli.apply(~cli.head(2) | cli.op().strip().all()) | cli.deref()

        # Value tokens of every block: the lines that do not start with "_",
        # split on spaces with empty tokens removed, joined into one stream.
        tokenize = cli.aS(k1a.str_split, " ") | cli.filt(cli.op() != "")
        valueStreams = blocks | cli.apply(~cli.filt(cli.op().startswith("_")) | tokenize.all() | cli.joinStreams())

        # Column names of every block: the part after the "." on "_" lines.
        columnNames = blocks | cli.apply(cli.filt(cli.op().startswith("_")) | cli.op().split(".")[1].all()) | cli.toList().all()

        # Final per-table conversion: column dict when ``dikt`` is set,
        # otherwise the table is passed through unchanged.
        if dikt:
            finalize = cli.transpose() | cli.apply(cli.item() & ~cli.head(1)) | cli.transpose() | cli.toDict(False)
        else:
            finalize = cli.iden()

        # Table name of every block: the part before the "." on its first line.
        tableNames = blocks | cli.op()[0].split(".")[0].all() | cli.deref()

        def build(tokens, fields):
            # Merge multi-token text values, cut the stream into rows of
            # len(fields) values and put the header row in front.
            return collect(tokens) | cli.batched(len(fields), True) | cli.insert(fields) | finalize

        selector = cli.iden() if name is None else cli.filt(cli.op() == name, 2)
        parsed = [valueStreams, columnNames, tableNames] | cli.transpose() | selector | cli.cut(0, 1) | ~cli.apply(build)

        # NOTE(review): when ``name`` is given, ``parsed`` has been filtered but
        # ``tableNames`` has not, so the keys here presumably do not line up with
        # the values; only the single value is used in that case - confirm.
        result = [tableNames, parsed] | cli.toDict(False)
        if name is None:
            return result
        if len(result) != 1:
            return None
        return result.values() | cli.item()
    return cli.aS(inner)
def records():
    """Load record info.

    Example::

        "1z7z.cif" | cif.records() | op()["_exptl"] | deref()

    Potential output::

        [['entry_id', '1Z7Z'],
         ['method', "'ELECTRON MICROSCOPY'"],
         ['crystals_number', '?']]

    Result is a dictionary of ``record name -> (n, 2) table``"""
    # Per-block stage 1: skip the first line, strip the rest, split every line
    # on spaces (dropping empty tokens), merge semicolon-delimited text values
    # and group the resulting tokens two by two.
    tokenize = cli.aS(k1a.str_split, " ") | cli.filt(cli.op() != "")
    toPairs = ~cli.head(1) | cli.op().strip().all() | cli.apply(tokenize) | cli.joinStreams() | cli.aS(collect) | cli.batched(2)

    # Per-block stage 2: from the same pairs, take the record name (text before
    # the "." of the very first token) and the pairs themselves with their
    # first column reduced to the text after the ".".
    recordName = cli.item(2) | cli.op().split(".")[0]
    fieldRows = cli.apply(cli.op().split(".")[1], 0)
    each = toPairs | (recordName & fieldRows)

    # Only blocks without a table and with more than one line are records.
    recordBlocks = toBlocks() | ~hasTable() | cli.filt(cli.op().ab_len() > 1)
    return recordBlocks | each.all() | cli.toDict()