# Source code for k1lib.cli.cif

"""All tools related to cif file format that describes protein structures.
Expected to use behind the "cif" module name, like this::

    from k1lib.imports import *
    "abc.cif" | cif.tables()
"""
import k1lib
import k1lib.cli as cli;
from k1lib.cli.init import BaseCli, yieldT
from typing import Iterator, List
from k1lib.cli.typehint import *
import k1lib._k1a as k1a
__all__ = ["tables", "records"]
def hasTable():
    """Creates a filter cli that keeps only the elements (blocks of lines)
    in which grepping for ``loop_`` gives at least one result. It's used
    inverted (``~hasTable()``) to grab the blocks without a ``loop_`` line."""
    return cli.filt(cli.grep("loop_") | cli.shape(0) | (cli.op() > 0))
def toBlocks():
    """Creates a cli that takes in a file name, reads its lines and groups
    them into blocks using the lines matching ``^#`` as separators.

    NOTE(review): the reading stage was missing from this line (it started
    with a bare ``|``, which is a syntax error). ``cli.cat()`` is restored
    here because callers pipe a file name straight into this cli - confirm
    against the upstream source."""
    return cli.cat() | cli.grep("^#", sep=True).till()
def collect(l):
    """Merges semicolon-delimited blocks inside a stream of string tokens.

    A token that starts with ``;`` toggles "in block" mode. While inside a
    block, tokens (including the opening one) are buffered instead of being
    yielded. When the closing ``;`` token arrives, the buffered tokens are
    concatenated, the leading ``;`` is dropped and the result is yielded as a
    single element; the closing token itself is discarded. Tokens outside of
    any block are yielded unchanged. Example::

        # returns ['a', 'xy', 'b']
        list(collect(["a", ";x", "y", ";", "b"]))

    A block that is never closed yields nothing.

    :param l: iterable of string tokens
    :return: generator of tokens, with each block collapsed into 1 string"""
    inBlock = False; tmp = []
    for e in l:
        if e.startswith(";"):
            inBlock = not inBlock
            if not inBlock:
                yield "".join(tmp)[1:]
                tmp = []  # reset, else the next block would also contain this block's tokens
                continue
        if inBlock: tmp.append(e)
        else: yield e
def tables(name=None, dikt=True):
    """Loads table info. Dictionary mode::

        # both return output below
        "1z7z.cif" | cif.tables() | op()["_audit_author"]
        "1z7z.cif" | cif.tables("_audit_author")

    Potential output::

        {'name': ("'Xiao, C.'", "'Bator-Kelly, C.M.'", "'Rieder, E.'", "'Chipman, P.R.'", "'Craig, A.'", "'Kuhn, R.J.'", "'Wimmer, E.'", "'Rossmann, M.G.'"),
         'pdbx_ordinal': ('1', '2', '3', '4', '5', '6', '7', '8')}

    Result is a dictionary of ``table name -> dict()``. That inner dictionary
    maps from column name to a list of elements. All columns should have the
    same number of elements.

    Table mode::

        # both return output below
        "1z7z.cif" | cif.tables("_audit_author", dikt=False)
        "1z7z.cif" | cif.tables(dikt=False) | op()["_audit_author"]

    Potential output::

        [['name', 'pdbx_ordinal'],
         ["'Xiao, C.'", '1'],
         ["'Bator-Kelly, C.M.'", '2'],
         ["'Rieder, E.'", '3'],
         ["'Chipman, P.R.'", '4'],
         ["'Craig, A.'", '5'],
         ["'Kuhn, R.J.'", '6'],
         ["'Wimmer, E.'", '7'],
         ["'Rossmann, M.G.'", '8']]

    Result is a dictionary of ``table name -> List[List[str]]``. So basically
    you're getting the table directly.

    If ``name`` is specified, then that table is returned directly instead of
    a dictionary, and None is returned if it can't be singled out.

    :param name: if specified, only grabs the specified table, else returns every table
    :param dikt: whether to return a dict or table for each table"""
    def inner(url): # f is iden() or deref(), depending on perf characteristics that you want
        # preprocessing, split to blocks. Keeps the blocks that have a table, leaves out each block's first 2 lines and strips every line
        a = url | toBlocks() | hasTable() | cli.apply(~cli.head(2) | cli.op().strip().all()) | cli.deref()
        # data part of each block: lines not starting with "_", split on spaces (empty pieces thrown away) and flattened into 1 token stream
        b = a | cli.apply(~cli.filt(cli.op().startswith("_")) | (cli.aS(k1a.str_split, " ") | cli.filt(cli.op() != "")).all() | cli.joinStreams())
        # column names of each block: lines starting with "_", keeping the part after the first "."
        fieldss = a | cli.apply(cli.filt(cli.op().startswith("_")) | cli.op().split(".")[1].all()) | cli.toList().all()
        f = (cli.transpose() | cli.apply(cli.item() & ~cli.head(1)) | cli.transpose() | cli.toDict(False)) if dikt else cli.iden() # asDict
        # table name of each block: the part before the first "." of the block's first remaining line
        tableNames = a | cli.op()[0].split(".")[0].all() | cli.deref()
        # tokens are merged by collect(), grouped into rows as wide as the header, then the header is inserted
        c = ([b, fieldss, tableNames] | cli.transpose() | (cli.filt(cli.op() == name, 2) if name is not None else cli.iden()) | cli.cut(0, 1)
             | ~cli.apply(lambda l, fields: collect(l) | cli.batched(len(fields), True) | cli.insert(fields) | f))
        d = [tableNames, c] | cli.toDict(False)
        if name is None: return d
        if len(d) != 1: return None
        return d.values() | cli.item()
    return cli.aS(inner)
def records():
    """Load record info. Example::

        "1z7z.cif" | cif.records() | op()["_exptl"] | deref()

    Potential output::

        [['entry_id', '1Z7Z'],
         ['method', "'ELECTRON MICROSCOPY'"],
         ['crystals_number', '?']]

    Result is a dictionary of ``record name -> (n, 2) table``"""
    # per block: leaves out the first line, strips every line, splits them on spaces (empty pieces thrown away),
    # flattens into 1 token stream, merges tokens using collect() and groups them into pairs. The record name
    # is the part before the first "." of the first key, and every key is replaced by the part after the "."
    each = (~cli.head(1) | cli.op().strip().all() | cli.apply(cli.aS(k1a.str_split, " ") | cli.filt(cli.op() != "")) | cli.joinStreams() | cli.aS(collect) | cli.batched(2)
            | (cli.item(2) | cli.op().split(".")[0]) & (cli.apply(cli.op().split(".")[1], 0)))
    # only the blocks without a table are records
    return toBlocks() | ~hasTable() | cli.filt(cli.op().ab_len() > 1) | each.all() | cli.toDict()