# Source code for k1lib.cli.cif

# AUTOGENERATED FILE! PLEASE DON'T EDIT HERE. EDIT THE SOURCE NOTEBOOKS INSTEAD
"""All tools related to cif file format that describes protein structures.
Expected to use behind the "cif" module name, like this::

    from k1lib.imports import *
    cif.cat("abc.cif")
"""
import k1lib
import k1lib.cli as cli;
from k1lib.cli.init import BaseCli, yieldT
from typing import Iterator, List
from k1lib.cli.typehint import *
import k1lib._k1a as k1a
__all__ = ["tables", "records"]
def hasTable():
    """Builds a filter cli that keeps only the blocks containing a table.

    A block passes when grepping it for ``loop_`` yields more than zero
    lines. The result can be inverted with ``~`` to keep only the blocks
    without a table, which is how :func:`records` uses it.

    :return: a fresh ``cli.filt`` instance on every call"""
    return cli.filt(cli.grep("loop_") | cli.shape(0) | (cli.op() > 0))
def toBlocks():
    """Builds a cli that reads a file and splits its lines into blocks.

    The input is read with ``cli.cat()`` and then separated into sections
    using lines matching ``^#`` as the delimiter.
    NOTE(review): whether the delimiter lines are kept at the start and/or end
    of each section depends on ``grep(..., sep=True).till()`` semantics --
    confirm against the k1lib documentation.

    :return: a fresh cli pipeline on every call"""
    return cli.cat() | cli.grep("^#", sep=True).till()
def collect(l):                                                                  # collect
    """Merges ``;``-delimited runs of elements into single values.

    Elements outside of a ``;`` run are yielded unchanged. An element starting
    with ``;`` toggles the "inside a run" state: the opening element and
    everything after it is buffered, and when the next element starting with
    ``;`` is seen, the buffered pieces are concatenated (with no separator),
    the leading ``;`` is dropped and the result is yielded as one value. The
    closing element itself is discarded.

    The buffer is cleared after every run, so consecutive runs in the same
    stream do not leak into each other. A run that is never closed yields
    nothing.

    :param l: iterable of strings
    :return: generator of strings"""
    inBlock = False; tmp = []                                                    # collect
    for e in l:                                                                  # collect
        if e.startswith(";"):                                                    # collect
            inBlock = not inBlock                                                # collect
            if not inBlock:                                                      # collect
                # closing delimiter: emit the run without its leading ";"
                yield "".join(tmp)[1:]                                           # collect
                tmp = [] # reset, else the next run would be prefixed by this one # collect
                continue                                                         # collect
        if inBlock: tmp.append(e)                                                # collect
        else: yield e                                                            # collect

# [docs]
def tables(name=None, dikt=True):                                                # tables
    """Loads table info.
Dictionary mode::

    # both return output below
    "1z7z.cif" | cif.tables() | op()["_audit_author"]
    "1z7z.cif" | cif.tables("_audit_author")

Potential output::

    {'name': ("'Xiao, C.'",
      "'Bator-Kelly, C.M.'",
      "'Rieder, E.'",
      "'Chipman, P.R.'",
      "'Craig, A.'",
      "'Kuhn, R.J.'",
      "'Wimmer, E.'",
      "'Rossmann, M.G.'"),
     'pdbx_ordinal': ('1', '2', '3', '4', '5', '6', '7', '8')}

Result is a dictionary of ``table name -> dict()``. That inner dictionary maps from
column name to a list of elements. All columns should have the same number of elements.

Table mode::

    # both return output below
    "1z7z.cif" | cif.tables("_audit_author", dikt=False)
    "1z7z.cif" | cif.tables(dikt=False) | op()["_audit_author"]

Potential output::

    [['name', 'pdbx_ordinal'],
     ["'Xiao, C.'", '1'],
     ["'Bator-Kelly, C.M.'", '2'],
     ["'Rieder, E.'", '3'],
     ["'Chipman, P.R.'", '4'],
     ["'Craig, A.'", '5'],
     ["'Kuhn, R.J.'", '6'],
     ["'Wimmer, E.'", '7'],
     ["'Rossmann, M.G.'", '8']]

Result is a dictionary of ``table name -> List[List[str]]``. So basically you're
getting the table directly.

When ``name`` is given, the single selected table is returned directly instead of
a dictionary. If that lookup does not end up with exactly one table, ``None`` is
returned.

:param name: if specified, only grabs the specified table, else returns every table
:param dikt: whether to return a dict or table for each table"""                 # tables
    def inner(url): # url is piped into toBlocks(), which starts by reading it with cli.cat() # tables
        # Split into blocks, keep only those that contain "loop_", drop the
        # first 2 lines of every block and strip whitespace from each line.
        # Presumably the 2 dropped lines are the "#" separator and "loop_".
        a = url | toBlocks() | hasTable() | cli.apply(~cli.head(2) | cli.op().strip().all()) | cli.deref() # preprocessing, split to blocks # tables
        # Per block: take lines NOT starting with "_" (the data lines), split
        # each on spaces, drop empty pieces and flatten into one token stream.
        b = a | cli.apply(~cli.filt(cli.op().startswith("_")) | (cli.aS(k1a.str_split, " ") | cli.filt(cli.op() != "")).all() | cli.joinStreams()) # tables
        # Per block: take lines starting with "_" (the column declarations)
        # and keep the piece right after the first "." as the column name.
        fieldss = a | cli.apply(cli.filt(cli.op().startswith("_")) | cli.op().split(".")[1].all()) | cli.toList().all() # tables
        # Post-processing applied to every finished table. In dict mode the
        # table is transposed into columns, each column is split into (first
        # element, rest), and that is turned into a dict keyed by the first
        # element (the column name). In table mode it is a passthrough.
        f = (cli.transpose() | cli.apply(cli.item() & ~cli.head(1)) | cli.transpose() | cli.toDict(False)) if dikt else cli.iden() # asDict # tables
        # Table name of each block: text before the first "." of the block's
        # first remaining line.
        tableNames = a | cli.op()[0].split(".")[0].all() | cli.deref()           # tables
        # Line up (tokens, fields, tableName) per block, optionally keep only
        # rows whose table name (column 2) equals `name`, then drop the name
        # column. For every (tokens, fields) pair: merge ";"-delimited runs
        # via collect(), group tokens into rows of len(fields), prepend the
        # header row and apply f.
        c = [b, fieldss, tableNames] | cli.transpose() | (cli.filt(cli.op() == name, 2) if name is not None else cli.iden()) | cli.cut(0, 1)\
            | ~cli.apply(lambda l, fields: collect(l) | cli.batched(len(fields), True) | cli.insert(fields) | f) # tables
        # NOTE(review): when `name` is given, `c` has been filtered but
        # `tableNames` has not, so the key paired with the surviving table may
        # not be `name`. Only the value of `d` is used on that path -- confirm
        # this is intended.
        d = [tableNames, c] | cli.toDict(False)                                  # tables
        if name is None: return d                                                # tables
        else:                                                                    # tables
            # anything other than exactly one entry is reported as None
            if len(d) != 1: return None                                          # tables
            else: return d.values() | cli.item()                                 # tables
    return cli.aS(inner)                                                         # tables


# [docs]
def records():                                                                   # records
    """Load record info.
Example::

    "1z7z.cif" | cif.records() | op()["_exptl"] | deref()

Potential output::

    [['entry_id', '1Z7Z'],
     ['method', "'ELECTRON MICROSCOPY'"],
     ['crystals_number', '?']]

Result is a dictionary of ``record name -> (n, 2) table``"""                     # records
    # `each` processes a single block:
    #   1. drop the block's first line and strip whitespace from the rest
    #   2. split every line on spaces, drop empty pieces, flatten to one stream
    #   3. merge ";"-delimited runs into single values via collect()
    #   4. group the stream into pairs, i.e. (key, value) rows
    #   5. fan out with `&` into two results: the text before the first "."
    #      of cli.item(2) (presumably the very first key, giving the record
    #      name), and the same rows with column 0 reduced to the piece after
    #      the first "." (the field name)
    each = ~cli.head(1) | cli.op().strip().all() | cli.apply(cli.aS(k1a.str_split, " ") | cli.filt(cli.op() != "")) | cli.joinStreams() | cli.aS(collect) | cli.batched(2)\
        | (cli.item(2) | cli.op().split(".")[0]) & (cli.apply(cli.op().split(".")[1], 0)) # records
    # Split the file into blocks, keep only blocks WITHOUT a "loop_" table
    # (inverted hasTable filter) whose length is greater than 1, run `each`
    # on every block and build a dict from the resulting (name, rows) pairs.
    return toBlocks() | ~hasTable() | cli.filt(cli.op().ab_len() > 1) | each.all() | cli.toDict() # records