The other file is for metadata; this one is for image property stuff, mainly to demonstrate lots of multiprocessing capabilities.
from k1lib.imports import *
import pydicom
["abc", "ab"] | applyMp(lambda x: len(x)) | toList()
[3, 2]
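That's the gist of it: same pipeline shape as apply(), but the work gets fanned out across processes. To actually see the parallelism pay off, here's a small sketch of my own with a deliberately slow function; assuming applyMp really distributes calls across worker processes, the wall time should be close to a single call (plus process startup overhead) rather than the sum of all of them.
import time
def slowLen(x):
    time.sleep(1)   # pretend this is an expensive, per-element step
    return len(x)
%%time
["abc", "ab", "abcd", "a"] | applyMp(slowLen) | toList()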
#base = "/home/kelvin/hdd/data/osic-pulmonary-fibrosis-progression"
base = "/home/kelvin/repos/labs/data/osic-pulmonary-fibrosis-progression"
baseT = f"{base}/train"
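# grab one example .dcm file: first entry under base -> first patient -> first slice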
file = ls(base) | item() | ls() | item() | ls() | item()
f = pydicom.read_file(file)
img = torch.tensor(f.pixel_array.astype(np.float32))
img.shape
torch.Size([512, 512])
Are all of them 512×512, though?
%%time
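# for every patient folder, read each slice's pixel_array.shape in parallel, then flatten and split into (heights, widths)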
h, w = ls(baseT)\
    | applyMp(ls() | apply(lambda dcmF: pydicom.read_file(dcmF).pixel_array.shape) | toList())\
    | joinStreams() | transpose() | deref()
Corrupt JPEG data: bad Huffman code
Unsupported marker type 0xfa
CPU times: user 293 ms, sys: 121 ms, total: 414 ms
Wall time: 4.97 s
w | count() | ~sort() | display(None)
24050    512    73%
 8078    768    24%
  536    888     2%
  338    632     1%
   24   1302     0%
h | count() | ~sort() | display(None)
24050    512    73%
 8078    768    24%
  338    632     1%
  319    733     1%
   71    734     0%
   57    752     0%
   31    843     0%
   30    788     0%
   28   1100     0%
   24   1302     0%
Oh god, why can't the images even be square??? Like, why do you want me to suffer??
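Not the end of the world, though. If a uniform size is ever needed downstream, here's a small sketch of my own (not part of the original pipeline) that bilinearly resizes any slice to 512×512 with plain torch:
import torch.nn.functional as F

def to512(dcmF):
    # read one slice and resize it to 512x512, whatever its original shape
    img = torch.tensor(pydicom.read_file(dcmF).pixel_array.astype(np.float32))
    return F.interpolate(img[None, None], size=(512, 512), mode="bilinear", align_corners=False)[0, 0]

to512(file).shape   # -> torch.Size([512, 512])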
How about the pixel value range? First, let's grab the mins and maxes.
def patientMinMax(patientUrl):
    # imports live inside the function so it's self-contained when shipped off to applyMp's worker processes
    import pydicom
    from k1lib.cli import ls, apply, deref
    def f(dcmF):
        img = pydicom.read_file(dcmF).pixel_array
        return img.min(), img.max()
    return patientUrl | ls() | apply(f) | deref()
%%time
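# per-slice (min, max) for every patient, computed in parallel across patients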
minMax = ls(base) | item() | ls() | applyMp(patientMinMax)
min_, max_ = minMax | joinStreams() | transpose() | (toMin() + toMax()); min_, max_
Corrupt JPEG data: bad Huffman code
Unsupported marker type 0xfa
CPU times: user 293 ms, sys: 111 ms, total: 404 ms
Wall time: 6.47 s
(-31860, 65535)
Dear god, why is the range so freaking big? Let's check the histograms, then.
def patientHist(patientUrl):
    dcms = patientUrl | ls()
    histT = torch.zeros(300)
    # 30 random slices per patient, downsampled 10x in each dimension, is plenty for a histogram
    for dcmF in dcms | randomize(None) | head(30):
        img = torch.from_numpy(pydicom.read_file(dcmF).pixel_array[::10, ::10].astype(np.float32))
        histT += torch.histc(img, 300, min_, max_)
    return histT
%%time
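# one 300-bin histogram per patient, each built from 30 random, 10x-downsampled slices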
hists = ls(baseT) | apply(patientHist, None, 20) | deref()
CPU times: user 31.6 s, sys: 540 ms, total: 32.2 s
Wall time: 4.19 s
histT = torch.zeros(300)
for h in hists: histT += h
r = torch.linspace(min_, max_, 300)
plt.plot(r, histT); plt.yscale("log");
b = histT > 1e5; plt.plot(r[b], histT[b]); r[b][::3], histT[b][::3]
(tensor([-3121.7695, -961.0006, -312.7697, 335.4611, 983.6928]), tensor([ 575700., 1534336., 193925., 587804., 2879081.]))
Very interesting. The bulk of pixel values sits in the $[-4300, 3500]$ region, with frequencies around $10^7$ to $10^9$. Why is there a plateau in the 40-100 region? Just background noise, then?
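To put a rough number on that, a quick check of my own against the histogram we just accumulated: what fraction of the sampled pixels land in bins inside that bulk region?
bulk = (r >= -4300) & (r <= 3500)          # bin centers inside the claimed bulk region
(histT[bulk].sum() / histT.sum()).item()   # fraction of all sampled pixel values in there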
A single file's histogram seems to agree with the aggregate one above:
plt.hist(f.pixel_array.flatten(), bins=30);
And for a slice:
plt.imshow(f.pixel_array);
The purple part between the circle and the square is supposedly all -2048:
all(f.pixel_array[:20, :20].flatten() == -2048)
False
Nope. The pixel array's dtype is uint16 (see below), so it can't even hold negative values; these are raw stored values, not Hounsfield units.
And the "air" part above is sort of around -1000 to -1200
f.pixel_array[20:80, 200:300]
array([[  12,   66,    4, ...,   22,  177,    0],
       [   0,  211,  235, ...,    0,  137,  112],
       [   4,   46,  179, ...,    0,    0,  192],
       ...,
       [ 713,  948, 1065, ...,   84,   71,  233],
       [ 949,  987, 1014, ...,  745,  910, 1155],
       [ 730,  899,  920, ..., 1130, 1214, 1272]], dtype=uint16)
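Side note, and an assumption on my part: the Hounsfield-ish numbers the prose above expects (-2048 padding, air near -1000) would only show up after applying the DICOM rescale tags, assuming these files carry them. A minimal sketch:
hu = f.pixel_array.astype(np.float32)
slope = float(getattr(f, "RescaleSlope", 1))          # usually 1 for CT
intercept = float(getattr(f, "RescaleIntercept", 0))  # often negative; assumed present in this dataset
hu = hu * slope + intercept
hu.min(), hu.max()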
Here, we're sampling pixel values from about 3,000 different images (30 batches of 100 files) from all over the dataset. I'm really hesitant to use all 32k images, as that's quite slow, and a few thousand is just as representative.
%%time
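# sample every 128th pixel from 3000 random slices (30 batches of 100 files) and reduce them into global intensity bounds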
bounds = base | ls() | item() | ls() | ls().all() | joinStreams() | randomize(None)\
    | batched(100) | head(30) | (
        applyMp(lambda dcmF: torch.tensor(pydicom.read_file(dcmF).pixel_array.flatten()[::128].astype(np.float32)))\
        | ~aS(torch.crissCross)
    ).all() | ~aS(torch.crissCross)
CPU times: user 4.44 s, sys: 2.5 s, total: 6.94 s
Wall time: 4.69 s
def gen(img=None):
    if img is None:
        # no image passed in: grab a random slice from a random patient
        f = pydicom.read_file(ls(base) | item() | ls() | ls().all() | joinStreams() | randomize() | item())
        img = torch.tensor(f.pixel_array.astype(np.float32))
    fig, (ax1, ax2) = plt.subplots(1, 2, dpi=150)
    ax1.imshow(img); ax2.imshow(img.histScaled(bounds=bounds));
gen()
The transformed image looks a million times better!
gen()
Like, woah. The rescaled images look so, so much better. The details are so crisp!