#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 14 17:49:35 2021
@author: Jordan Lubbers, Oregon State University
Last updated: 11/15/2021
CTPy is a collection of functions dedicated to the processing of Computed
Tomography (CT) data in python. Built on top of scikit-image, it imports,
denoises, plots, segments, and exports segmentation results. All figures are
returned as matplotlib figure and axis objects and as such are fully
customizable per matplotlib documentation. Data are processed as numpy arrays,
and further mathematical/image processing operations can be applied depending
on the research question under investigation.
Happy processing!
"""
# bulk file import
import glob
# for...warnings
import warnings
# visualization
import matplotlib.pyplot as plt
# array operations
import numpy as np
# saving metadata
import pandas as pd
# for loading dicom images
import pydicom as dicom
from PIL import Image
# image processing
from skimage import exposure, restoration
from skimage.filters import prewitt, roberts, scharr, sobel
from skimage.segmentation import watershed
# progress bar
from tqdm.notebook import tqdm
#%%
# helper function to import an entire folder of 2D images
def import_stack(filepath, filetype, name, downsample=False):
"""
import_stack imports a stack of images and converts them into a 3D numpy
array so that it can be further processed using either numpy functions or
scikit-image based functions in the CTPy functions in the module.
    import_stack(filepath,filetype,name,downsample=False):
Parameters
----------
    filepath : string
        directory where the series of images is located. It is recommended
        that the files be in their own folder, as the function will import all
        files of the type specified in the 'filetype' argument from that directory.
filetype : string
type of file the images are. Choices are 'tif', 'png', 'jpg', or 'dicom'
name : string
name of the dataset you are working with. This will be utilized for
figure labeling and filesaving functions later on.
    downsample : int or False, optional
        factor to downsample the rows and columns by (default False = no
        downsampling). Must be a common factor of the trimmed row and column
        dimensions. The slice (z) direction is downsampled by the same factor.
    Returns
    -------
    stack : ndarray
        3D numpy ndarray where the first dimension is the slice number in the
        stack and the last two dimensions pertain to an individual image
        (e.g., slice). Since the array is in [slices, rows, columns] format,
        this is analogous to [z, y, x].
    original_shape : tuple
        shape of the trimmed stack before any downsampling, returned for
        record keeping.
    """
    if "png" in filetype:
        # creates a list of filepaths for each 2D image
        infiles = sorted(glob.glob("{}/*.png".format(filepath)))
    elif "tif" in filetype:
        # creates a list of filepaths for each 2D image
        infiles = sorted(glob.glob("{}/*.tif".format(filepath)))
    elif "jpg" in filetype:
        # creates a list of filepaths for each 2D image
        infiles = sorted(glob.glob("{}/*.jpg".format(filepath)))
    elif "dicom" in filetype:
        # this grabs every single item in the filepath directory
        all_files = glob.glob("{}/*".format(filepath))
        # keep only paths that contain no '.' (i.e., files without an
        # extension, which is how the dicom files are identified)
        infiles = sorted([d for d in all_files if '.' not in d])
if "dicom" in filetype:
#open the first dicom file and get the data as a numpy array
im = dicom.dcmread(infiles[0]).pixel_array
# create an 'empty' array filled with zeros, (Slices X width X length) is the shape of the array.
# this is equivalent to (z, y, x) in cartesian space
stack = np.zeros((len(infiles), im.shape[0], im.shape[1]))
else:
# open the first image to acquire image size
im = Image.open(infiles[0])
# create an 'empty' array filled with zeros, (Slices X width X length) is the shape of the array.
# this is equivalent to (z, y, x) in cartesian space
stack = np.zeros((len(infiles), im.size[1], im.size[0]))
    # loop over the files; each iteration adds one slice layer into the array
    print("Importing images")
    for i, imfile in enumerate(tqdm(infiles, total=len(infiles), unit="files")):
        if "dicom" in filetype:
            im = dicom.dcmread(imfile).pixel_array
        else:
            im = Image.open(imfile)
        stack[i, :, :] = np.array(im)
    # force each image to have a shape comprised of two even numbers
    # so that we can at least downsample by a factor of 2
    # original rows and cols dimensions, rounded down to the nearest even number
    input_size_rows = stack.shape[1] - (stack.shape[1] % 2)
    input_size_cols = stack.shape[2] - (stack.shape[2] % 2)
    # update the stack dimensions by trimming off a pixel
    # from either the rows or the columns (or both)
    stack = stack[:, :input_size_rows, :input_size_cols]
# DOWNSAMPLING
## uses following example from scipy cookbook
# https://scipython.com/blog/binning-a-2d-array-in-numpy/
# note this will only downsample by common factors between
# the rows and columns dimensions
# here's a helper function to figure that out:
def cf(num1, num2):
common_factors = []
for i in range(1, min(num1, num2) + 1):
if num1 % i == num2 % i == 0:
common_factors.append(i)
return common_factors
    # array of potential downsampling factors based on the trimmed input shape
    common_factors = np.array(cf(input_size_rows, input_size_cols))
    # downsample only if a factor was requested and it is a valid common
    # factor of the row and column dimensions
    if downsample and downsample in common_factors:
        print("downsampling images")
        # downsampled rows and cols dimensions
        output_size_rows = stack.shape[1] // downsample
        output_size_cols = stack.shape[2] // downsample
        # bin size for rows and cols
        bin_size_rows = input_size_rows // output_size_rows
        bin_size_cols = input_size_cols // output_size_cols
# create empty array to fill with downsampled data
# this will have downsampled rows and cols but NOT
# height
stack_downsample = np.zeros(
(stack.shape[0], output_size_rows, output_size_cols)
)
# loop through and complete downsampling for every slice and
# add it to the empty array defined above
for i in tqdm(range(stack.shape[0]), total = stack.shape[0], unit = 'images'):
im = stack[i, :, :]
im_downsample = (
im.reshape(
(output_size_rows, bin_size_rows, output_size_cols, bin_size_cols)
)
.mean(-1)
.mean(1)
)
stack_downsample[i, :, :] = im_downsample
stack_downsample = stack_downsample[::downsample, :, :]
print("your original stack dimensions are {}".format(stack.shape))
print("your downsampled stack dimensions are {}".format(stack_downsample.shape))
return stack_downsample, stack.shape
    elif downsample and downsample not in common_factors:
        warnings.warn(
            """You have chosen a downsampling factor that is not a common factor
            of your row and column shapes, so the original data have been returned.
            Please choose an appropriate downsampling factor."""
        )
        print("your stack dimensions are {}".format(stack.shape))
        print("potential downsampling factors: {}".format(common_factors))
        return stack, stack.shape
    elif stack.size > 125000000:
        warnings.warn(
            """please consider downsampling your stack, as it is very large and
            image processing may become much more difficult depending on your
            computer's capabilities. Check your activity monitor for memory usage.
            """
        )
        return stack, stack.shape
    else:
        print("your stack dimensions are {}".format(stack.shape))
        return stack, stack.shape
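# Example usage (a minimal sketch; the folder path, file type, dataset name, and
# downsampling factor below are hypothetical placeholders, not shipped with CTPy):
#
#   stack, original_shape = import_stack(
#       filepath="my_scan_folder", filetype="tif", name="my_sample", downsample=2
#   )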
#%%
# helper function to rescale the data between 0 and 1 and also trim
# it at upper and lower limits
def rescale_stack(lower_lim, upper_lim, stack):
"""
    rescale_stack uses lower and upper percentile boundaries to apply contrast
    stretching to a desired stack. In brief, it uses percentiles as the boundary
    conditions so that a small number of extreme pixel values do not perturb the
    colormap, which would make small changes in value harder to see. The contrast
    stretched data are then normalized between 0 and 1.
    rescale_stack(lower_lim,upper_lim,stack):
    Parameters
    ----------
    lower_lim : scalar
        lower limit of the rescale range as a percentile (e.g., .01)
    upper_lim : scalar
        upper limit of the rescale range as a percentile (e.g., 99.99)
    stack : ndarray
        dataset to apply the contrast stretching to. Must be a 3D array
    Returns
    -------
    plow : scalar
        pixel value corresponding to the lower percentile limit
    phigh : scalar
        pixel value corresponding to the upper percentile limit
    stack_normal : ndarray
        contrast stretched dataset, normalized between 0 and 1, in the same
        shape as the input stack
    stack_normal1D : 1D array
        contrast stretched, normalized data flattened to 1D so they can be
        visualized in a histogram
    """
# find the pixel values of the upper and lower percentile limits
plow, phigh = np.percentile(stack, (lower_lim, upper_lim))
# use those values to rescale the data
stack_rescale = exposure.rescale_intensity(stack, in_range=(plow, phigh))
stack_normal = (stack_rescale - stack_rescale.min()) / (
stack_rescale.max() - stack_rescale.min()
)
stack_normal1D = stack_normal.ravel()
return plow, phigh, stack_normal, stack_normal1D
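# Example usage (a minimal sketch; the percentile limits shown are illustrative
# and should be chosen for your own data):
#
#   plow, phigh, stack_normal, stack_normal1D = rescale_stack(0.05, 99.95, stack)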
#%%
# helper function for denoising your stack with the non local means function
def denoise_stack(stack_rescale, patch_size, patch_distance):
"""
    denoise_stack applies the non-local-means filter from the scikit-image
    restoration module
    (https://scikit-image.org/docs/dev/api/skimage.restoration.html) to the
    entire stack on a slice by slice basis.
denoise_stack(stack_rescale,patch_size,patch_distance):
Parameters
----------
stack_rescale : ndarray
data to be denoised. This can be a single image (e.g., 2D array) or
whole stack of images (e.g., 3D array)
patch_size : int
patch_size argument from scikit-image.restoration.denoise_nl_means:
"Size of patches used for denoising."
patch_distance : int
patch_distance argument from scikit-image.restoration.denoise_nl_means:
"Maximal distance in pixels where to search patches used for denoising."
    Returns
    -------
    stack_nlm : ndarray
        3D array in the same shape as "stack_rescale" that contains the denoised
        pixel values
    stack_nlm_1D : 1D array
        1D array that contains the denoised pixel values. These values are the
        same as stack_nlm, just flattened so that they can be used for a histogram
    """
stack_length = len(stack_rescale)
nlm_smooth = []
    # keyword arguments passed to denoise_nl_means for every slice
    # (note: newer scikit-image versions replace `multichannel` with `channel_axis`)
    patch_kw = dict(
        patch_size=patch_size, patch_distance=patch_distance, multichannel=True
    )
    for i in tqdm(range(stack_length)):
        nlm_smooth.append(
            restoration.denoise_nl_means(stack_rescale[i], fast_mode=True, **patch_kw)
        )
# convert from list to numpy array
stack_nlm = np.asarray(nlm_smooth)
stack_nlm_1D = stack_nlm.ravel()
print("Your stack has been smoothed using the Skimage non-local means algorithm")
print("patch size: {}".format(patch_size))
print("patch distance: {}".format(patch_distance))
return stack_nlm, stack_nlm_1D
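# Example usage (a minimal sketch; the patch_size and patch_distance values are
# illustrative and should be tuned to your data):
#
#   stack_nlm, stack_nlm_1D = denoise_stack(stack_normal, patch_size=5, patch_distance=6)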
#%%
def create_elevation_map(smoothed_stack, method):
"""
create_elevation_map utilizes the sobel filter from skimage to create an
elevation map based on the gradient at each pixel. It iterates through the
entire stack of data to create an individual elevation map for each image
Parameters
----------
smoothed_stack : ndarray
3D denoised data array
method : string
'sobel', 'roberts', 'scharr', 'prewitt'. This determines the type of
edge operator to use in creating the elevation map. See example here:
https://scikit-image.org/docs/dev/auto_examples/edges/plot_edge_filter.html
Returns
-------
elevation_map : ndarray
collection of elevation maps in same shape as input (3D ndarray)
"""
    elevation_map = np.zeros(smoothed_stack.shape)
    # map each method name to its corresponding skimage edge operator
    edge_operators = {
        "sobel": sobel,
        "roberts": roberts,
        "scharr": scharr,
        "prewitt": prewitt,
    }
    edge_operator = edge_operators[method]
    for i in tqdm(range(len(elevation_map))):
        elevation_map[i, :, :] = edge_operator(smoothed_stack[i, :, :])
    return elevation_map
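# Example usage (a minimal sketch; 'sobel' is one of the four supported edge operators):
#
#   elevation_map = create_elevation_map(stack_nlm, method="sobel")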
#%%
def add_markers(stack_nlm, phase_limits):
"""
Parameters:
----------
stack_nlm | ndarray
denoised data array. This is used to create the right size ndarray
for the marker array
    phase_limits | list
        list of values to be used as delimiters for the phase boundaries.
        Phases that are neither the most nor the least attenuating take a
        two-value [lower, upper] entry, for example:
phase_limits = [[0.5],
[0.6, 0.7],
[0.84]]
This will set markers for three phases where pixel locations with values
less than 0.5 will be used as a marker for phase 1, locations with values
between 0.6 and 0.7 will be used as a marker for phase 2, locations with
values greater than 0.84 will be used as a marker for phase 3.
Returns:
--------
markers | ndarray
marker array the same shape as the input array. All unspecified marker
locations are 0
"""
markers = np.zeros(stack_nlm.shape)
for j in range(len(phase_limits)):
print("filling markers for phase: {}".format(j + 1))
if len(phase_limits[j]) == 1:
if j == len(phase_limits) - 1:
print("pixel boundary: x > {}".format(phase_limits[j]))
for i in tqdm(range(len(markers))):
markers[i, :, :][stack_nlm[i] > phase_limits[j]] = j + 1
else:
print("pixel boundary: x < {}".format(phase_limits[j]))
for i in tqdm(range(len(markers))):
markers[i, :, :][stack_nlm[i] < phase_limits[j]] = j + 1
elif len(phase_limits[j]) == 2:
            print(
                "pixel boundary: {} < x < {}".format(
                    phase_limits[j][0], phase_limits[j][1]
                )
            )
for i in tqdm(range(len(markers))):
markers[i, :, :][
np.logical_and(
stack_nlm[i] > phase_limits[j][0],
stack_nlm[i] < phase_limits[j][1],
)
] = (j + 1)
return markers
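# Example usage (a minimal sketch; the phase limits below are illustrative and
# should be chosen from the histogram of your normalized, denoised data):
#
#   phase_limits = [[0.5], [0.6, 0.7], [0.84]]
#   markers = add_markers(stack_nlm, phase_limits)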
#%%
def run_watershed_segmentation(elevation_array, markers):
"""
run_watershed_segmentation utilizes the watershed function from the segmentation
module (https://scikit-image.org/docs/dev/api/skimage.segmentation.html) and
times the results
    run_watershed_segmentation(elevation_array,markers):
Parameters
----------
elevation_array : ndarray
data to apply the watershed algorithm to. This is our elevation map
3D matrix
markers : ndarray
marker array in the same shape as the "elevation_array" argument
Returns
-------
    segmentation : ndarray
        array in the same shape as the "elevation_array" array, with values
        from "markers" assigned to each pixel by the watershed algorithm
"""
# containers
segmentation = np.zeros(elevation_array.shape)
for i in tqdm(range(len(elevation_array))):
# fill segmentation array with watershed results for each slice
segmentation[i, :, :] = watershed(elevation_array[i, :, :], markers[i, :, :])
return segmentation
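# Example usage (a minimal sketch, continuing from the arrays created above):
#
#   ws_results = run_watershed_segmentation(elevation_map, markers)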
#%%
def save_seg_results(outpath, name, ws_results, cmap, phase_limits):
"""
save_seg_results takes each segmented 2D image (array) and saves them as
individual images to a desired folder.
    save_seg_results(outpath,name,ws_results,cmap,phase_limits):
will save the images in the following format:
"outpath/name_seg_results_z.tif" where "name" and "outpath" are strings
input to the function and "z" is a 3 digit slice number
(i.e., 000, 001, 002)
Parameters
----------
outpath : string
filepath to the folder where the images are to be saved
name : string
name for the dataset you are working with. Try to keep this consistent
throughout the entire project.
    ws_results : ndarray
        results from the watershed segmentation
    cmap : string or matplotlib colormap
        colormap used to display the segmented phases
phase_limits : list
list of values to be used as delimiters for the phase boundaries.
For phases that are neither the most or least attenuating values
will be a list:
phase_limits = [[0.5],
[0.6, 0.7],
[0.84]]
This is the same input as the `add_markers` function
Returns
-------
None.
"""
for i in tqdm(range(len(ws_results))):
fig, axes = plt.subplots(nrows=1, ncols=1)
ax0 = axes
        ax0.imshow(ws_results[i], cmap=cmap, vmin=0, vmax=len(phase_limits) - 1)
        ax0.axis("off")  # no axes so it's just the image
fig.set_size_inches(5, 5)
fig.subplots_adjust(
left=None, bottom=None, right=None, top=None, wspace=None, hspace=None
)
        plt.savefig(
            "{}/{}_seg_results_{:03d}.tif".format(outpath, name, i), bbox_inches="tight"
        )
plt.close()
print("Your images have been saved to the following directory:\n{}".format(outpath))
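# Example usage (a minimal sketch; the output folder and colormap below are
# hypothetical placeholders):
#
#   save_seg_results("segmentation_output", "my_sample", ws_results, "viridis", phase_limits)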
#%%
def save_metadata(segmentation_metadata, phase_limits, outpath, filename):
"""
For the reproducible segmentation of your CT volume, this will
produce a .csv file with all the decisions made in the segmentation
process.
Parameters:
-----------
segmentation_metadata | dict
dictionary organized as follows:
segmentation_metadata = {
"name": name,
"original_stack_shape": [original_stack_shape],
"processed_stack_shape": [stack.shape],
"rescaled_percentile_values": [[lower_lim, upper_lim]],
"denoise_patch_size": patch_size,
"denoise_patch_distance": patch_distance,
"elevation_map_algorithm": elevation_algorithm,
}
phase_limits | list
list of values to be used as delimiters for the phase boundaries.
For phases that are neither the most or least attenuating values
will be a list:
phase_limits = [[0.5],
[0.6, 0.7],
[0.84]]
This will set markers for three phases where pixel locations with values
less than 0.5 will be used as a marker for phase 1, locations with values
between 0.6 and 0.7 will be used as a marker for phase 2, locations with
values greater than 0.84 will be used as a marker for phase 3.
    outpath | string
        the directory to save the metadata to
    filename | string
        the name of the file being saved; the suffix
        "_segmentation_metadata.csv" is appended automatically
Returns:
--------
None
*will save a csv with metadata in the form of a table to the
specified directory. Ex:
        Parameter                         | value
        ----------------------------------|------------------
        name                              | 0009_20CJ04_x7_A
        original_stack_shape              | (544, 860, 872)
        processed_stack_shape             | (272, 430, 436)
        rescaled_percentile_values        | [0.05, 99.97]
        denoise_patch_size                | 10
        denoise_patch_distance            | 10
        elevation_map_algorithm           | sobel
        phase 1 normalized pixel boundary | [0.5]
        phase 2 normalized pixel boundary | [0.6, 0.67]
        phase 3 normalized pixel boundary | [0.7, 0.83]
        phase 4 normalized pixel boundary | [0.84]
"""
for i, limit in enumerate(phase_limits):
segmentation_metadata.update(
{"phase {} normalized pixel boundary".format(i + 1): [limit]}
)
outfile = "{}_segmentation_metadata.csv".format(filename)
df = pd.DataFrame(
dict([(k, pd.Series(v)) for k, v in segmentation_metadata.items()])
).T.reset_index()
df.columns = ["Parameter", "value"]
    df.to_csv("{}/{}".format(outpath, outfile), index=False)
print(
"Your metadata has been saved as:\n{}\n\nin the following directory: \n{}".format(
outfile, outpath
)
)
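# Example usage (a minimal sketch; every value in the dictionary below is a
# hypothetical placeholder standing in for the choices made during processing):
#
#   segmentation_metadata = {
#       "name": "my_sample",
#       "original_stack_shape": [original_shape],
#       "processed_stack_shape": [stack.shape],
#       "rescaled_percentile_values": [[0.05, 99.95]],
#       "denoise_patch_size": 5,
#       "denoise_patch_distance": 6,
#       "elevation_map_algorithm": "sobel",
#   }
#   save_metadata(segmentation_metadata, phase_limits, "metadata_output", "my_sample")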