Exploring pipeline data#

Imports#

import pyopia.background
import pyopia.classify
import pyopia.instrument.silcam
import pyopia.instrument.holo
import pyopia.io
import pyopia.pipeline
import pyopia.plotting
import pyopia.process
import pyopia.statistics
import pyopia.exampledata

import os
import matplotlib.pyplot as plt

Installation note:#

These examples use the optional ‘classification’ dependencies, which need to be installed first (see here).

Setup and run a pipeline as normal#

# Fetch the example classifier model into the current working directory
model_path = pyopia.exampledata.get_example_model(os.getcwd())

# Make sure the output folder is there
os.makedirs('proc', exist_ok=True)

# Statistics for each image are appended to the output file, so clear away any
# file left over from an earlier run before processing
# NOTE(review): the StatsToDisc log reports output_datafile './test' - confirm
# that this path matches the output_datafile set in config.toml
datafile_nc = os.path.join('proc', 'test')
stats_file = datafile_nc + '-STATS.nc'
if os.path.isfile(stats_file):
    os.remove(stats_file)

# Read the pipeline settings
toml_settings = pyopia.io.load_toml('config.toml')

# Build the pipeline from the settings, which also runs the initial steps
MyPipeline = pyopia.pipeline.Pipeline(toml_settings)

# Get an example SilCam image (from the test suite)
filename = pyopia.exampledata.get_example_silc_image(os.getcwd())

# Run the image through the pipeline, then pick out the stats dataframe
MyPipeline.run(filename)
stats = MyPipeline.data['stats']
Hide code cell output
Initialising pipeline
WARNING: Classification assumes loaded images have values in the range 0-255
Classify  ready with: {'model_path': 'keras_model.h5'}  and data dict_keys(['cl', 'settings', 'skip_next_steps', 'raw_files'])
Example image already exists. Skipping download.
SilCamLoad  ready with: {}  and data dict_keys(['cl', 'settings', 'skip_next_steps', 'raw_files', 'filename'])
ImagePrep  ready with: {'image_level': 'imraw'}  and data dict_keys(['cl', 'settings', 'skip_next_steps', 'raw_files', 'filename', 'timestamp', 'imraw'])
Segment  ready with: {'threshold': 0.85, 'segment_source': 'im_minimum'}  and data dict_keys(['cl', 'settings', 'skip_next_steps', 'raw_files', 'filename', 'timestamp', 'imraw', 'im_minimum', 'imref'])
segment
clean
CalculateStats  ready with: {'export_outputpath': 'silcam_rois', 'roi_source': 'imref'}  and data dict_keys(['cl', 'settings', 'skip_next_steps', 'raw_files', 'filename', 'timestamp', 'imraw', 'im_minimum', 'imref', 'imbw'])
statextract
21.7% saturation
measure
  870 particles found
EXTRACTING 870 IMAGES from 870
StatsToDisc  ready with: {'output_datafile': './test'}  and data dict_keys(['cl', 'settings', 'skip_next_steps', 'raw_files', 'filename', 'timestamp', 'imraw', 'im_minimum', 'imref', 'imbw', 'stats'])

Pipeline data available#

Now that the pipeline has finished, we can look at the keys in Pipeline.data to see what is available.

Full documentation is here: pyopia.pipeline.Data

# Show the keys currently held in the pipeline's data dictionary
print(MyPipeline.data.keys())
dict_keys(['cl', 'settings', 'skip_next_steps', 'raw_files', 'filename', 'timestamp', 'imraw', 'im_minimum', 'imref', 'imbw', 'stats'])

Visualise the reference, 2-D corrected image, and segmented image#

This helps us see the quality of the image that is passed to the pipeline, and how well the segmentation performed in separating particles from the background.

# Show three stages side by side: the reference image, the 2-D image that is
# passed to segmentation, and the binary segmentation result (inverted for display).
panels = [
    (MyPipeline.data['imref'], 'imref - reference colour image'),
    (MyPipeline.data['im_minimum'], 'im_minimum - 2D image prior to segmentation'),
    (~MyPipeline.data['imbw'], 'imbw - inverted segmented image'),
]

f, a = plt.subplots(1, 3, figsize=(15, 10))
for ax, (image, title) in zip(a, panels):
    # 'gray' is the colormap name every matplotlib release accepts; the 'grey'
    # alias is only registered by newer versions and fails on older ones
    ax.imshow(image, cmap='gray')
    ax.set_title(title)
    ax.axis('off')
plt.tight_layout()
../_images/5f51784ad0457a262145b8e55f21c365d8a56fcb7a618aedfbb7db6c94bc1193.png

Check classified size distribution statistics#

Now we can quickly plot some size distributions and check what the classifier returned.

# Build xstats from the stats dataframe and the settings, and keep it alongside
# the rest of the pipeline data
MyPipeline.data['xstats'] = pyopia.io.make_xstats(MyPipeline.data['stats'], toml_settings)
xstats = MyPipeline.data['xstats']

# The pixel size is the same for every distribution below, so look it up once
pixel_size = pyopia.pipeline.steps_from_xstats(xstats)['general']['pixel_size']

dias, vd_total = pyopia.statistics.vd_from_stats(xstats, pixel_size)

# A very crude extraction of the particles the classifier scores above 0.5 for
# oil and for bubbles respectively
stats = MyPipeline.data['stats']
oil = stats[stats['probability_oil'] > 0.5]
bubble = stats[stats['probability_bubble'] > 0.5]

dias, vd_oil = pyopia.statistics.vd_from_stats(oil, pixel_size)
dias, vd_bubble = pyopia.statistics.vd_from_stats(bubble, pixel_size)

# Total distribution as a solid black line, per-class distributions dashed
plt.plot(dias, vd_total, 'k', label="total")
plt.plot(dias, vd_oil, '--', color='r', label="oil")
plt.plot(dias, vd_bubble, '--', color='b', label="bubble")
plt.xscale('log')
plt.xlabel('ECD [um]')
plt.ylabel('Volume Distribution [uL/sample vol.]')
plt.legend();
../_images/db3c6d0f0c099f45762316f8713c69ad70b4fb5f4aa27eff1cafc8db22eb0c35.png

Crude classification histogram#

We can take a look at a summary of the number frequency distribution of the highest probability class from the classifier like this.

Note: Often for oil and bubbles, we work with volume distributions - the histogram below is number-based, so it will be biased towards the very smallest particles for typical droplet size distributions.

# Extend the stats with the classifier's best guess for each particle
stats = pyopia.statistics.add_best_guesses_to_stats(stats)

# Number-based histogram of the best-guess labels; rotate the tick labels
# so the class names stay readable
best_guess = stats['best guess']
plt.hist(best_guess)
plt.xticks(rotation=90);
../_images/4bb07e0caf8d36c216cc5de2f74a07abeffd3288f5b0ed1eb5fbb01d199481da.png