Source code for src.actions.eda
import logging
from typing import List, Dict
from src.data_model.data_model import LeafSequence, MaskSequence
LOGGER = logging.getLogger(__name__)
# *===========================================================================*
def extract_full_eda_df(mseq_list: List[MaskSequence],
                        options: Dict,
                        output_path_list: List[str],
                        lseq_list: List[LeafSequence] = None) -> None:
    """
    Creates and saves a list of full size image EDA dataframes from a list
    of MaskSequences.

    Mask sequences and output paths are paired positionally with ``zip``, so
    processing stops at the end of the shorter of the two lists.

    :param mseq_list: A list of MaskSequence objects
    :param options: the options of what should be included in the dataframe;
     the option name should be the key and the value should be either true or
     false; the "linked_filename" key is read by this function directly
    :param output_path_list: the list of output csv file paths
    :param lseq_list: A list of LeafSequence objects; this is only required if
     the linked filename option is used, in which case the LeafSequence at
     index i is linked to the MaskSequence at index i
    :raises ValueError: if the linked filename option is enabled but no
     lseq_list was provided
    :return: None
    """
    for i, (mseq, csv_output_path) in enumerate(zip(mseq_list,
                                                    output_path_list)):
        link_requested = options["linked_filename"]

        # Checked before loading so that a misconfigured call fails with a
        # clear message instead of an opaque TypeError after the images
        # have already been loaded.
        if link_requested and lseq_list is None:
            raise ValueError(
                "lseq_list must be provided when the 'linked_filename' "
                "option is enabled")

        # less memory intensive for images to be loaded here
        LOGGER.info(f"Creating {mseq.num_files} image objects for "
                    f"{mseq.__class__.__name__} located at {mseq.folder_path}")
        mseq.load_extracted_images(load_image=True)

        try:
            if link_requested:
                mseq.link_sequences(lseq_list[i])

            mseq.get_eda_dataframe(options, csv_output_path)
        finally:
            # Always release the images, even if linking or dataframe
            # creation fails, so they do not stay resident for the rest of
            # the run.
            mseq.unload_extracted_images()
def extract_tiles_eda_df(mseq_list: List[MaskSequence],
                         options: Dict,
                         output_path_list: List[str],
                         lseq_list: List[LeafSequence] = None) -> None:
    """
    Creates and saves a list of tile image EDA dataframes from a list
    of MaskSequences.

    Mask sequences and output paths are paired positionally with ``zip``, so
    processing stops at the end of the shorter of the two lists.

    :param mseq_list: A list of MaskSequence objects
    :param options: the options of what should be included in the dataframe;
     the option name should be the key and the value should be either true or
     false; the "linked_filename" key is read by this function directly
    :param output_path_list: the list of output csv file paths
    :param lseq_list: A list of LeafSequence objects; this is only required if
     the linked filename option is used, in which case the LeafSequence at
     index i is linked to the MaskSequence at index i
    :raises ValueError: if the linked filename option is enabled but no
     lseq_list was provided
    :return: None
    """
    for i, (mseq, csv_output_path) in enumerate(zip(mseq_list,
                                                    output_path_list)):
        link_requested = options["linked_filename"]

        # Fail with a clear message rather than an opaque TypeError from
        # indexing None further down.
        if link_requested and lseq_list is None:
            raise ValueError(
                "lseq_list must be provided when the 'linked_filename' "
                "option is enabled")

        LOGGER.info(f"Creating {mseq.num_files} image objects for "
                    f"{mseq.__class__.__name__} located at {mseq.folder_path}")

        if link_requested:
            mseq.link_sequences(lseq_list[i])

        # NOTE(review): no images are loaded explicitly in this function;
        # presumably get_tile_eda_df loads what it needs itself - confirm.
        try:
            mseq.get_tile_eda_df(options, csv_output_path)
        finally:
            # Release whatever was loaded even if dataframe creation fails.
            mseq.unload_extracted_images()
def extract_full_databunch_df(lseq_list: List[LeafSequence],
                              mseq_list: List[MaskSequence],
                              output_path_list: List[str],
                              embolism_only=False) -> None:
    """
    Extracts a databunch dataframe of full size images. The first field is
    the leaf path and the second field is the mask name. This is useful for
    Fastai.

    Output paths, leaf sequences and mask sequences are paired positionally
    with ``zip``, so processing stops at the end of the shortest list.

    :param lseq_list: A list of LeafSequence objects
    :param mseq_list: A list of MaskSequence objects
    :param output_path_list: the list of output csv file paths
    :param embolism_only: whether only leaves with embolisms should be used
    :return: None
    """
    grouped_inputs = zip(output_path_list, lseq_list, mseq_list)

    for out_csv, leaf_seq, mask_seq in grouped_inputs:
        # Prepare the mask sequence before linking: load its images and run
        # the embolism percent / has-embolism list steps.
        mask_seq.load_extracted_images(load_image=True)
        mask_seq.get_embolism_percent_list()
        mask_seq.get_has_embolism_list()

        leaf_seq.link_sequences(mask_seq)

        message = (f"Creating DataBunch DataFrame using "
                   f"{leaf_seq.__class__.__name__} located at "
                   f"{leaf_seq.folder_path} "
                   f"and {mask_seq.__class__.__name__} located at"
                   f" {mask_seq.folder_path}")
        LOGGER.info(message)

        # get_databunch_dataframe is written in terms of the lseq, i.e. it
        # will always report on the lseq regardless of whether it is longer
        # or shorter than the mseq
        leaf_seq.get_databunch_dataframe(embolism_only, out_csv)
def extract_tiles_databunch_df(lseq_list: List[LeafSequence],
                               mseq_list: List[MaskSequence],
                               output_path_list: List[str],
                               tile_embolism_only: bool = False,
                               leaf_embolism_only: bool = False) -> None:
    """
    Extracts a databunch dataframe of tile images. The first field is
    the leaf path and the second field is the mask name. This is useful for
    Fastai.

    Output paths, leaf sequences and mask sequences are paired positionally
    with ``zip``, so processing stops at the end of the shortest list.

    :param lseq_list: A list of LeafSequence objects
    :param mseq_list: A list of MaskSequence objects
    :param output_path_list: the list of output csv file paths
    :param tile_embolism_only: whether only tiles with embolisms should be used
    :param leaf_embolism_only: whether only leaves with embolisms should be
     used
    :return: None
    """
    for csv_output_path, lseq, mseq in zip(output_path_list,
                                           lseq_list, mseq_list):
        # Each leaf sequence is linked to its mask sequence before the tile
        # dataframe is requested from it.
        lseq.link_sequences(mseq)

        LOGGER.info(f"Creating Tile DataBunch DataFrame using "
                    f"{lseq.__class__.__name__} located at {lseq.folder_path} "
                    f"and {mseq.__class__.__name__} located at"
                    f" {mseq.folder_path}")

        # The return value is not needed here; the csv path is handed to
        # the call itself.
        lseq.get_tile_databunch_df(mseq, tile_embolism_only,
                                   leaf_embolism_only,
                                   csv_output_path)
# *===========================================================================*