Source code for proteobench.modules.quant.quant_lfq_ion_DDA_Astral

"""
DDA quantification module for ion-level quantification (Astral).
"""

from __future__ import annotations

from typing import Any, Dict, Optional, Tuple

import pandas as pd
from pandas import DataFrame

from proteobench.modules.constants import MODULE_SETTINGS_DIRS
from proteobench.modules.quant.benchmarking import run_benchmarking_with_timing
from proteobench.modules.quant.quant_base_module import QuantModule


class DDAQuantIonAstralModule(QuantModule):
    """
    DDA quantification module for ion-level quantification (Astral).

    Parameters
    ----------
    token : str
        GitHub token for the user.
    proteobot_repo_name : str, optional
        Name of the repository for pull requests and where new points are
        added, by default "Proteobot/Results_quant_ion_DDA_Astral".
    proteobench_repo_name : str, optional
        Name of the repository where the benchmarking results will be stored,
        by default "Proteobench/Results_quant_ion_DDA_Astral".
    branch : str, optional
        Forwarded unchanged to the ``QuantModule`` constructor, by default
        None.

    Attributes
    ----------
    module_id : str
        Module identifier for configuration.
    precursor_column_name : str
        Level of quantification.
    """

    module_id = "quant_lfq_DDA_ion_Astral"

    def __init__(
        self,
        token: str,
        proteobot_repo_name: str = "Proteobot/Results_quant_ion_DDA_Astral",
        proteobench_repo_name: str = "Proteobench/Results_quant_ion_DDA_Astral",
        branch: Optional[str] = None,
    ):
        """
        Initialize the DDA quantification module for ion-level quantification for Astral.

        Parameters
        ----------
        token : str
            GitHub token for the user.
        proteobot_repo_name : str, optional
            Name of the repository for pull requests and where new points are
            added, by default "Proteobot/Results_quant_ion_DDA_Astral".
        proteobench_repo_name : str, optional
            Name of the repository where the benchmarking results will be
            stored, by default "Proteobench/Results_quant_ion_DDA_Astral".
        branch : str, optional
            Forwarded unchanged to the ``QuantModule`` constructor, by default
            None.
        """
        # The parse-settings directory is looked up by module id so that the
        # base class receives the settings belonging to this module.
        super().__init__(
            token,
            proteobot_repo_name=proteobot_repo_name,
            proteobench_repo_name=proteobench_repo_name,
            parse_settings_dir=MODULE_SETTINGS_DIRS[self.module_id],
            module_id=self.module_id,
            branch=branch,
        )
        self.precursor_column_name = "precursor ion"

    def is_implemented(self) -> bool:
        """
        Return whether the module is fully implemented.

        Returns
        -------
        bool
            True if the module is fully implemented, False otherwise.
        """
        return True

    def benchmarking(
        self,
        input_file_loc: Any,
        input_format: str,
        user_input: dict,
        all_datapoints: pd.DataFrame,
        default_cutoff_min_prec: int = 3,
        input_file_secondary: Optional[str] = None,
        max_nr_observed: Optional[int] = None,
    ) -> tuple[DataFrame, DataFrame, DataFrame]:
        """
        Main workflow of the module. Used to benchmark workflow results.

        Parameters
        ----------
        input_file_loc : Any
            Path to the workflow output file.
        input_format : str
            Format of the workflow output file.
        user_input : dict
            User provided parameters for plotting.
        all_datapoints : pd.DataFrame
            DataFrame containing all datapoints from the proteobench repo.
        default_cutoff_min_prec : int, optional
            Minimum number of runs an ion has to be identified in (default is 3).
        input_file_secondary : str, optional
            Path to a secondary input file (used for some formats like AlphaDIA).
        max_nr_observed : int, optional
            Forwarded unchanged to ``run_benchmarking_with_timing``, by
            default None.

        Returns
        -------
        tuple[DataFrame, DataFrame, DataFrame]
            Tuple containing the intermediate data structure, all datapoints,
            and the input DataFrame.
        """
        result = run_benchmarking_with_timing(
            input_file=input_file_loc,
            input_format=input_format,
            user_input=user_input,
            all_datapoints=all_datapoints,
            parse_settings_dir=self.parse_settings_dir,
            module_id=self.module_id,
            precursor_column_name=self.precursor_column_name,
            default_cutoff_min_prec=default_cutoff_min_prec,
            add_datapoint_func=self.add_current_data_point,
            input_file_secondary=input_file_secondary,
            max_nr_observed=max_nr_observed,
        )
        # Return only the first three elements (without timings).
        return result[:3]

    def benchmarking_2(
        self,
        input_file_loc: str,
        input_format: str,
        user_input: dict[str, object],
        all_datapoints: pd.DataFrame,
        default_cutoff_min_prec: int = 3,
    ) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, dict[str, float]]:
        """
        Main workflow of the module with timing information.

        Used to benchmark workflow results. Unlike ``benchmarking``, this
        method does not forward a secondary input file or ``max_nr_observed``
        to ``run_benchmarking_with_timing``.

        Parameters
        ----------
        input_file_loc : str
            Path to the workflow output file.
        input_format : str
            Format of the workflow output file.
        user_input : dict[str, object]
            User provided parameters for plotting.
        all_datapoints : pd.DataFrame
            DataFrame containing all datapoints from the proteobench repo.
        default_cutoff_min_prec : int, optional
            Minimum number of runs an ion has to be identified in (default is 3).

        Returns
        -------
        tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, dict[str, float]]
            A 4-tuple containing:
            - intermediate_metric_structure (pd.DataFrame)
            - all_datapoints (pd.DataFrame)
            - input_df (pd.DataFrame)
            - timings (dict of step names to elapsed seconds)
        """
        return run_benchmarking_with_timing(
            input_file=input_file_loc,
            input_format=input_format,
            user_input=user_input,
            all_datapoints=all_datapoints,
            parse_settings_dir=self.parse_settings_dir,
            module_id=self.module_id,
            precursor_column_name=self.precursor_column_name,
            default_cutoff_min_prec=default_cutoff_min_prec,
            add_datapoint_func=self.add_current_data_point,
        )

    def get_plot_generator(self):
        """
        Return the plot generator provided by the parent class.

        Returns
        -------
        object
            Whatever ``QuantModule.get_plot_generator`` returns.
        """
        return super().get_plot_generator()