Source code for proteobench.modules.denovo.denovo_DDA_HCD

"""
De Novo Module DDA-HCD spectra.
"""

from __future__ import annotations

from typing import Any

import pandas as pd
from pandas import DataFrame

from proteobench.datapoint.denovo_datapoint import DenovoDatapoint
from proteobench.exceptions import (
    ConvertStandardFormatError,
    IntermediateFormatGenerationError,
    ParseError,
    ParseSettingsError,
    QuantificationError,
)
from proteobench.io.parsing.parse_denovo import load_input_file
from proteobench.io.parsing.parse_settings import ParseSettingsBuilder
from proteobench.modules.constants import MODULE_SETTINGS_DIRS
from proteobench.modules.denovo.denovo_base import DeNovoModule
from proteobench.score.denovoscores import DenovoScores


class DDAHCDDeNovoModule(DeNovoModule):
    """
    De novo benchmarking module for DDA-HCD spectra.

    Attributes
    ----------
    module_id : str
        Identifier used to look up the parse-settings directory in
        ``MODULE_SETTINGS_DIRS`` and forwarded to the base class.
    """

    module_id = "denovo_DDA_HCD"

    def __init__(
        self,
        token: str,
        proteobot_repo_name: str = "Proteobot/Results_denovo_lfq_DDA_HCD",
        proteobench_repo_name: str = "Proteobench/Results_denovo_lfq_DDA_HCD",
    ) -> None:
        """
        Initialize the DDA-HCD de novo module.

        Parameters
        ----------
        token : str
            GitHub token for the user.
        proteobot_repo_name : str, optional
            Name of the repository for pull requests and where new points are added,
            by default "Proteobot/Results_denovo_lfq_DDA_HCD".
        proteobench_repo_name : str, optional
            Name of the repository where the benchmarking results will be stored,
            by default "Proteobench/Results_denovo_lfq_DDA_HCD".
        """
        super().__init__(
            token,
            proteobot_repo_name=proteobot_repo_name,
            proteobench_repo_name=proteobench_repo_name,
            parse_settings_dir=MODULE_SETTINGS_DIRS[self.module_id],
            module_id=self.module_id,
        )

    def is_implemented(self) -> bool:
        """
        Return whether the module is fully implemented.

        Returns
        -------
        bool
            Always returns False in this implementation.
        """
        return False

    def benchmarking(
        self,
        input_file_loc: Any,
        input_format: str,
        user_input: dict,
        all_datapoints: pd.DataFrame,
        evaluation_type: str = "mass",
    ) -> tuple[DataFrame, DataFrame, DataFrame]:
        """
        Main workflow of the module. Used to benchmark workflow results.

        Parameters
        ----------
        input_file_loc : Any
            Path to the workflow output file.
        input_format : str
            Format of the workflow output file.
        user_input : dict
            User provided parameters for plotting.
        all_datapoints : pd.DataFrame
            DataFrame containing all datapoints from the proteobench repo.
        evaluation_type : str, optional
            The evaluation type for precision calculation. Either `exact` or
            `mass`-based, by default "mass".

        Returns
        -------
        tuple[DataFrame, DataFrame, DataFrame]
            Tuple containing the intermediate data structure, all datapoints,
            and the input DataFrame.

        Raises
        ------
        ParseError
            If loading the input file raises ``pd.errors.ParserError``.
        ParseSettingsError
            If loading the input file fails for any other reason, or if the
            parse settings cannot be found or built.
        ConvertStandardFormatError
            If the input cannot be converted to the standard format.
        IntermediateFormatGenerationError
            If the intermediate data structure cannot be generated.
        """
        # Parse workflow output file
        try:
            input_df = load_input_file(input_file_loc, input_format)
        except pd.errors.ParserError as e:
            raise ParseError(
                f"Error parsing {input_format} file, please make sure the format is correct and the correct software tool is chosen: {e}"
            ) from e
        except Exception as e:
            # NOTE(review): generic load failures surface as ParseSettingsError
            # rather than ParseError; kept as-is so existing callers that catch
            # ParseSettingsError keep working.
            raise ParseSettingsError("Error parsing the input file.") from e

        # Shared context appended to every settings-related error message.
        msg = f"Folder: {self.parse_settings_dir}, Module: {self.module_id}"

        # Parse settings file
        try:
            parse_settings = ParseSettingsBuilder(
                parse_settings_dir=self.parse_settings_dir, module_id=self.module_id
            ).build_parser(input_format)
        except KeyError as e:
            raise ParseSettingsError(
                f"Error parsing settings file for parsing, settings seem to be missing: {msg}"
            ) from e
        except FileNotFoundError as e:
            raise ParseSettingsError(f"Could not find the parsing settings file: {msg}") from e
        except Exception as e:
            raise ParseSettingsError(f"Error parsing settings file for parsing: {msg}") from e

        # Convert the tool-specific input to the standard format
        try:
            standard_format = parse_settings.convert_to_standard_format(input_df)
        except KeyError as e:
            raise ConvertStandardFormatError("Error converting to standard format, key missing.") from e
        except Exception as e:
            raise ConvertStandardFormatError("Error converting to standard format.") from e

        # Instantiate de novo scores
        denovo_score = DenovoScores()

        # Generate intermediate data structure (calculate the scores)
        try:
            intermediate_metric_structure = denovo_score.generate_intermediate(standard_format)
        except Exception as e:
            raise IntermediateFormatGenerationError("Error generating intermediate data structure.") from e

        # Build the datapoint for this run and add it to the existing ones.
        # Errors raised here propagate unchanged to the caller.
        dtp = DenovoDatapoint()
        current_datapoint = dtp.generate_datapoint(
            intermediate=intermediate_metric_structure,
            input_format=input_format,
            user_input=user_input,
            evaluation_type=evaluation_type,
        )

        all_datapoints = self.add_current_data_point(current_datapoint, all_datapoints=all_datapoints)

        return (
            intermediate_metric_structure,
            all_datapoints,
            input_df,
        )