Source code for proteobench.modules.quant.quant_lfq_ion_DDA_Astral

"""
DDA Quantification Module for Ion level Quantification.
"""

from __future__ import annotations
from typing import Dict, Tuple

import pandas as pd
from pandas import DataFrame

from proteobench.datapoint.quant_datapoint import QuantDatapoint
from proteobench.exceptions import (
    ConvertStandardFormatError,
    IntermediateFormatGenerationError,
    ParseError,
    ParseSettingsError,
    QuantificationError,
)
from proteobench.io.parsing.parse_ion import load_input_file
from proteobench.io.parsing.parse_settings import ParseSettingsBuilder
from proteobench.modules.constants import MODULE_SETTINGS_DIRS
from proteobench.modules.quant.quant_base_module import QuantModule
from proteobench.score.quant.quantscores import QuantScores
from proteobench.modules.quant.benchmarking import run_benchmarking_with_timing



[docs]
class DDAQuantIonAstralModule(QuantModule):
    """
    DDA Quantification Module for Ion level Quantification.

    Parameters
    ----------
    token : str
        GitHub token for the user.
    proteobot_repo_name : str, optional
        Name of the repository for pull requests and where new points are added, by default "Proteobot/Results_quant_ion_DDA".
    proteobench_repo_name : str, optional
        Name of the repository where the benchmarking results will be stored, by default "Proteobench/Results_quant_ion_DDA".

    Attributes
    ----------
    module_id : str
        Module identifier for configuration.
    precursor_column_name: str
        Level of quantification.
    """

    module_id = "quant_lfq_DDA_ion_Astral"

    def __init__(
        self,
        token: str,
        proteobot_repo_name: str = "Proteobot/Results_quant_ion_DDA_Astral",
        proteobench_repo_name: str = "Proteobench/Results_quant_ion_DDA_Astral",
    ):
        """
        Initialize the DDA Quantification Module for Ion level Quantification for Astral.

        Parameters
        ----------
        token : str
            GitHub token for the user.
        proteobot_repo_name : str, optional
            Name of the repository for pull requests and where new points are added, by default "Proteobot/Results_quant_ion_DDA".
        proteobench_repo_name : str, optional
            Name of the repository where the benchmarking results will be stored, by default "Proteobench/Results_quant_ion_DDA".
        """

        super().__init__(
            token,
            proteobot_repo_name=proteobot_repo_name,
            proteobench_repo_name=proteobench_repo_name,
            parse_settings_dir=MODULE_SETTINGS_DIRS[self.module_id],
            module_id=self.module_id,
        )
        self.precursor_column_name = "precursor ion"


[docs]
    def is_implemented(self) -> bool:
        """
        Return whether the module is fully implemented.

        Returns
        -------
        bool
            True if the module is fully implemented, False otherwise.
        """
        return True



[docs]
    def benchmarking(
        self,
        input_file_loc: any,
        input_format: str,
        user_input: dict,
        all_datapoints: pd.DataFrame,
        default_cutoff_min_prec: int = 3,
    ) -> tuple[DataFrame, DataFrame, DataFrame]:
        """
        Main workflow of the module. Used to benchmark workflow results.

        Parameters
        ----------
        input_file_loc : any
            Path to the workflow output file.
        input_format : str
            Format of the workflow output file.
        user_input : dict
            User provided parameters for plotting.
        all_datapoints : pd.DataFrame
            DataFrame containing all datapoints from the proteobench repo.
        default_cutoff_min_prec : int
            Minimum number of runs an ion has to be identified in.

        Returns
        -------
        tuple[DataFrame, DataFrame, DataFrame]
            Tuple containing the intermediate data structure, all datapoints, and the input DataFrame.
        """
        result = run_benchmarking_with_timing(
            input_file=input_file_loc,
            input_format=input_format,
            user_input=user_input,
            all_datapoints=all_datapoints,
            parse_settings_dir=self.parse_settings_dir,
            module_id=self.module_id,
            precursor_column_name=self.precursor_column_name,
            default_cutoff_min_prec=default_cutoff_min_prec,
            add_datapoint_func=self.add_current_data_point,
        )
        # Return only the first three elements (without timings)
        return result[:3]



[docs]
    def benchmarking_2(
        self,
        input_file_loc: str,
        input_format: str,
        user_input: dict[str, object],
        all_datapoints: pd.DataFrame,
        default_cutoff_min_prec: int = 3,
    ) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, dict[str, float]]:
        """
        Main workflow of the module with timing information. Used to benchmark workflow results.

        Parameters
        ----------
        input_file_loc : str
            Path to the workflow output file.
        input_format : str
            Format of the workflow output file.
        user_input : dict[str, object]
            User provided parameters for plotting.
        all_datapoints : pd.DataFrame
            DataFrame containing all datapoints from the proteobench repo.
        default_cutoff_min_prec : int, optional
            Minimum number of runs an ion has to be identified in (default is 3).

        Returns
        -------
        tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, dict[str, float]]
            A 4-tuple containing:
              - intermediate_metric_structure (pd.DataFrame)
              - all_datapoints (pd.DataFrame)
              - input_df (pd.DataFrame)
              - timings (dict of step names to elapsed seconds)
        """
        return run_benchmarking_with_timing(
            input_file=input_file_loc,
            input_format=input_format,
            user_input=user_input,
            all_datapoints=all_datapoints,
            parse_settings_dir=self.parse_settings_dir,
            module_id=self.module_id,
            precursor_column_name=self.precursor_column_name,
            default_cutoff_min_prec=default_cutoff_min_prec,
            add_datapoint_func=self.add_current_data_point,
        )