# Source code for proteobench.datapoint.datapoint_base

"""
Abstract base class for datapoint modules.
"""

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict

import pandas as pd



@dataclass
class DatapointBase(ABC):
    """
    Abstract base class for benchmark datapoints.

    This class defines the interface that all datapoint types must implement,
    allowing for modular and extensible datapoint handling for different
    benchmarking modules.

    The base class declares no fields of its own; subclasses should define the
    attributes specific to their benchmarking module and must override every
    abstract method below before they can be instantiated.
    """

    @abstractmethod
    def generate_id(self) -> None:
        """
        Generate a unique ID for the benchmark run.

        This ID should uniquely identify each run of the benchmark.
        """

    @staticmethod
    @abstractmethod
    def generate_datapoint(
        intermediate: pd.DataFrame,
        input_format: str,
        user_input: dict,
        **kwargs: Any,
    ) -> pd.Series:
        """
        Generate a datapoint object containing metadata and results from the benchmark run.

        Parameters
        ----------
        intermediate : pd.DataFrame
            The intermediate DataFrame containing benchmark results.
        input_format : str
            The format of the input data (e.g., software tool name).
        user_input : dict
            User-defined input values for the benchmark.
        **kwargs : Any
            Additional module-specific parameters.

        Returns
        -------
        pd.Series
            A Pandas Series containing the datapoint's attributes as key-value pairs.
        """

    @staticmethod
    @abstractmethod
    def get_metrics(df: pd.DataFrame, **kwargs: Any) -> Dict[int, Dict[str, float]]:
        """
        Compute statistical metrics from the provided DataFrame.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame containing the intermediate results.
        **kwargs : Any
            Additional module-specific parameters.

        Returns
        -------
        Dict[int, Dict[str, float]]
            Dictionary mapping quantification cutoffs to their computed metrics.
        """