# Source code for proteobench.datapoint.datapoint_base

"""
Abstract base class for datapoint modules.
"""

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict

import pandas as pd


@dataclass
class DatapointBase(ABC):
    """
    Abstract base class for benchmark datapoints.

    This class defines the interface that all datapoint types must implement,
    allowing for modular and extensible datapoint handling for different
    benchmarking modules.

    Subclasses should define their own attributes specific to their
    benchmarking module. The base class itself declares no dataclass fields.
    """

    @abstractmethod
    def generate_id(self) -> None:
        """
        Generate a unique ID for the benchmark run.

        This ID should uniquely identify each run of the benchmark.
        """

    # ``@staticmethod`` must stay the outermost decorator so that ``ABC``
    # still registers the wrapped function as abstract.
    @staticmethod
    @abstractmethod
    def generate_datapoint(intermediate: pd.DataFrame, input_format: str, user_input: dict, **kwargs) -> pd.Series:
        """
        Generate a datapoint object containing metadata and results from the benchmark run.

        Parameters
        ----------
        intermediate : pd.DataFrame
            The intermediate DataFrame containing benchmark results.
        input_format : str
            The format of the input data (e.g., software tool name).
        user_input : dict
            User-defined input values for the benchmark.
        **kwargs : dict
            Additional module-specific parameters.

        Returns
        -------
        pd.Series
            A Pandas Series containing the datapoint's attributes as key-value pairs.
        """

    @staticmethod
    @abstractmethod
    def get_metrics(df: pd.DataFrame, **kwargs) -> Dict[int, Dict[str, float]]:
        """
        Compute statistical metrics from the provided DataFrame.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame containing the intermediate results.
        **kwargs : dict
            Additional module-specific parameters.

        Returns
        -------
        Dict[int, Dict[str, float]]
            Dictionary mapping quantification cutoffs to their computed metrics.
        """