Source code for proteobench.modules.template.datapoint

import json
from dataclasses import asdict, dataclass
from datetime import datetime
from typing import Optional


@dataclass
class Datapoint:
    """Data used to store the experimental metadata and data analysis settings.

    This is a template: modules are expected to add or remove fields below the
    fixed metadata to match the parameters they benchmark.

    Example for attributes:
        id: A unique identifier for the datapoint (``None`` until ``generate_id`` is called).
        is_temporary: A boolean flag indicating whether the datapoint is temporary or not.
        search_engine: The name of the search engine used for the experiment.
        software_version: The version number of the software used for the experiment.
        fdr_psm: The false discovery rate at the peptide-spectrum match level.
        fdr_peptide: The false discovery rate at the peptide level.
        fdr_protein: The false discovery rate at the protein level.
        MBR: A boolean flag indicating whether match-between-runs was enabled or not.
        precursor_tol: The precursor mass tolerance in units specified by precursor_tol_unit.
        precursor_tol_unit: The unit of the precursor mass tolerance. Either "Da" or "ppm".
        fragment_tol: The fragment mass tolerance in units specified by fragment_tol_unit.
        fragment_tol_unit: The unit of the fragment mass tolerance. Either "Da" or "ppm".
        enzyme_name: The name of the enzyme used for digestion.
        missed_cleavages: The number of allowed missed cleavages during digestion.
        min_pep_length: The minimum peptide length for identification.
        max_pep_length: The maximum peptide length for identification.

    Examples of result fields a module may add (not declared in this template):
        weighted_sum: The weighted sum score used for protein inference.
        nr_prec: The number of precursors used for protein inference.
    """

    # Fixed metadata
    id: Optional[str] = None
    is_temporary: bool = True
    # add/remove for each module
    search_engine: Optional[str] = None
    software_version: int = 0
    # Rates and tolerances are annotated as float because fractional values
    # are the normal case; the integer defaults are kept so that serialized
    # default datapoints stay unchanged.
    fdr_psm: float = 0
    fdr_peptide: float = 0
    fdr_protein: float = 0
    MBR: bool = False
    precursor_tol: float = 0
    precursor_tol_unit: str = "Da"
    fragment_tol: float = 0
    fragment_tol_unit: str = "Da"
    enzyme_name: Optional[str] = None
    missed_cleavages: int = 0
    min_pep_length: int = 0
    max_pep_length: int = 0

    def calculate_benchmarking_metric_1(self, intermediate_data):
        """Calculates the first benchmarking metric based on the intermediate data.

        Placeholder implementation: ``intermediate_data`` is not used yet and
        the method always returns 0.

        Args:
            intermediate_data (dict): A dictionary containing the intermediate data.

        Returns:
            metric_1 (float): The value of the first benchmarking metric.
        """
        # TODO: calculate metric 1
        metric_1 = 0

        return metric_1

    def calculate_benchmarking_metric_2(self, intermediate_data):
        """Calculates the second benchmarking metric based on the intermediate data.

        Placeholder implementation: ``intermediate_data`` is not used yet and
        the method always returns 0.

        Args:
            intermediate_data (dict): A dictionary containing the intermediate data.

        Returns:
            metric_2 (float): The value of the second benchmarking metric.
        """
        # TODO: calculate metric 2
        metric_2 = 0

        return metric_2

    # TODO: add more functions to calculate the benchmarking metrics

    # Leave these functions as they are
    def generate_id(self):
        """Generates a unique id for the datapoint based on the search engine and software version.

        Sets the id attribute to a string composed of the search engine name, software version number,
        and current timestamp separated by underscores. Nothing is returned.

        Raises:
            TypeError: If ``search_engine`` is not a string (for example still ``None``).
        """
        timestamp = datetime.now().timestamp()
        # Plain concatenation is kept on purpose: an unset search engine fails
        # loudly instead of producing an id that starts with "None".
        self.id = self.search_engine + "_" + str(self.software_version) + "_" + str(timestamp)

    def dump_json_object(self, file_name):
        """Dumps the datapoint as a JSON object to a file.

        Args:
            file_name (str): The name of the file to write to.

        Writes a JSON representation of the datapoint to a file with the given name. Appends the JSON object
        to the end of the file if it already exists.
        """
        # Serialize before opening so a serialization error cannot leave a
        # freshly created, empty file behind.
        payload = json.dumps(asdict(self))
        # NOTE(review): objects are appended back to back with no separator, so
        # a file holding several datapoints is not a single valid JSON document.
        # Kept as-is because existing readers may depend on this layout.
        with open(file_name, "a", encoding="utf-8") as f:
            f.write(payload)