Source code for proteobench.io.params.sage

"""
Sage parameter extraction.
"""

import json
import os
import pathlib
from typing import Union

import pandas as pd

from proteobench.io.params import ProteoBenchParameters


def _load_sage_json(fname):
    """
    Load the Sage JSON document from a file path or an in-memory buffer.

    Objects exposing ``getvalue()`` (e.g. ``io.BytesIO``, ``io.StringIO``)
    are read from memory; anything else is opened as a path.
    """
    getvalue = getattr(fname, "getvalue", None)
    if getvalue is None:
        # Plain path: parse straight from the file handle.
        with open(fname, "r", encoding="utf-8") as handle:
            return json.load(handle)

    contents = getvalue()
    # Binary buffers return bytes, text buffers return str; only the former
    # needs decoding.
    if isinstance(contents, (bytes, bytearray)):
        contents = contents.decode("utf-8")
    return json.loads(contents)


def _format_tolerance(tolerance):
    """
    Render a tolerance mapping as a bracketed list with units.

    Example: ``{"ppm": [-10, 10]}`` becomes ``"[-10 ppm, 10 ppm]"``.
    ``ppm`` is preferred when present; otherwise a Dalton entry is used.
    Both ``Da`` and ``da`` spellings of the key are accepted.
    """
    # (key looked up in the JSON, unit label written to the output)
    for key, unit in (("ppm", "ppm"), ("Da", "Da"), ("da", "Da")):
        if key in tolerance:
            return "[" + ", ".join(f"{val} {unit}" for val in tolerance[key]) + "]"
    raise KeyError(f"No supported tolerance unit (ppm or Da) found in {tolerance!r}")


def extract_params(
    fname: Union[str, pathlib.Path],
    json_file=os.path.join(os.path.dirname(__file__), "json/Quant/quant_lfq_DDA_ion.json"),
) -> ProteoBenchParameters:
    """
    Parse Sage quantification tool JSON parameter file and extract relevant parameters.

    Parameters
    ----------
    fname : str or pathlib.Path
        The path to the Sage JSON parameter file. An in-memory buffer
        exposing ``getvalue()`` (bytes or str content) is accepted as well.
    json_file : str, optional
        Path to the JSON file handed to ``ProteoBenchParameters``. Defaults to
        ``json/Quant/quant_lfq_DDA_ion.json`` next to this module.

    Returns
    -------
    ProteoBenchParameters
        The extracted parameters as a `ProteoBenchParameters` object.

    Raises
    ------
    KeyError
        If an expected entry is missing from the Sage JSON, or a tolerance
        block contains neither a ppm nor a Da entry.
    ValueError
        If ``semi_enzymatic`` is neither ``None`` nor a boolean.
    """
    params = ProteoBenchParameters(json_file=json_file)
    data = _load_sage_json(fname)

    database = data["database"]
    enzyme_cfg = database["enzyme"]

    # Sage acts as both the quantification software and the search engine.
    params.software_name = "Sage"
    params.software_version = data["version"]
    params.search_engine = "Sage"
    params.search_engine_version = data["version"]

    # Map K/R cleavage onto an enzyme name; any other cleavage string is
    # reported verbatim.
    params.enzyme = enzyme_cfg["cleave_at"]
    if params.enzyme in ("KR", "RK"):
        if "restrict" not in enzyme_cfg:
            # No restriction entry at all: cleavage before proline is allowed.
            params.enzyme = "Trypsin/P"
        elif enzyme_cfg["restrict"] == "P":
            params.enzyme = "Trypsin"

    params.allowed_miscleavages = enzyme_cfg["missed_cleavages"]

    semi_enzymatic = enzyme_cfg["semi_enzymatic"]
    if semi_enzymatic is None or semi_enzymatic is False:
        params.semi_enzymatic = False
    elif semi_enzymatic is True:
        params.semi_enzymatic = True
    else:
        raise ValueError(f"Unknown value for semi_enzymatic: {semi_enzymatic}")

    params.fixed_mods = database["static_mods"]
    params.variable_mods = database["variable_mods"]

    # Precursor and fragment tolerances share the same JSON shape, so both go
    # through the same formatter.
    params.precursor_mass_tolerance = _format_tolerance(data["precursor_tol"])
    params.fragment_mass_tolerance = _format_tolerance(data["fragment_tol"])

    params.min_peptide_length = int(enzyme_cfg["min_len"])
    params.max_peptide_length = int(enzyme_cfg["max_len"])
    params.max_mods = int(database["max_variable_mods"])

    min_charge, max_charge = data["precursor_charge"][0], data["precursor_charge"][1]
    params.min_precursor_charge = int(min_charge)
    params.max_precursor_charge = int(max_charge)

    # Always reported as enabled; the value is not read from the input file.
    params.enable_match_between_runs = True

    return params
if __name__ == "__main__":
    # Ad-hoc driver: extract the parameters from the Sage JSON test files and
    # store each result as a CSV file next to its input.
    from pathlib import Path
    from pprint import pprint

    test_param_dir = Path("../../../test/params")
    sage_inputs = (
        test_param_dir / "sage_results.json",
        test_param_dir / "sage_parameterfile.json",
    )

    for sage_json in sage_inputs:
        print(f"Extracting parameters from {sage_json}")
        extracted = extract_params(sage_json)

        # Show the raw attribute dictionary, then persist it as a
        # one-column CSV via a pandas Series.
        attributes = extracted.__dict__
        pprint(attributes)
        csv_target = sage_json.with_suffix(".csv")
        pd.Series(attributes).to_csv(csv_target)