Source code for proteobench.io.parsing.utils

"""Utilities for parsing modifications."""

import re

import pandas as pd
import psm_utils as pu

from proteobench.io.params import ProteoBenchParameters

# MaxQuant fixed modifications are exposed in the ProForma-like notation produced by
# ``io.params.maxquant._homogenize_mod``: comma-separated tokens such as
# ``"C[Carbamidomethyl]"`` or ``"Protein N-term[Acetyl]"``.
#
# The pattern describes ONE such token and is anchored at both ends, so the token
# must consist of exactly a residue specifier followed by one bracketed name:
#   * ``residues`` -- one or more characters before the first ``[``;
#   * ``name``     -- one or more characters inside the brackets (no ``]`` allowed).
# Tokens with no specifier, no brackets, or trailing text after ``]`` do not match.
_PROFORMA_FIXED_MOD = re.compile(r"^(?P<residues>[^\[]+)\[(?P<name>[^\]]+)\]$")


def add_fixed_mod(proforma: str, mod_name: str, aas: str) -> str:
    """
    Add a single fixed modification to a peptide in ProForma format.

    Parameters
    ----------
    proforma : str
        Peptide in ProForma format, normally followed by a ``/<charge>`` suffix.
        The suffix is detached before parsing and re-attached to the result.
    mod_name : str
        Name of the modification to add as a fixed modification.
    aas : str
        The amino acid(s) whereon the fixed modification should be registered.
        The value is forwarded unchanged as the target part of the
        ``(mod_name, aas)`` rule handed to ``psm_utils``.
        NOTE(review): this module also calls the function with a list of
        one-letter codes -- confirm ``psm_utils`` treats both forms alike.

    Returns
    -------
    str
        The modified peptide in ProForma format, followed by the original
        ``/<charge>`` suffix when the input carried one.
    """
    # The charge is whatever follows the LAST "/". Splitting from the right means
    # an additional "/" earlier in the string no longer breaks the unpacking.
    sequence, separator, charge = proforma.rpartition("/")
    if not separator:
        # No "/" at all: rpartition leaves the whole input in its last slot.
        # Treat the input as a charge-less peptidoform instead of failing.
        sequence, charge = proforma, ""

    peptidoform = pu.Peptidoform(sequence)
    peptidoform.add_fixed_modifications([(mod_name, aas)])
    peptidoform.apply_fixed_modifications()

    # ``separator`` is "" when there was no charge, so nothing is appended then.
    return peptidoform.proforma + separator + charge
def add_maxquant_fixed_modifications(params: ProteoBenchParameters, result_perf: pd.DataFrame) -> pd.DataFrame:
    """
    Apply the fixed modifications of a MaxQuant run to the precursor ions.

    Parameters
    ----------
    params : ProteoBenchParameters
        Parameters parsed from the MaxQuant results. Only the ``fixed_mods``
        attribute is read; a missing attribute is treated like ``None``.
    result_perf : pd.DataFrame
        The benchmarking results. Its ``"precursor ion"`` column is rewritten
        in place, once per applicable fixed modification.

    Returns
    -------
    pd.DataFrame
        The same ``result_perf`` object, with the fixed modifications written
        into the ``"precursor ion"`` column.

    Notes
    -----
    ``params.fixed_mods`` is expected to be a comma-separated string in
    ProForma-like notation (``"C[Carbamidomethyl]"``). Nothing is raised for
    input that cannot be used; instead the following are silently ignored:

    * a ``fixed_mods`` value that is not a string, or is empty/blank;
    * tokens that do not match ``_PROFORMA_FIXED_MOD`` (including empty ones);
    * terminal modifications such as ``"Protein N-term[Acetyl]"``.
    """
    raw_mods = getattr(params, "fixed_mods", None)
    has_fixed_mods = isinstance(raw_mods, str) and bool(raw_mods.strip())
    if not has_fixed_mods:
        return result_perf

    for raw_token in raw_mods.split(","):
        # An empty token cannot match either (both groups need >= 1 character),
        # so this single check covers blank and malformed tokens alike.
        parsed = _PROFORMA_FIXED_MOD.match(raw_token.strip())
        if parsed is None:
            continue

        target_spec = parsed["residues"].strip()
        if "-term" in target_spec.lower():
            # Terminal fixed modifications are not applied at the residue level here.
            continue

        modification = parsed["name"].strip()
        # One target per character of the residue specifier.
        targets = [*target_spec]

        precursors = result_perf["precursor ion"]
        result_perf["precursor ion"] = precursors.apply(add_fixed_mod, args=(modification, targets))

    return result_perf