Source code for proteobench.io.parsing.utils
"""Utilities for parsing modifications."""
import re
import pandas as pd
import psm_utils as pu
from proteobench.io.params import ProteoBenchParameters
# MaxQuant fixed modifications are exposed in the ProForma-like notation produced by
# ``io.params.maxquant._homogenize_mod`` (e.g. ``"C[Carbamidomethyl]"`` or
# ``"Protein N-term[Acetyl]"``), comma-separated. Match a residue specifier followed
# by a bracketed modification name.
#
# The pattern is anchored at both ends and is applied to one comma-separated token
# at a time (after stripping surrounding whitespace). It exposes two named groups:
#   * ``residues`` - everything before the opening bracket (at least one character);
#   * ``name``     - the text between the brackets (at least one character).
_PROFORMA_FIXED_MOD = re.compile(r"^(?P<residues>[^\[]+)\[(?P<name>[^\]]+)\]$")
[docs]
def add_fixed_mod(proforma: str, mod_name: str, aas: str) -> str:
    """
    Add a single fixed modification to a peptide in ProForma format.

    The trailing charge suffix is detached before the sequence is parsed and is
    re-attached unchanged afterwards, so only the sequence part is rewritten.

    Parameters
    ----------
    proforma : str
        Peptide in ProForma format, including the trailing ``/<charge>`` suffix.
    mod_name : str
        Name of the modification to add as a fixed modification.
    aas : str
        The amino acid(s) whereon the fixed modification should be registered.
        ``add_maxquant_fixed_modifications`` passes a list of single-character
        residue codes here.

    Returns
    -------
    str
        The modified peptide in ProForma format, followed by ``/`` and the
        original charge.

    Raises
    ------
    ValueError
        If ``proforma`` does not contain a ``/`` separating sequence and charge.
    """
    # Split on the LAST "/" only: the charge is the final field, and a "/" that
    # appears earlier (e.g. inside a bracketed tag) belongs to the sequence part.
    sequence, separator, charge = proforma.rpartition("/")
    if not separator:
        raise ValueError(f"Expected a ProForma string with a '/<charge>' suffix, got {proforma!r}")

    peptidoform = pu.Peptidoform(sequence)
    # Register the rule, then expand it onto the matching residues so that it is
    # written out as a regular per-residue modification.
    peptidoform.add_fixed_modifications([(mod_name, aas)])
    peptidoform.apply_fixed_modifications()
    return peptidoform.proforma + "/" + charge
[docs]
def add_maxquant_fixed_modifications(params: ProteoBenchParameters, result_perf: pd.DataFrame) -> pd.DataFrame:
    """
    Write MaxQuant fixed modifications into the ``"precursor ion"`` column.

    Parameters
    ----------
    params : ProteoBenchParameters
        Parameters parsed from a MaxQuant run. Only the ``fixed_mods`` attribute is read.
    result_perf : pd.DataFrame
        Benchmarking results; its ``"precursor ion"`` column is overwritten in place
        whenever at least one residue-level modification is applied.

    Returns
    -------
    pd.DataFrame
        The same ``result_perf`` object that was passed in.

    Notes
    -----
    ``params.fixed_mods`` is read as a comma-separated list of ProForma-like entries
    such as ``"C[Carbamidomethyl]"``. The frame is returned untouched when the
    attribute is missing, is not a string, or is blank. Individual entries are
    skipped, never raised on, when they are empty, do not have the
    ``residues[name]`` shape, or describe a terminal modification (the residue part
    contains ``-term``, e.g. ``"Protein N-term[Acetyl]"``).
    """
    raw_mods = getattr(params, "fixed_mods", None)
    has_mods = isinstance(raw_mods, str) and bool(raw_mods.strip())
    if not has_mods:
        return result_perf

    for entry in map(str.strip, raw_mods.split(",")):
        # Blank entries and entries in an unknown format are both ignored.
        parsed = _PROFORMA_FIXED_MOD.match(entry) if entry else None
        if parsed is None:
            continue

        target = parsed["residues"].strip()
        modification = parsed["name"].strip()

        # Terminal modifications are not handled at the residue level here.
        if "-term" in target.lower():
            continue

        # One target per character of the residue specifier.
        precursors = result_perf["precursor ion"]
        result_perf["precursor ion"] = precursors.apply(add_fixed_mod, args=(modification, list(target)))

    return result_perf