"""
Extract parameters from a MetaMorpheus TOML file and convert them to a pandas Series.
"""
import os
import tomllib as toml
from io import BytesIO
from pathlib import Path, PosixPath
from typing import IO, Tuple, Union
import pandas as pd
from proteobench.io.params import ProteoBenchParameters
[docs]
def identify_file_type(file: Union[str, IO]) -> str:
"""
Identify whether a single MetaMorpheus file is the TOML settings file or the version text file.
Parameters
----------
file : Union[str, IO]
Path string or file-like object to inspect.
Returns
-------
str
``"toml"`` if the file parses as a TOML settings file, ``"version"`` otherwise.
"""
if isinstance(file, (str, PosixPath, Path)):
try:
with open(file, "rb") as f:
toml.load(f)
return "toml"
except Exception:
return "version"
elif hasattr(file, "read"):
try:
file.seek(0)
content = file.read()
if isinstance(content, str):
content = content.encode("utf-8")
toml.loads(content.decode("utf-8"))
return "toml"
except Exception:
return "version"
finally:
try:
file.seek(0)
except Exception:
pass
return "version"
[docs]
def get_incomplete_upload_warning(files: list) -> str:
    """
    Return a user-facing warning string when fewer than two MetaMorpheus files are uploaded.

    Parameters
    ----------
    files : list
        List of uploaded file objects. Normally contains exactly one element;
        an empty list (or ``None``) is also accepted.

    Returns
    -------
    str
        Warning message describing which file is missing. When nothing was
        uploaded the message asks for both files instead of claiming that a
        version file was provided.
    """
    if not files:
        # Nothing uploaded: do not claim that either file is present.
        return (
            "No MetaMorpheus files were uploaded. "
            "Please upload both the search task settings file (.toml) and the version file."
        )
    if identify_file_type(files[0]) == "toml":
        return (
            "You uploaded the MetaMorpheus search task settings file (.toml). "
            "Please also upload the version file (the plain-text file containing "
            "the MetaMorpheus version, typically named 'allResults.txt' or similar)."
        )
    return "You uploaded the MetaMorpheus version file. Please also upload the search task settings file (.toml)."
def _homogenize_mod(mod_str: str) -> str:
"""Convert MetaMorpheus modification format to ProForma-like notation.
MetaMorpheus format: ``{modname} on {residue}``
with optional terminal qualifiers like ``(Pep N-term)`` or ``(Prot N-term)``.
Examples:
``Carbamidomethyl on C`` -> ``C[Carbamidomethyl]``
``Acetylation on X (Prot N-term)`` -> ``Protein N-term[Acetylation]``
``Oxidation on M`` -> ``M[Oxidation]``
"""
mod_str = mod_str.strip()
if " on " not in mod_str:
return mod_str
name, residue_part = mod_str.split(" on ", 1)
residue_part = residue_part.strip()
if "(Prot N-Term)" in residue_part:
return f"Protein N-term[{name}]"
elif "(Pep N-Term)" in residue_part:
return f"N-term[{name}]"
elif "(Prot C-Term)" in residue_part:
return f"Protein C-term[{name}]"
elif "(Pep C-Term)" in residue_part:
return f"C-term[{name}]"
else:
return f"{residue_part}[{name}]"
[docs]
def load_files(file1: Union[str, IO], file2: Union[str, IO]) -> Tuple[Union[str, None], Union[dict, None]]:
"""
Load two files (IO objects or file paths), returning:
- The first line from a plain text file as the version string
- A dictionary parsed from a TOML file
Returns
-------
Tuple[Union[str, None], Union[dict, None]]
versions_line, settings_dict
"""
versions_line = None
settings = None
def try_parse(file: Union[str, IO]):
nonlocal versions_line, settings
# Case 1: Path
if isinstance(file, (str, PosixPath, Path)):
# Try TOML
try:
with open(file, "rb") as f:
settings_candidate = toml.load(f)
settings = settings_candidate
return
except Exception:
pass
# Try version line
try:
with open(file, "r", encoding="utf-8") as f:
versions_line = f.readline().strip()
return
except Exception:
pass
# Case 2: IO object
elif hasattr(file, "read"):
try:
file.seek(0)
# Try loading directly (only works if binary)
settings_candidate = toml.load(file)
settings = settings_candidate
return
except Exception:
pass
try:
# Try to convert to binary buffer if in text mode
file.seek(0)
content = file.read()
if isinstance(content, str):
buffer = BytesIO(content.encode("utf-8"))
settings_candidate = toml.load(buffer)
settings = settings_candidate
return
except Exception:
pass
try:
file.seek(0)
line = file.readline()
if isinstance(line, bytes):
line = line.decode("utf-8", errors="replace")
versions_line = line.strip()
return
except Exception:
pass
for f in (file1, file2):
try_parse(f)
if versions_line and settings:
print("Successfully parsed both versions and settings.")
else:
print("Could not identify both versions and settings from the provided files.")
return versions_line, settings
[docs]
def parse_modifications(mods: str) -> Union[str, list]:
    """
    Parse a MetaMorpheus modification string into a comma-separated summary.

    Entries are separated by a double tab. Within an entry, the modification
    specification is the second tab-separated field (the first is the
    category, e.g. ``Common Fixed``). Each specification is converted with
    ``_homogenize_mod``.

    Parameters
    ----------
    mods : str
        Modifications in string format, e.g.
        ``"Common Fixed\\tCarbamidomethyl on C\\t\\tCommon Fixed\\tCarbamidomethyl on U"``.

    Returns
    -------
    Union[str, list]
        The converted modifications joined with ``", "``, or an empty list
        when no modification is present (empty or blank input).
    """
    parsed_mod_list = []
    for entry in (mods or "").split("\t\t"):
        if not entry.strip():
            # Empty input or a trailing separator: nothing to parse.
            continue
        fields = entry.split("\t")
        # An entry without a category prefix is used whole instead of raising IndexError.
        mod_spec = fields[1] if len(fields) > 1 else fields[0]
        if not mod_spec.strip():
            continue
        parsed_mod_list.append(_homogenize_mod(mod_spec))
    return ", ".join(parsed_mod_list) if parsed_mod_list else []
if __name__ == "__main__":
    # Manual smoke run over the MetaMorpheus test fixtures, in both argument orders.
    # NOTE(review): `extract_params` is not defined in the visible part of this
    # file; it is presumably defined elsewhere in the module - confirm.
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost - confirm against the repository.
    fnames = [
        [
            "../../../test/params/metamorpheus_search_task_config.toml",
            "../../../test/params/metamorpheus_version_result.txt",
        ],
        # Reverse order
        [
            "../../../test/params/metamorpheus_version_result.txt",
            "../../../test/params/metamorpheus_search_task_config.toml",
        ],
    ]
    for file1, file2 in fnames:
        # Extract parameters from the file
        parameters = extract_params(file1, file2)
        print(parameters.__dict__)
        # With streamlit the IO object is used -> open files
        print("\n")
        with open(file1, "r") as f1, open(file2, "r") as f2:
            parameters = extract_params(f1, f2)
            # Rewind both handles after parsing.
            f1.seek(0), f2.seek(0)
        print("\n")
        print(parameters.__dict__)
        # Write the extracted parameters to a CSV file next to the fixtures.
        series = pd.Series(parameters.__dict__)
        series.to_csv("../../../test/params/metamorpheus_parameters.csv")
        print("\n")