"""Functionality to parse Maxqunt mqpar.xml parameter files."""
from __future__ import annotations
import collections
import logging
import os
import xml.etree.ElementTree as ET
from pathlib import Path
import numpy as np
import pandas as pd
from proteobench.io.params import ProteoBenchParameters
# Module-level logger named after this module, so its records can be filtered
# and configured separately instead of being emitted by the root logger.
logger = logging.getLogger(__name__)
[docs]
def extend_tuple(t: tuple, target_length: int) -> tuple:
    """
    Extend tuple with None values to match target length.

    Parameters
    ----------
    t : tuple
        The tuple to extend.
    target_length : int
        The target length of the tuple.

    Returns
    -------
    tuple
        A new tuple consisting of ``t`` followed by as many ``None`` values
        as are needed to reach ``target_length``.

    Raises
    ------
    TypeError
        If the input is not a tuple.
    ValueError
        If the tuple is longer than the target length.
    """
    if not isinstance(t, tuple):
        raise TypeError(f"Wrong type provided. Expected tuple, got {type(t)} : {t!r}")
    if len(t) > target_length:
        # The parenthesis opened before "got" is closed before the tuple repr.
        raise ValueError(f"Tuple is too long (got {len(t)}, expected {target_length}): {t!r}")
    return t + (None,) * (target_length - len(t))
[docs]
def extend_tuples_with_none(list_of_tuples: list[tuple], target_length: int):
    """
    Pad every tuple of a list with None values up to a common length.

    Parameters
    ----------
    list_of_tuples : list of tuple
        The tuples to pad.
    target_length : int
        The length each returned tuple should have.

    Returns
    -------
    list of tuple
        A new list holding the padded tuples, in the input order.
    """
    # Type and length validation is delegated to `extend_tuple`.
    return [extend_tuple(entry, target_length) for entry in list_of_tuples]
[docs]
def add_record(data: dict, tag: str, record) -> dict:
    """
    Store a record under a tag, collecting repeated tags in a list.

    Parameters
    ----------
    data : dict
        The dictionary that is updated in place.
    tag : str
        The key under which the record is stored.
    record : any
        The value to store.

    Returns
    -------
    dict
        The same ``data`` object that was passed in, after the update.
    """
    if tag not in data:
        # First occurrence of the tag: store the record as is.
        data[tag] = record
        return data

    existing = data[tag]
    if isinstance(existing, list):
        existing.append(record)
    else:
        # Second occurrence: turn the single value into a list of both.
        data[tag] = [existing, record]
    return data
[docs]
def read_xml_record(element: ET.Element) -> dict:
    """
    Convert an XML element and its descendants into a nested dict structure.

    The attributes of ``element`` are copied into the result first. Each
    child is then handled in one of three ways:

    - a child with more than one sub-element is stored under its tag as a
      list holding one single-key dictionary per sub-element (this replaces
      any earlier entry stored under the same tag);
    - a child with non-blank text is stored as its stripped text;
    - any other child is converted recursively.

    In the last two cases, repeated tags are collected via `add_record`.

    Parameters
    ----------
    element : xml.etree.ElementTree.Element
        The XML element to read.

    Returns
    -------
    dict
        The nested dictionary structure of the XML element, or ``None`` if
        the element contributes neither attributes nor child entries.
    """
    parsed = {}
    if element.attrib:
        parsed.update(element.attrib)

    for node in element:
        node_text = node.text.strip() if node.text else ""
        if len(node) > 1 and node.tag:
            # Several sub-elements: process them one by one; each one is
            # either plain text or a nested record.
            entries = []
            for sub in node:
                sub_text = sub.text.strip() if sub.text else ""
                value = sub_text if sub_text else read_xml_record(sub)
                entries.append(add_record(dict(), tag=sub.tag, record=value))
            parsed[node.tag] = entries
        elif node_text:
            parsed = add_record(data=parsed, tag=node.tag, record=node_text)
        else:
            parsed = add_record(parsed, node.tag, read_xml_record(node))

    # Empty strings and None are normalized to None.
    return parsed if parsed else None
[docs]
def read_file(file: str) -> dict:
    """
    Parse a MaxQuant xml file into a nested dictionary.

    Parameters
    ----------
    file : str
        The path to the XML file.

    Returns
    -------
    dict
        The content of the file's root element as produced by
        `read_xml_record`.
    """
    root = ET.parse(file).getroot()
    return read_xml_record(root)
[docs]
def flatten_dict_of_dicts(d: dict, parent_key: tuple = ()) -> list[tuple[tuple, object]]:
    """
    Build tuples for nested dictionaries for use as `pandas.MultiIndex`.

    Parameters
    ----------
    d : dict
        Nested dictionary for which all keys are flattened to tuples.
    parent_key : tuple, optional
        Tuple of outer keys (used for recursion), by default ``()``. Any
        falsy value (such as an empty string) means "no outer key".

    Returns
    -------
    list of tuple
        ``(key_tuple, value)`` pairs in iteration order, where ``key_tuple``
        is ``(outer_key, ..., inner_key)``. The same key tuple occurs more
        than once when a key maps to a list with several string items.

    Raises
    ------
    ValueError
        If a list contains an item that is neither a mapping nor a string.
    """
    items = []
    for k, v in d.items():
        new_key = parent_key + (k,) if parent_key else (k,)
        if isinstance(v, collections.abc.MutableMapping):
            items.extend(flatten_dict_of_dicts(v, parent_key=new_key))
        elif isinstance(v, list):
            for item in v:
                if isinstance(item, collections.abc.MutableMapping):
                    items.extend(flatten_dict_of_dicts(item, parent_key=new_key))
                elif isinstance(item, str):
                    items.append((new_key, item))
                else:
                    # `!r` is the repr conversion; `:r` would be parsed as a
                    # format spec and fail before the message is built.
                    raise ValueError(f"Unknown item: {item!r}")
        else:
            items.append((new_key, v))
    return items
[docs]
def build_Series_from_records(records, index_length=4):
    """
    Turn nested records into a pandas Series with a MultiIndex.

    Parameters
    ----------
    records : dict
        The nested records; they are flattened with `flatten_dict_of_dicts`.
    index_length : int, optional
        The number of index levels, by default 4. Shorter key tuples are
        padded with None by `extend_tuple`.

    Returns
    -------
    pandas.Series
        The flattened values, indexed by their padded key tuples.
    """
    flat_items = flatten_dict_of_dicts(records)
    padded_keys = [extend_tuple(key, index_length) for key, _ in flat_items]
    values = [value for _, value in flat_items]
    return pd.Series(values, index=pd.MultiIndex.from_tuples(padded_keys))
# create a first version of json files to match
if __name__ == "__main__":
    import json
    from pprint import pprint

    # Example parameter files; for each one a .json dump of the raw records,
    # a .csv of the flattened records and a "_sel.json" with the extracted
    # parameters are written next to the input file.
    test_files = [
        "../../../test/params/mqpar_MQ1.6.3.3_MBR.xml",
        "../../../test/params/mqpar_MQ2.1.3.0_noMBR.xml",
        "../../../test/params/mqpar1.5.3.30_MBR.xml",
        "../../../test/params/mqpar_mq2.6.2.0_1mc_MBR.xml",
    ]
    for test_file in test_files:
        print(f"{test_file = }")
        xml_path = Path(test_file)

        # Raw nested records as json
        raw_record = read_file(test_file)
        json_path = xml_path.with_suffix(".json")
        json_path.write_text(json.dumps(raw_record, indent=4))

        # Flattened records as csv
        series = build_Series_from_records(raw_record, 4)
        frame = series.to_frame("run_identifier")
        frame.to_csv(xml_path.with_suffix(".csv"))

        # Selected parameters as json
        params = extract_params(test_file, ms2frac="FTMS")
        pprint(params.__dict__)
        fname = xml_path.with_name(xml_path.stem + "_sel" + ".json")
        with open(fname, "w") as f:
            json.dump(params.__dict__, f, indent=4)