From e12b64d97cb5155fab15e4ea9e4c55d36802a623 Mon Sep 17 00:00:00 2001 From: RalfG Date: Wed, 11 Jun 2025 18:03:24 +0200 Subject: [PATCH 01/11] Add more support for typing, fix some typing-related edge case bugs --- psm_utils/peptidoform.py | 68 +++++++++++++++++++++++++++++----------- psm_utils/psm.py | 17 ++++++---- psm_utils/psm_list.py | 47 +++++++++++++++------------ 3 files changed, 87 insertions(+), 45 deletions(-) diff --git a/psm_utils/peptidoform.py b/psm_utils/peptidoform.py index f92e297..7f0923a 100644 --- a/psm_utils/peptidoform.py +++ b/psm_utils/peptidoform.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections import defaultdict -from typing import Iterable, List, Tuple, Union +from typing import Iterable, List, Tuple, TypedDict, Union, cast import numpy as np from pyteomics import mass, proforma @@ -29,8 +29,10 @@ def __init__(self, proforma_sequence: Union[str, proforma.ProForma]) -> None: ---------- parsed_sequence : list List of tuples with residue and modifications for each location. - properties : dict[str, Any] - Dict with sequence-wide properties. + properties : :py:class:`PeptidoformProperties` + Dictionary with properties of the peptidoform, including N- and C-terminal + modifications, unlocalized modifications, labile modifications, fixed + modifications, and charge state. 
Examples -------- @@ -39,6 +41,10 @@ def __init__(self, proforma_sequence: Union[str, proforma.ProForma]) -> None: 711.2567622919099 """ + self.parsed_sequence: List[Tuple[str, List[proforma.TagBase] | None]] + self.properties: PeptidoformProperties + + # Parse ProForma if isinstance(proforma_sequence, str): try: self.parsed_sequence, self.properties = proforma.parse(proforma_sequence) @@ -66,13 +72,21 @@ def __str__(self) -> str: def __hash__(self) -> int: return hash(self.proforma) - def __eq__(self, __o: Union[Peptidoform, str]) -> bool: + def __eq__(self, __o: object) -> bool: if isinstance(__o, str): return self.proforma == __o - elif isinstance(__o, Peptidoform): + elif isinstance(__o, Peptidoform): # type: ignore[return] return self.proforma == __o.proforma else: - raise TypeError(f"Cannot compare {type(__o)} with Peptidoform.") + raise TypeError(f"Unsupported comparison type for Peptidoform: {type(__o)}") + + def __lt__(self, __o: object) -> bool: + if isinstance(__o, str): + return self.proforma < __o + elif isinstance(__o, Peptidoform): + return self.proforma < __o.proforma + else: + raise TypeError(f"Unsupported comparison type for Peptidoform: {type(__o)}") def __iter__(self) -> Iterable[Tuple[str, Union[None, List[proforma.TagBase]]]]: return self.parsed_sequence.__iter__() @@ -188,8 +202,9 @@ def sequential_composition(self) -> list[mass.Composition]: # Get compositions for fixed modifications by amino acid fixed_rules = {} for rule in self.properties["fixed_modifications"]: - for aa in rule.targets: - fixed_rules[aa] = rule.modification_tag.composition + if rule.targets is not None: + for aa in rule.targets: + fixed_rules[aa] = rule.modification_tag.composition comp_list = [] @@ -220,6 +235,7 @@ def sequential_composition(self) -> list[mass.Composition]: # Localized modifications if tags: for tag in tags: + tag = cast(proforma.ModificationBase, tag) try: position_comp += tag.composition except (AttributeError, KeyError) as e: @@ -275,7 +291,7 @@ def 
composition(self) -> mass.Composition: return comp @property - def sequential_theoretical_mass(self) -> float: + def sequential_theoretical_mass(self) -> list[float]: """ Monoisotopic mass of both termini and each (modified) residue. @@ -296,8 +312,9 @@ def sequential_theoretical_mass(self) -> float: """ fixed_rules = {} for rule in self.properties["fixed_modifications"]: - for aa in rule.targets: - fixed_rules[aa] = rule.modification_tag.mass + if rule.targets is not None: + for aa in rule.targets: + fixed_rules[aa] = rule.modification_tag.mass mass_list = [] @@ -326,6 +343,7 @@ def sequential_theoretical_mass(self) -> float: # Localized modifications if tags: for tag in tags: + tag = cast(proforma.ModificationBase, tag) try: position_mass += tag.mass except (AttributeError, KeyError) as e: @@ -496,15 +514,14 @@ def add_fixed_modifications( """ if isinstance(modification_rules, dict): - modification_rules = modification_rules.items() - modification_rules = [ + modification_rules = list(modification_rules.items()) + + parsed_modification_rules = [ proforma.ModificationRule(proforma.process_tag_tokens(mod), targets) for mod, targets in modification_rules ] - if self.properties["fixed_modifications"]: - self.properties["fixed_modifications"].extend(modification_rules) - else: - self.properties["fixed_modifications"] = modification_rules + + self.properties.setdefault("fixed_modifications", []).extend(parsed_modification_rules) def apply_fixed_modifications(self): """ @@ -530,8 +547,9 @@ def apply_fixed_modifications(self): # Setup target_aa -> modification_list dictionary rule_dict = defaultdict(list) for rule in self.properties["fixed_modifications"]: - for target_aa in rule.targets: - rule_dict[target_aa].append(rule.modification_tag) + if rule.targets is not None: + for target_aa in rule.targets: + rule_dict[target_aa].append(rule.modification_tag) # Apply modifications to sequence for i, (aa, site_mods) in enumerate(self.parsed_sequence): @@ -553,6 +571,18 @@ def 
apply_fixed_modifications(self): self.properties["fixed_modifications"] = [] +class PeptidoformProperties(TypedDict): + """Property items of a :py:class:`Peptidoform`.""" + + n_term: list[proforma.ModificationBase] | None + c_term: list[proforma.ModificationBase] | None + unlocalized_modifications: list[proforma.ModificationBase] + labile_modifications: list[proforma.ModificationBase] + fixed_modifications: list[proforma.ModificationRule] + charge_state: proforma.ChargeState + isotopes: list[proforma.StableIsotope] + + def format_number_as_string(num): """Format number as string for ProForma mass modifications.""" # Using this method over `:+g` string formatting to avoid rounding and scientific notation diff --git a/psm_utils/psm.py b/psm_utils/psm.py index 2d01d08..9888d72 100644 --- a/psm_utils/psm.py +++ b/psm_utils/psm.py @@ -10,8 +10,8 @@ class PSM(BaseModel): """Data class representing a peptide-spectrum match (PSM).""" - peptidoform: Union[Peptidoform, str] - spectrum_id: Union[str] + peptidoform: Union[Peptidoform, str] # type: ignore + spectrum_id: str run: Optional[str] = None collection: Optional[str] = None spectrum: Optional[Any] = None @@ -89,25 +89,30 @@ def __init__(self, **data): super().__init__(**data) # Parse peptidoform if isinstance(self.peptidoform, str): - self.peptidoform = Peptidoform(self.peptidoform) + self.peptidoform: Peptidoform = Peptidoform(self.peptidoform) elif not isinstance(self.peptidoform, Peptidoform): raise TypeError( f"Peptidoform or str expected for `peptidoform`, not `{type(self.peptidoform)}`." 
) - def __getitem__(self, item) -> any: + def __getitem__(self, item) -> Any: return getattr(self, item) - def __setitem__(self, item, value: any) -> None: + def __setitem__(self, item, value: Any) -> None: setattr(self, item, value) @property def precursor_mz_error(self) -> float: """Difference between observed and theoretical m/z in Da.""" theoretical_mz = self.peptidoform.theoretical_mz + if theoretical_mz is None or self.precursor_mz is None: + raise ValueError( + "Cannot calculate precursor m/z error: " + "precursor m/z is not set or theoretical m/z cannot be calculated." + ) return self.precursor_mz - theoretical_mz - def get_precursor_charge(self) -> int: + def get_precursor_charge(self) -> int | None: """Precursor charge, as embedded in :py:attr:`PSM.peptidoform`.""" return self.peptidoform.precursor_charge diff --git a/psm_utils/psm_list.py b/psm_utils/psm_list.py index c38ffdf..8190611 100644 --- a/psm_utils/psm_list.py +++ b/psm_utils/psm_list.py @@ -1,7 +1,7 @@ from __future__ import annotations import re -from typing import Iterable, List, Sequence +from typing import Iterator, List, Sequence, cast import numpy as np import pandas as pd @@ -83,13 +83,13 @@ def __str__(self): def __add__(self, other): return PSMList(psm_list=self.psm_list + other.psm_list) - def __iter__(self) -> Iterable[PSM]: + def __iter__(self) -> Iterator[PSM]: # type: ignore[override] return self.psm_list.__iter__() def __len__(self) -> int: return self.psm_list.__len__() - def __getitem__(self, item) -> PSM | list[PSM]: + def __getitem__(self, item) -> PSM | PSMList | np.ndarray: if isinstance(item, (int, np.integer)): # Return single PSM by index return self.psm_list[item] @@ -127,16 +127,18 @@ def __setitem__(self, item, values: Sequence) -> None: @property def collections(self) -> list: """List of collections in :py:class:`PSMList`.""" - if (self["collection"] != None).any(): # noqa: E711 - return list(np.unique(self["collection"])) + collection_array = 
np.asarray(self["collection"]) + if (collection_array != None).any(): # noqa: E711 + return np.unique(collection_array).tolist() else: return [None] @property def runs(self) -> list: """List of runs in :py:class:`PSMList`.""" - if (self["run"] != None).any(): # noqa: E711 - return list(np.unique(self["run"])) + run_array = np.asarray(self["run"]) + if (run_array != None).any(): # noqa: E711 + return np.unique(run_array).tolist() else: return [None] @@ -168,14 +170,14 @@ def set_ranks(self, lower_score_better: bool = False): """Set identification ranks for all PSMs in :py:class:`PSMList`.""" columns = ["collection", "run", "spectrum_id", "score"] self["rank"] = ( - pd.DataFrame(self[columns], columns=columns) + pd.DataFrame(np.array([self[c] for c in columns]).transpose(), columns=columns) .sort_values("score", ascending=lower_score_better) .fillna(0) # groupby does not play well with None values .groupby(["collection", "run", "spectrum_id"]) .cumcount() .sort_index() + 1 # 1-based counting - ) + ).to_list() def get_rank1_psms(self, *args, **kwargs) -> PSMList: """ @@ -184,9 +186,10 @@ def get_rank1_psms(self, *args, **kwargs) -> PSMList: First runs :py:meth:`~set_ranks` with ``*args`` and ``**kwargs`` if if any PSM has no rank yet. 
""" - if None in self["rank"]: + rank_array = np.asarray(self["rank"]) + if None in rank_array: self.set_ranks(*args, **kwargs) - return self[self["rank"] == 1] + return PSMList(psm_list=[self.psm_list[i] for i in np.flatnonzero(rank_array == 1)]) def find_decoys(self, decoy_pattern: str) -> None: """ @@ -211,9 +214,12 @@ def find_decoys(self, decoy_pattern: str) -> None: >>> psm_list.find_decoys(r"^DECOY_") """ - decoy_pattern = re.compile(decoy_pattern) + pattern = re.compile(decoy_pattern) for psm in self: - psm.is_decoy = all([decoy_pattern.search(p) is not None for p in psm.protein_list]) + if psm.protein_list is not None: + psm.is_decoy = all(pattern.search(p) is not None for p in psm.protein_list) + else: + psm.is_decoy = None def calculate_qvalues(self, reverse: bool = True, **kwargs) -> None: """ @@ -233,7 +239,7 @@ def calculate_qvalues(self, reverse: bool = True, **kwargs) -> None: """ for key in ["score", "is_decoy"]: - if (self[key] == None).any(): # noqa: E711 (self[key] is a Numpy array) + if (np.asarray(self[key]) == None).any(): # noqa: E711 (self[key] is a Numpy array) raise ValueError( f"Cannot calculate q-values if not all PSMs have `{key}` assigned." 
) @@ -294,16 +300,17 @@ def add_fixed_modifications( """ if isinstance(modification_rules, dict): - modification_rules = modification_rules.items() - modification_rules = [ + modification_rules = list(modification_rules.items()) + + parsed_modification_rules = [ proforma.ModificationRule(proforma.process_tag_tokens(mod), targets) for mod, targets in modification_rules ] + for psm in self.psm_list: - if psm.peptidoform.properties["fixed_modifications"]: - psm.peptidoform.properties["fixed_modifications"].extend(modification_rules) - else: - psm.peptidoform.properties["fixed_modifications"] = modification_rules + psm.peptidoform.properties.setdefault("fixed_modifications", []).extend( # type: ignore[union-attr] + cast(list, parsed_modification_rules) + ) def apply_fixed_modifications(self): """ From 4044a550b68f42a43745345a13bbc63993a61e40 Mon Sep 17 00:00:00 2001 From: RalfG Date: Tue, 5 Aug 2025 18:04:36 +0200 Subject: [PATCH 02/11] Typing; everywhere... Also fully upgraded the PD module to sqlalchemy 2 paradigms and added tests. 
--- psm_utils/__main__.py | 2 + psm_utils/io/__init__.py | 207 +++-- psm_utils/io/_base_classes.py | 48 +- psm_utils/io/_pd_msf_tables.py | 895 +++++++++++----------- psm_utils/io/_utils.py | 11 +- psm_utils/io/alphadia.py | 54 +- psm_utils/io/diann.py | 51 +- psm_utils/io/flashlfq.py | 95 ++- psm_utils/io/fragpipe.py | 45 +- psm_utils/io/idxml.py | 526 ++++++++----- psm_utils/io/ionbot.py | 168 ++-- psm_utils/io/maxquant.py | 47 +- psm_utils/io/msamanda.py | 57 +- psm_utils/io/mzid.py | 277 ++++--- psm_utils/io/parquet.py | 63 +- psm_utils/io/peptide_record.py | 293 ++++--- psm_utils/io/pepxml.py | 94 ++- psm_utils/io/percolator.py | 198 ++--- psm_utils/io/proteome_discoverer.py | 305 ++++++-- psm_utils/io/proteoscape.py | 57 +- psm_utils/io/sage.py | 77 +- psm_utils/io/tsv.py | 89 ++- psm_utils/io/xtandem.py | 91 ++- psm_utils/peptidoform.py | 126 +-- psm_utils/psm.py | 65 +- psm_utils/psm_list.py | 63 +- psm_utils/utils.py | 20 +- pyproject.toml | 15 +- tests/test_data/minimal_test.msf | Bin 0 -> 32768 bytes tests/test_data/minimal_v79_test.msf | Bin 0 -> 663552 bytes tests/test_io/test_peptide_record.py | 36 +- tests/test_io/test_proteome_discoverer.py | 554 +++++++++++++ 32 files changed, 2980 insertions(+), 1649 deletions(-) create mode 100644 tests/test_data/minimal_test.msf create mode 100644 tests/test_data/minimal_v79_test.msf create mode 100644 tests/test_io/test_proteome_discoverer.py diff --git a/psm_utils/__main__.py b/psm_utils/__main__.py index db273b6..c74bbe2 100644 --- a/psm_utils/__main__.py +++ b/psm_utils/__main__.py @@ -32,6 +32,7 @@ def main(): + """Run the main entry point for the psm_utils CLI.""" logging.basicConfig( level="NOTSET", format="%(message)s", @@ -47,6 +48,7 @@ def main(): @click.group() def cli(): + """Command line interface for psm_utils.""" pass diff --git a/psm_utils/io/__init__.py b/psm_utils/io/__init__.py index ac309e2..295b194 100644 --- a/psm_utils/io/__init__.py +++ b/psm_utils/io/__init__.py @@ -1,10 +1,46 @@ 
-"""Parsers for proteomics search results from various search engines.""" +""" +Parsers for proteomics search results from various search engines. + +This module provides a unified interface for reading and writing peptide-spectrum match (PSM) +files from various proteomics search engines and analysis tools. It supports automatic file +type detection and conversion between different formats. + +The module includes: + +- Reader and writer classes for various PSM file formats +- Automatic file type inference from filename patterns +- File conversion utilities +- Progress tracking for long operations +- Type-safe interfaces with comprehensive error handling + +Supported file formats include MaxQuant, MS²PIP, Percolator, mzIdentML, pepXML, and many others. +See the documentation for a complete list of supported formats. + +Examples +-------- +Read a PSM file with automatic format detection: + +>>> from psm_utils.io import read_file +>>> psm_list = read_file("results.tsv") + +Convert between file formats: + +>>> from psm_utils.io import convert +>>> convert("input.msms", "output.mzid") + +Write a PSMList to file: + +>>> from psm_utils.io import write_file +>>> write_file(psm_list, "output.tsv") + +""" from __future__ import annotations import re from pathlib import Path from tempfile import NamedTemporaryFile +from typing import Protocol, TypedDict, runtime_checkable from rich.progress import track @@ -26,12 +62,22 @@ import psm_utils.io.sage as sage import psm_utils.io.tsv as tsv import psm_utils.io.xtandem as xtandem -from psm_utils.io._base_classes import WriterBase +from psm_utils.io._base_classes import ReaderBase, WriterBase from psm_utils.io.exceptions import PSMUtilsIOException from psm_utils.psm import PSM from psm_utils.psm_list import PSMList -FILETYPES = { + +class FileType(TypedDict): + """Type definition for filetype properties.""" + + reader: type[ReaderBase] | None + writer: type[WriterBase] | None + extension: str + filename_pattern: str + + +FILETYPES: 
dict[str, FileType] = { "flashlfq": { "reader": flashlfq.FlashLFQReader, "writer": flashlfq.FlashLFQWriter, @@ -150,12 +196,24 @@ FILETYPES["sage"] = FILETYPES["sage_tsv"] # Alias for backwards compatibility -READERS = {k: v["reader"] for k, v in FILETYPES.items() if v["reader"]} -WRITERS = {k: v["writer"] for k, v in FILETYPES.items() if v["writer"]} +# Type-annotated lookup dictionaries for readers and writers +READERS: dict[str, type[ReaderBase]] = { + k: v["reader"] for k, v in FILETYPES.items() if v["reader"] +} +WRITERS: dict[str, type[WriterBase]] = { + k: v["writer"] for k, v in FILETYPES.items() if v["writer"] +} + + +@runtime_checkable +class _SupportsStr(Protocol): + """Protocol to check if an object supports string conversion.""" + def __str__(self) -> str: ... -def _infer_filetype(filename: str): - """Infer filetype from filename.""" + +def _infer_filetype(filename: _SupportsStr) -> str: + """Infer filetype from filename using pattern matching.""" for filetype, properties in FILETYPES.items(): if re.fullmatch(properties["filename_pattern"], str(filename), flags=re.IGNORECASE): return filetype @@ -163,7 +221,7 @@ def _infer_filetype(filename: str): raise PSMUtilsIOException("Could not infer filetype.") -def _supports_write_psm(writer: WriterBase): +def _supports_write_psm(writer: type[WriterBase]) -> bool: """Check if writer supports write_psm method.""" with NamedTemporaryFile(delete=False) as temp_file: temp_file.close() @@ -182,21 +240,32 @@ def _supports_write_psm(writer: WriterBase): return supports_write_psm -def read_file(filename: str | Path, *args, filetype: str = "infer", **kwargs): +def read_file(filename: str | Path, *args, filetype: str = "infer", **kwargs) -> PSMList: """ Read PSM file into :py:class:`~psm_utils.psmlist.PSMList`. Parameters ---------- - filename: str - Path to file. - filetype: str, optional - File type. Any PSM file type with read support. See psm_utils tag in - :ref:`Supported file formats`. 
- *args : tuple - Additional arguments are passed to the :py:class:`psm_utils.io` reader. - **kwargs : dict, optional - Additional keyword arguments are passed to the :py:class:`psm_utils.io` reader. + filename + Path to the PSM file to read. + filetype + File type specification. Can be any PSM file type with read support or "infer" to + automatically detect from filename pattern. See documentation for supported file formats. + *args + Additional positional arguments passed to the PSM file reader. + **kwargs + Additional keyword arguments passed to the PSM file reader. + + Returns + ------- + List of PSM objects parsed from the input file. + + Raises + ------ + PSMUtilsIOException + If filetype cannot be inferred or if the specified filetype is + unknown or not supported for reading. + """ if filetype == "infer": filetype = _infer_filetype(filename) @@ -218,25 +287,34 @@ def write_file( filetype: str = "infer", show_progressbar: bool = False, **kwargs, -): +) -> None: """ Write :py:class:`~psm_utils.psmlist.PSMList` to PSM file. Parameters ---------- - psm_list: PSMList - PSM list to be written. - filename: str - Path to file. - filetype: str, optional - File type. Any PSM file type with read support. See psm_utils tag in - :ref:`Supported file formats`. - show_progressbar: bool, optional - Show progress bar for conversion process. (default: False) - *args : tuple - Additional arguments are passed to the :py:class:`psm_utils.io` writer. - **kwargs : dict, optional - Additional keyword arguments are passed to the :py:class:`psm_utils.io` writer. + psm_list + List of PSM objects to be written to file. + filename + Path to the output file. + filetype + File type specification. Can be any PSM file type with write support or "infer" to + automatically detect from filename pattern. See documentation for supported file formats. + show_progressbar + Whether to display a progress bar during the writing process. 
+ *args + Additional positional arguments passed to the PSM file writer. + **kwargs + Additional keyword arguments passed to the PSM file writer. + + Raises + ------ + PSMUtilsIOException + If filetype cannot be inferred or if the specified filetype is + unknown or not supported for writing. + IndexError + If psm_list is empty and cannot provide an example PSM. + """ if filetype == "infer": filetype = _infer_filetype(filename) @@ -270,29 +348,37 @@ def convert( input_filetype: str = "infer", output_filetype: str = "infer", show_progressbar: bool = False, -): +) -> None: """ Convert a PSM file from one format into another. Parameters ---------- - input_filename: str - Path to input file. - output_filename: str - Path to output file. - input_filetype: str, optional - File type. Any PSM file type with read support. See psm_utils tag in - :ref:`Supported file formats`. - output_filetype: str, optional - File type. Any PSM file type with write support. See psm_utils tag in - :ref:`Supported file formats`. - show_progressbar: bool, optional - Show progress bar for conversion process. (default: False) - + input_filename + Path to the input PSM file. + output_filename + Path to the output PSM file. + input_filetype + Input file type specification. Can be any PSM file type with read support + or "infer" to automatically detect from filename pattern. + See documentation for supported file formats. + output_filetype + Output file type specification. Can be any PSM file type with write support + or "infer" to automatically detect from filename pattern. + See documentation for supported file formats. + show_progressbar + Whether to display a progress bar during the conversion process. + + Raises + ------ + PSMUtilsIOException + If input or output filetypes cannot be inferred, if the specified filetypes are + unknown or not supported, or if the input file is empty. + KeyError + If the specified filetype is not found in READERS or WRITERS dictionaries. 
Examples -------- - Convert a MaxQuant msms.txt file to a MS²PIP peprec file, while inferring the applicable file types from the file extensions: @@ -309,19 +395,23 @@ def convert( ... output_filetype="peprec" ... ) - Note that filetypes can only be inferred for select specific file names and/or - extensions, such as ``msms.txt`` or ``*.peprec``. + Notes + ----- + Filetypes can only be inferred for select specific file names and/or extensions, such as + ``msms.txt`` or ``*.peprec``. """ - # If needed, infer input and output filetypes if input_filetype == "infer": input_filetype = _infer_filetype(input_filename) if output_filetype == "infer": output_filetype = _infer_filetype(output_filename) - reader_cls = READERS[input_filetype] - writer_cls = WRITERS[output_filetype] + try: + reader_cls = READERS[input_filetype] + writer_cls = WRITERS[output_filetype] + except KeyError as e: + raise PSMUtilsIOException(f"Filetype '{e.args[0]}' unknown or not supported.") from e # Remove file if already exists to avoid appending: if Path(output_filename).is_file(): @@ -330,15 +420,20 @@ def convert( reader = reader_cls(input_filename) if _supports_write_psm(writer_cls): - # Setup iterator, potentially with progress bar - iterator = ( - track(reader, description="[green]Converting file") if show_progressbar else reader - ) + # Setup iterator, potentially with indeterminate progress bar + if show_progressbar: + # Use indeterminate progress tracking for lazy evaluation + iterator = track(reader, description="[green]Converting file") + else: + iterator = reader # Get example PSM and instantiate writer for psm in reader: example_psm = psm break + else: + raise PSMUtilsIOException("Input file is empty or does not contain valid PSMs.") + writer = writer_cls(output_filename, example_psm=example_psm, mode="write") # Convert diff --git a/psm_utils/io/_base_classes.py b/psm_utils/io/_base_classes.py index 60f9c19..d0646b0 100644 --- a/psm_utils/io/_base_classes.py +++ 
b/psm_utils/io/_base_classes.py @@ -3,6 +3,7 @@ from __future__ import annotations from abc import ABC, abstractmethod +from collections.abc import Iterator from pathlib import Path from psm_utils.psm import PSM @@ -12,6 +13,8 @@ class ReaderBase(ABC): """Abstract base class for PSM file readers.""" + filename: Path + def __init__( self, filename: str | Path, @@ -19,26 +22,32 @@ def __init__( **kwargs, ) -> None: """ - Reader for PSM file. + Initialize PSM file reader. Parameters ---------- - filename: str, pathlib.Path + filename : str or pathlib.Path Path to PSM file. + *args + Additional positional arguments for subclasses. + **kwargs + Additional keyword arguments for subclasses. """ super().__init__() - self.filename = Path(filename) - def __enter__(self): + def __enter__(self) -> ReaderBase: + """Enter context manager.""" return self - def __exit__(self, *args, **kwargs): + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + """Exit context manager.""" pass @abstractmethod - def __iter__(self): + def __iter__(self) -> Iterator[PSM]: + """Iterate over the PSM file and return PSMs one-by-one.""" raise NotImplementedError() def read_file(self) -> PSMList: @@ -49,22 +58,39 @@ def read_file(self) -> PSMList: class WriterBase(ABC): """Abstract base class for PSM file writers.""" - def __init__(self, filename, *args, **kwargs): + filename: Path + + def __init__(self, filename: str | Path, *args, **kwargs) -> None: + """ + Initialize PSM file writer. + + Parameters + ---------- + filename : str or pathlib.Path + Path to output PSM file. + *args + Additional positional arguments for subclasses. + **kwargs + Additional keyword arguments for subclasses. 
+ + """ super().__init__() self.filename = Path(filename) - def __enter__(self): + def __enter__(self) -> WriterBase: + """Enter context manager.""" return self - def __exit__(self, *args, **kwargs): + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + """Exit context manager.""" pass @abstractmethod - def write_psm(self, psm: PSM): + def write_psm(self, psm: PSM) -> None: """Write a single PSM to the PSM file.""" raise NotImplementedError() @abstractmethod - def write_file(self, psm_list: PSMList): + def write_file(self, psm_list: PSMList) -> None: """Write an entire PSMList to the PSM file.""" raise NotImplementedError() diff --git a/psm_utils/io/_pd_msf_tables.py b/psm_utils/io/_pd_msf_tables.py index a2323bd..fbbbdd4 100644 --- a/psm_utils/io/_pd_msf_tables.py +++ b/psm_utils/io/_pd_msf_tables.py @@ -1,242 +1,270 @@ -"""SQLAlchemy models for Mascot MSF files.""" +""" +SQLAlchemy ORM models for Proteome Discoverer MSF database files. + +This module provides SQLAlchemy table definitions for interfacing with Proteome Discoverer MSF +(Mascot Search Form) database files. MSF files contain proteomics search results including +peptide identifications, protein annotations, spectra metadata, and quantification data. + +The table definitions are auto-generated from MSF schema and follow SQLAlchemy 2.0 patterns +with proper typing support. + +Examples +-------- +>>> from psm_utils.io._pd_msf_tables import Base, Peptide +>>> # Use with SQLAlchemy session to query MSF database +>>> session.query(Peptide).filter(Peptide.ConfidenceLevel > 2).all() + +Notes +----- +These models are primarily used internally by the proteome_discoverer module for reading PSM +data from MSF files. 
+ +""" + +from __future__ import annotations + +from datetime import datetime from sqlalchemy import ( CHAR, BigInteger, Boolean, - Column, DateTime, Float, Index, Integer, LargeBinary, + MetaData, SmallInteger, String, - Table, Text, UniqueConstraint, text, ) +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column + + +class Base(DeclarativeBase): + """Base class for all MSF table models.""" -try: - from sqlalchemy.orm import declarative_base -except ImportError: - from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.sql.sqltypes import NullType + pass -Base = declarative_base() -metadata = Base.metadata + +# Module-level metadata reference for table definitions +metadata: MetaData = Base.metadata class AminoAcidModification(Base): __tablename__ = "AminoAcidModifications" - AminoAcidModificationID = Column(Integer, primary_key=True) - ModificationName = Column(String, nullable=False) - DeltaMass = Column(Float) - Substitution = Column(String) - LeavingGroup = Column(String) - Abbreviation = Column(String, nullable=False) - PositionType = Column(Integer, nullable=False) - IsActive = Column(Boolean) - DeltaAverageMass = Column(Float) - UnimodAccession = Column(String) - IsSubstitution = Column(Boolean, nullable=False, server_default=text("0")) + AminoAcidModificationID: Mapped[int] = mapped_column(Integer, primary_key=True) + ModificationName: Mapped[str] = mapped_column(String, nullable=False) + DeltaMass: Mapped[float | None] = mapped_column(Float) + Substitution: Mapped[str | None] = mapped_column(String) + LeavingGroup: Mapped[str | None] = mapped_column(String) + Abbreviation: Mapped[str] = mapped_column(String, nullable=False) + PositionType: Mapped[int] = mapped_column(Integer, nullable=False) + IsActive: Mapped[bool | None] = mapped_column(Boolean) + DeltaAverageMass: Mapped[float | None] = mapped_column(Float) + UnimodAccession: Mapped[str | None] = mapped_column(String) + IsSubstitution: Mapped[bool] = mapped_column(Boolean, 
nullable=False, server_default=text("0")) class AminoAcidModificationsAminoAcid(Base): __tablename__ = "AminoAcidModificationsAminoAcids" - AminoAcidModificationID = Column(Integer, primary_key=True, nullable=False) - AminoAcidID = Column(Integer, primary_key=True, nullable=False) - Classification = Column(Integer, nullable=False) + AminoAcidModificationID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + AminoAcidID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + Classification: Mapped[int] = mapped_column(Integer, nullable=False) class AminoAcidModificationsAminoAcidsNL(Base): __tablename__ = "AminoAcidModificationsAminoAcidsNL" - AminoAcidModificationID = Column(Integer, primary_key=True, nullable=False) - AminoAcidID = Column(Integer, primary_key=True, nullable=False) - NeutralLossID = Column(Integer, primary_key=True, nullable=False) + AminoAcidModificationID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + AminoAcidID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + NeutralLossID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) class AminoAcidModificationsNeutralLoss(Base): __tablename__ = "AminoAcidModificationsNeutralLosses" - NeutralLossID = Column(Integer, primary_key=True) - Name = Column(String, nullable=False) - MonoisotopicMass = Column(Float, nullable=False) - AverageMass = Column(Float, nullable=False) + NeutralLossID: Mapped[int] = mapped_column(Integer, primary_key=True) + Name: Mapped[str] = mapped_column(String, nullable=False) + MonoisotopicMass: Mapped[float] = mapped_column(Float, nullable=False) + AverageMass: Mapped[float] = mapped_column(Float, nullable=False) class AminoAcid(Base): __tablename__ = "AminoAcids" - AminoAcidID = Column(Integer, primary_key=True) - AminoAcidName = Column(String, nullable=False) - OneLetterCode = Column(CHAR) - ThreeLetterCode = Column(CHAR) - MonoisotopicMass = Column(Float, 
nullable=False) - AverageMass = Column(Float, nullable=False) - SumFormula = Column(String) + AminoAcidID: Mapped[int] = mapped_column(Integer, primary_key=True) + AminoAcidName: Mapped[str] = mapped_column(String, nullable=False) + OneLetterCode: Mapped[str | None] = mapped_column(CHAR) + ThreeLetterCode: Mapped[str | None] = mapped_column(CHAR) + MonoisotopicMass: Mapped[float] = mapped_column(Float, nullable=False) + AverageMass: Mapped[float] = mapped_column(Float, nullable=False) + SumFormula: Mapped[str | None] = mapped_column(String) class AnnotationDataVersion(Base): __tablename__ = "AnnotationDataVersion" - PcDataVersion = Column(Integer, primary_key=True) - PcDataRelease = Column(BigInteger, nullable=False) + PcDataVersion: Mapped[int] = mapped_column(Integer, primary_key=True) + PcDataRelease: Mapped[int] = mapped_column(BigInteger, nullable=False) class AnnotationDataset(Base): __tablename__ = "AnnotationDataset" - DatasetId = Column(Integer, primary_key=True) - Name = Column(String, nullable=False) - DisplayName = Column(String, nullable=False) - Guid = Column(String, nullable=False) - Description = Column(Text) + DatasetId: Mapped[int] = mapped_column(Integer, primary_key=True) + Name: Mapped[str] = mapped_column(String, nullable=False) + DisplayName: Mapped[str] = mapped_column(String, nullable=False) + Guid: Mapped[str] = mapped_column(String, nullable=False) + Description: Mapped[str | None] = mapped_column(Text) class AnnotationGroup(Base): __tablename__ = "AnnotationGroups" - AnnotationGroupId = Column(Integer, primary_key=True, nullable=False) - Description = Column(Text) - DatasetId = Column(Integer, primary_key=True, nullable=False) - Position = Column(Integer, nullable=False) - ColorR = Column(Integer, nullable=False) - ColorG = Column(Integer, nullable=False) - ColorB = Column(Integer, nullable=False) - GroupDefinition = Column(LargeBinary) + AnnotationGroupId: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + 
Description: Mapped[str | None] = mapped_column(Text) + DatasetId: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + Position: Mapped[int] = mapped_column(Integer, nullable=False) + ColorR: Mapped[int] = mapped_column(Integer, nullable=False) + ColorG: Mapped[int] = mapped_column(Integer, nullable=False) + ColorB: Mapped[int] = mapped_column(Integer, nullable=False) + GroupDefinition: Mapped[bytes | None] = mapped_column(LargeBinary) class AnnotationType(Base): __tablename__ = "AnnotationTypes" - AnnotationTypeId = Column(Integer, primary_key=True) - Name = Column(String, nullable=False) - Description = Column(Text) + AnnotationTypeId: Mapped[int] = mapped_column(Integer, primary_key=True) + Name: Mapped[str] = mapped_column(String, nullable=False) + Description: Mapped[str | None] = mapped_column(Text) class Annotation(Base): __tablename__ = "Annotations" - AnnotationId = Column(Integer, primary_key=True) - Accession = Column(String, nullable=False) - Description = Column(Text) - type = Column(Integer) + AnnotationId: Mapped[int] = mapped_column(Integer, primary_key=True) + Accession: Mapped[str] = mapped_column(String, nullable=False) + Description: Mapped[str | None] = mapped_column(Text) + type: Mapped[int | None] = mapped_column(Integer) class AnnotationsAnnotationGroup(Base): __tablename__ = "AnnotationsAnnotationGroups" - AnnotationId = Column(Integer, primary_key=True, nullable=False) - AnnotationGroupId = Column(Integer, primary_key=True, nullable=False) + AnnotationId: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + AnnotationGroupId: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) class AnnotationsProtein(Base): __tablename__ = "AnnotationsProtein" - proteinID = Column(Integer, primary_key=True, nullable=False) - AnnotationId = Column(Integer, primary_key=True, nullable=False) - Evidence = Column(Integer, primary_key=True) - PositionBegin = Column(Integer, primary_key=True) - 
PositionEnd = Column(Integer) - ProteinAccession = Column(String, primary_key=True, nullable=False) + proteinID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + AnnotationId: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + Evidence: Mapped[int | None] = mapped_column(Integer, primary_key=True) + PositionBegin: Mapped[int | None] = mapped_column(Integer, primary_key=True) + PositionEnd: Mapped[int | None] = mapped_column(Integer) + ProteinAccession: Mapped[str] = mapped_column(String, primary_key=True, nullable=False) class Chromatogram(Base): __tablename__ = "Chromatograms" - FileID = Column(Integer, primary_key=True, nullable=False) - TraceType = Column(Integer, primary_key=True, nullable=False) - Chromatogram = Column(String, nullable=False) + FileID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + TraceType: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + Chromatogram: Mapped[str] = mapped_column(String, nullable=False) class CustomDataField(Base): __tablename__ = "CustomDataFields" - FieldID = Column(Integer, primary_key=True) - Guid = Column(String, nullable=False) - DisplayName = Column(String, nullable=False) - SourceNodeNumber = Column(Integer, nullable=False) - TargetNodeNumber = Column(Integer, nullable=False) - DataType = Column(Integer, nullable=False) - DataTarget = Column(Integer, nullable=False) - Version = Column(Float, nullable=False) - AccessMode = Column(Integer, server_default=text("0")) - Visibility = Column(Integer, server_default=text("0")) - GroupVisibility = Column(Integer, server_default=text("0")) - Format = Column(String) - PlotType = Column(Integer, nullable=False) - DataPurpose = Column(String) + FieldID: Mapped[int] = mapped_column(Integer, primary_key=True) + Guid: Mapped[str] = mapped_column(String, nullable=False) + DisplayName: Mapped[str] = mapped_column(String, nullable=False) + SourceNodeNumber: Mapped[int] = 
mapped_column(Integer, nullable=False) + TargetNodeNumber: Mapped[int] = mapped_column(Integer, nullable=False) + DataType: Mapped[int] = mapped_column(Integer, nullable=False) + DataTarget: Mapped[int] = mapped_column(Integer, nullable=False) + Version: Mapped[float] = mapped_column(Float, nullable=False) + AccessMode: Mapped[int | None] = mapped_column(Integer, server_default=text("0")) + Visibility: Mapped[int | None] = mapped_column(Integer, server_default=text("0")) + GroupVisibility: Mapped[int | None] = mapped_column(Integer, server_default=text("0")) + Format: Mapped[str | None] = mapped_column(String) + PlotType: Mapped[int] = mapped_column(Integer, nullable=False) + DataPurpose: Mapped[str | None] = mapped_column(String) class CustomDataPeptide(Base): __tablename__ = "CustomDataPeptides" - FieldID = Column(Integer, primary_key=True, nullable=False) - PeptideID = Column(Integer, primary_key=True, nullable=False, index=True) - FieldValue = Column(String) + FieldID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + PeptideID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False, index=True) + FieldValue: Mapped[str | None] = mapped_column(String) class CustomDataPeptidesDecoy(Base): __tablename__ = "CustomDataPeptides_decoy" - FieldID = Column(Integer, primary_key=True, nullable=False) - PeptideID = Column(Integer, primary_key=True, nullable=False, index=True) - FieldValue = Column(String) + FieldID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + PeptideID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False, index=True) + FieldValue: Mapped[str | None] = mapped_column(String) class CustomDataProcessingNode(Base): __tablename__ = "CustomDataProcessingNodes" - FieldID = Column(Integer, primary_key=True, nullable=False) - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False, index=True) - FieldValue = Column(String) + FieldID: Mapped[int] = 
mapped_column(Integer, primary_key=True, nullable=False) + ProcessingNodeNumber: Mapped[int] = mapped_column( + Integer, primary_key=True, nullable=False, index=True + ) + FieldValue: Mapped[str | None] = mapped_column(String) class CustomDataProtein(Base): __tablename__ = "CustomDataProteins" - FieldID = Column(Integer, primary_key=True, nullable=False) - ProteinID = Column(Integer, primary_key=True, nullable=False, index=True) - FieldValue = Column(String) + FieldID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + ProteinID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False, index=True) + FieldValue: Mapped[str | None] = mapped_column(String) class CustomDataProteinsDecoy(Base): __tablename__ = "CustomDataProteins_decoy" - FieldID = Column(Integer, primary_key=True, nullable=False) - ProteinID = Column(Integer, primary_key=True, nullable=False, index=True) - FieldValue = Column(String) + FieldID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + ProteinID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False, index=True) + FieldValue: Mapped[str | None] = mapped_column(String) class CustomDataSpectra(Base): __tablename__ = "CustomDataSpectra" - FieldID = Column(Integer, primary_key=True, nullable=False) - SpectrumID = Column(Integer, primary_key=True, nullable=False, index=True) - FieldValue = Column(String) + FieldID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + SpectrumID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False, index=True) + FieldValue: Mapped[str | None] = mapped_column(String) class Enzyme(Base): __tablename__ = "Enzymes" - EnzymeID = Column(Integer, primary_key=True) - Name = Column(String, nullable=False) - Abbreviation = Column(String, nullable=False) - Seperator = Column(String, nullable=False) - NonSeperator = Column(String, nullable=False) - Offset = Column(Integer, nullable=False) + EnzymeID: Mapped[int] = 
mapped_column(Integer, primary_key=True) + Name: Mapped[str] = mapped_column(String, nullable=False) + Abbreviation: Mapped[str] = mapped_column(String, nullable=False) + Seperator: Mapped[str] = mapped_column(String, nullable=False) + NonSeperator: Mapped[str] = mapped_column(String, nullable=False) + Offset: Mapped[int] = mapped_column(Integer, nullable=False) class EnzymesCleavageSpecificity(Base): __tablename__ = "EnzymesCleavageSpecificities" - EnzymeID = Column(Integer, primary_key=True, nullable=False) - Specificity = Column(Integer, primary_key=True, nullable=False) + EnzymeID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + Specificity: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) class EventAnnotation(Base): @@ -248,20 +276,20 @@ class EventAnnotation(Base): Index("IX_EventAnnotations_QuanResultID_QuanChannelID", "QuanResultID", "QuanChannelID"), ) - EventID = Column(Integer, primary_key=True) - Charge = Column(SmallInteger, nullable=False) - IsotopePatternID = Column(Integer, nullable=False) - QuanResultID = Column(Integer, nullable=False) - QuanChannelID = Column(Integer, nullable=False) + EventID: Mapped[int] = mapped_column(Integer, primary_key=True) + Charge: Mapped[int] = mapped_column(SmallInteger, nullable=False) + IsotopePatternID: Mapped[int] = mapped_column(Integer, nullable=False) + QuanResultID: Mapped[int] = mapped_column(Integer, nullable=False) + QuanChannelID: Mapped[int] = mapped_column(Integer, nullable=False) class EventAreaAnnotation(Base): __tablename__ = "EventAreaAnnotations" - EventID = Column(Integer, primary_key=True) - Charge = Column(SmallInteger, nullable=False) - IsotopePatternID = Column(Integer, nullable=False, index=True) - QuanResultID = Column(Integer, nullable=False) + EventID: Mapped[int] = mapped_column(Integer, primary_key=True) + Charge: Mapped[int] = mapped_column(SmallInteger, nullable=False) + IsotopePatternID: Mapped[int] = mapped_column(Integer, 
nullable=False, index=True) + QuanResultID: Mapped[int] = mapped_column(Integer, nullable=False) class Event(Base): @@ -271,316 +299,325 @@ class Event(Base): Index("IX_Events_FileID_RT", "FileID", "RT"), ) - EventID = Column(Integer, primary_key=True) - Mass = Column(Float, nullable=False) - MassAvg = Column(Float, nullable=False) - Area = Column(Float, nullable=False) - Intensity = Column(Float, nullable=False) - PeakWidth = Column(Float, nullable=False) - RT = Column(Float, nullable=False) - LeftRT = Column(Float, nullable=False) - RightRT = Column(Float, nullable=False) - SN = Column(Float, nullable=False, server_default=text("0.0")) - FileID = Column(Integer, nullable=False) + EventID: Mapped[int] = mapped_column(Integer, primary_key=True) + Mass: Mapped[float] = mapped_column(Float, nullable=False) + MassAvg: Mapped[float] = mapped_column(Float, nullable=False) + Area: Mapped[float] = mapped_column(Float, nullable=False) + Intensity: Mapped[float] = mapped_column(Float, nullable=False) + PeakWidth: Mapped[float] = mapped_column(Float, nullable=False) + RT: Mapped[float] = mapped_column(Float, nullable=False) + LeftRT: Mapped[float] = mapped_column(Float, nullable=False) + RightRT: Mapped[float] = mapped_column(Float, nullable=False) + SN: Mapped[float] = mapped_column(Float, nullable=False, server_default=text("0.0")) + FileID: Mapped[int] = mapped_column(Integer, nullable=False) class FastaFile(Base): __tablename__ = "FastaFiles" - FastaFileID = Column(Integer, primary_key=True) - FileName = Column(String, nullable=False) - State = Column(Integer, nullable=False) - VirtualFileName = Column(String, nullable=False) - FileSize = Column(BigInteger, nullable=False) - FileTime = Column(BigInteger, nullable=False) - NumberOfProteins = Column(BigInteger) - NumberOfAminoAcids = Column(BigInteger) - FileHashCode = Column(BigInteger) - Hidden = Column(Boolean, nullable=False) - IsSrfImport = Column(Boolean, nullable=False) - IsScheduledForDeletion = Column(Boolean, 
nullable=False, server_default=text("0")) + FastaFileID: Mapped[int | None] = mapped_column(Integer, primary_key=True) + FileName: Mapped[str] = mapped_column(String, nullable=False) + State: Mapped[int] = mapped_column(Integer, nullable=False) + VirtualFileName: Mapped[str] = mapped_column(String, nullable=False) + FileSize: Mapped[int] = mapped_column(BigInteger, nullable=False) + FileTime: Mapped[int] = mapped_column(BigInteger, nullable=False) + NumberOfProteins: Mapped[int | None] = mapped_column(BigInteger) + NumberOfAminoAcids: Mapped[int | None] = mapped_column(BigInteger) + FileHashCode: Mapped[int | None] = mapped_column(BigInteger) + Hidden: Mapped[bool] = mapped_column(Boolean, nullable=False) + IsSrfImport: Mapped[bool] = mapped_column(Boolean, nullable=False) + IsScheduledForDeletion: Mapped[bool] = mapped_column( + Boolean, nullable=False, server_default=text("0") + ) class FastaFilesProteinAnnotation(Base): __tablename__ = "FastaFilesProteinAnnotations" - FastaFileID = Column(Integer, primary_key=True, nullable=False) - ProteinAnnotationID = Column(Integer, primary_key=True, nullable=False, index=True) + FastaFileID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + ProteinAnnotationID: Mapped[int] = mapped_column( + Integer, primary_key=True, nullable=False, index=True + ) class FileInfo(Base): __tablename__ = "FileInfos" - FileID = Column(Integer, primary_key=True) - FileName = Column(String, nullable=False) - FileTime = Column(String, nullable=False) - FileSize = Column(BigInteger, nullable=False) - PhysicalFileName = Column(String, nullable=False) - FileType = Column(SmallInteger, nullable=False) + FileID: Mapped[int | None] = mapped_column(Integer, primary_key=True) + FileName: Mapped[str] = mapped_column(String, nullable=False) + FileTime: Mapped[str] = mapped_column(String, nullable=False) + FileSize: Mapped[int] = mapped_column(BigInteger, nullable=False) + PhysicalFileName: Mapped[str] = mapped_column(String, 
nullable=False) + FileType: Mapped[int] = mapped_column(SmallInteger, nullable=False) class MassPeakRelation(Base): __tablename__ = "MassPeakRelations" - MassPeakID = Column(Integer, primary_key=True, nullable=False) - RelatedMassPeakID = Column(Integer, primary_key=True, nullable=False) + MassPeakID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + RelatedMassPeakID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) class MassPeak(Base): __tablename__ = "MassPeaks" - MassPeakID = Column(Integer, primary_key=True) - Charge = Column(SmallInteger) - Intensity = Column(Float) - Mass = Column(Float) - ScanNumbers = Column(String) - FileID = Column(Integer) - PercentIsolationInterference = Column(Float) - IonInjectTime = Column(Integer) + MassPeakID: Mapped[int | None] = mapped_column(Integer, primary_key=True) + Charge: Mapped[int | None] = mapped_column(SmallInteger) + Intensity: Mapped[float | None] = mapped_column(Float) + Mass: Mapped[float | None] = mapped_column(Float) + ScanNumbers: Mapped[str | None] = mapped_column(String) + FileID: Mapped[int | None] = mapped_column(Integer) + PercentIsolationInterference: Mapped[float | None] = mapped_column(Float) + IonInjectTime: Mapped[int | None] = mapped_column(Integer) class PeptideScore(Base): __tablename__ = "PeptideScores" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - PeptideID = Column(Integer, primary_key=True, nullable=False) - ScoreID = Column(Integer, primary_key=True, nullable=False) - ProcessingNodeID = Column(Integer) - ScoreValue = Column(Float, nullable=False) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + PeptideID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + ScoreID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + ProcessingNodeID: Mapped[int | None] = mapped_column(Integer) + ScoreValue: Mapped[float] = mapped_column(Float, 
nullable=False) class PeptideScoreDecoy(Base): __tablename__ = "PeptideScores_decoy" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - PeptideID = Column(Integer, primary_key=True, nullable=False) - ScoreID = Column(Integer, primary_key=True, nullable=False) - ProcessingNodeID = Column(Integer) - ScoreValue = Column(Float, nullable=False) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + PeptideID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + ScoreID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + ProcessingNodeID: Mapped[int | None] = mapped_column(Integer) + ScoreValue: Mapped[float] = mapped_column(Float, nullable=False) class Peptide(Base): __tablename__ = "Peptides" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - PeptideID = Column(Integer, primary_key=True, nullable=False, index=True) - SpectrumID = Column(Integer, nullable=False, index=True) - TotalIonsCount = Column(SmallInteger, nullable=False) - MatchedIonsCount = Column(SmallInteger, nullable=False) - ConfidenceLevel = Column(SmallInteger, nullable=False) - SearchEngineRank = Column(Integer, nullable=False) - Hidden = Column(Boolean, nullable=False, server_default=text("0")) - Sequence = Column(String) - Annotation = Column(String) - UniquePeptideSequenceID = Column(Integer, nullable=False, server_default=text("1")) - MissedCleavages = Column(SmallInteger, nullable=False) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + PeptideID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False, index=True) + SpectrumID: Mapped[int] = mapped_column(Integer, nullable=False, index=True) + TotalIonsCount: Mapped[int] = mapped_column(SmallInteger, nullable=False) + MatchedIonsCount: Mapped[int] = mapped_column(SmallInteger, nullable=False) + ConfidenceLevel: Mapped[int] = mapped_column(SmallInteger, 
nullable=False) + SearchEngineRank: Mapped[int] = mapped_column(Integer, nullable=False) + Hidden: Mapped[bool] = mapped_column(Boolean, nullable=False, server_default=text("0")) + Sequence: Mapped[str | None] = mapped_column(String) + Annotation: Mapped[str | None] = mapped_column(String) + UniquePeptideSequenceID: Mapped[int] = mapped_column( + Integer, nullable=False, server_default=text("1") + ) + MissedCleavages: Mapped[int] = mapped_column(SmallInteger, nullable=False) class PeptidesAminoAcidModification(Base): __tablename__ = "PeptidesAminoAcidModifications" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - PeptideID = Column(Integer, primary_key=True, nullable=False) - AminoAcidModificationID = Column(Integer, primary_key=True, nullable=False) - Position = Column(Integer, primary_key=True, nullable=False) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + PeptideID: Mapped[int] = mapped_column(Integer, primary_key=True) + AminoAcidModificationID: Mapped[int] = mapped_column(Integer, primary_key=True) + Position: Mapped[int] = mapped_column(Integer, primary_key=True) class PeptidesAminoAcidModificationsDecoy(Base): __tablename__ = "PeptidesAminoAcidModifications_decoy" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - PeptideID = Column(Integer, primary_key=True, nullable=False) - AminoAcidModificationID = Column(Integer, primary_key=True, nullable=False) - Position = Column(Integer, primary_key=True, nullable=False) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + PeptideID: Mapped[int] = mapped_column(Integer, primary_key=True) + AminoAcidModificationID: Mapped[int] = mapped_column(Integer, primary_key=True) + Position: Mapped[int] = mapped_column(Integer, primary_key=True) class PeptidesProtein(Base): __tablename__ = "PeptidesProteins" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - PeptideID = Column(Integer, 
primary_key=True, nullable=False, index=True) - ProteinID = Column(Integer, primary_key=True, nullable=False) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + PeptideID: Mapped[int] = mapped_column(Integer, primary_key=True, index=True) + ProteinID: Mapped[int] = mapped_column(Integer, primary_key=True) class PeptidesProteinDecoy(Base): __tablename__ = "PeptidesProteins_decoy" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - PeptideID = Column(Integer, primary_key=True, nullable=False, index=True) - ProteinID = Column(Integer, primary_key=True, nullable=False) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + PeptideID: Mapped[int] = mapped_column(Integer, primary_key=True, index=True) + ProteinID: Mapped[int] = mapped_column(Integer, primary_key=True) class PeptidesReferenceSpectra(Base): __tablename__ = "PeptidesReferenceSpectra" - PeptideID = Column(Integer, primary_key=True) - ReferenceSpectrumID = Column(Integer, nullable=False) + PeptideID: Mapped[int | None] = mapped_column(Integer, primary_key=True) + ReferenceSpectrumID: Mapped[int] = mapped_column(Integer) class PeptidesTerminalModification(Base): __tablename__ = "PeptidesTerminalModifications" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - PeptideID = Column(Integer, primary_key=True, nullable=False) - TerminalModificationID = Column(Integer, primary_key=True, nullable=False) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + PeptideID: Mapped[int] = mapped_column(Integer, primary_key=True) + TerminalModificationID: Mapped[int] = mapped_column(Integer, primary_key=True) class PeptidesTerminalModificationDecoy(Base): __tablename__ = "PeptidesTerminalModifications_decoy" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - PeptideID = Column(Integer, primary_key=True, nullable=False) - TerminalModificationID = Column(Integer, 
primary_key=True, nullable=False) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + PeptideID: Mapped[int] = mapped_column(Integer, primary_key=True) + TerminalModificationID: Mapped[int] = mapped_column(Integer, primary_key=True) class PeptideDecoy(Base): __tablename__ = "Peptides_decoy" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - PeptideID = Column(Integer, primary_key=True, nullable=False, index=True) - SpectrumID = Column(Integer, nullable=False, index=True) - TotalIonsCount = Column(SmallInteger, nullable=False) - MatchedIonsCount = Column(SmallInteger, nullable=False) - ConfidenceLevel = Column(SmallInteger, nullable=False) - SearchEngineRank = Column(Integer, nullable=False) - Sequence = Column(String) - Annotation = Column(String) - UniquePeptideSequenceID = Column(Integer, nullable=False, server_default=text("1")) - MissedCleavages = Column(SmallInteger, nullable=False) - - -t_PrecursorIonAreaSearchSpectra = Table( - "PrecursorIonAreaSearchSpectra", - metadata, - Column("QuanResultID", Integer, nullable=False, index=True), - Column("SearchSpectrumID", Integer), -) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + PeptideID: Mapped[int] = mapped_column(Integer, primary_key=True, index=True) + SpectrumID: Mapped[int] = mapped_column(Integer, index=True) + TotalIonsCount: Mapped[int] = mapped_column(SmallInteger) + MatchedIonsCount: Mapped[int] = mapped_column(SmallInteger) + ConfidenceLevel: Mapped[int] = mapped_column(SmallInteger) + SearchEngineRank: Mapped[int] = mapped_column(Integer) + Sequence: Mapped[str | None] = mapped_column(String) + Annotation: Mapped[str | None] = mapped_column(String) + UniquePeptideSequenceID: Mapped[int] = mapped_column(Integer, server_default=text("1")) + MissedCleavages: Mapped[int] = mapped_column(SmallInteger) -t_PrecursorIonQuanResults = Table( - "PrecursorIonQuanResults", - metadata, - Column("QuanChannelID", Integer, 
nullable=False), - Column("QuanResultID", Integer, nullable=False), - Column("Mass", Float, nullable=False), - Column("Charge", Integer, nullable=False), - Column("Area", Float), - Column("RetentionTime", Float), - Index( - "IX_PrecursorIonQuanResults_QuanResultID_QuanChannelID", "QuanResultID", "QuanChannelID" - ), -) +class PrecursorIonAreaSearchSpectra(Base): + __tablename__ = "PrecursorIonAreaSearchSpectra" + QuanResultID: Mapped[int] = mapped_column( + Integer, primary_key=True, nullable=False, index=True + ) + SearchSpectrumID: Mapped[int | None] = mapped_column(Integer, primary_key=True) -t_PrecursorIonQuanResultsSearchSpectra = Table( - "PrecursorIonQuanResultsSearchSpectra", - metadata, - Column("ProcessingNodeNumber", Integer, nullable=False), - Column("QuanResultID", Integer, nullable=False, index=True), - Column("SearchSpectrumID", Integer, index=True), -) +class PrecursorIonQuanResult(Base): + __tablename__ = "PrecursorIonQuanResults" + __table_args__ = ( + Index( + "IX_PrecursorIonQuanResults_QuanResultID_QuanChannelID", + "QuanResultID", + "QuanChannelID", + ), + ) -t_ProcessingNodeConnectionPoints = Table( - "ProcessingNodeConnectionPoints", - metadata, - Column("ProcessingNodeID", Integer, nullable=False), - Column("Interface", String, nullable=False), - Column("ConnectionDirection", Integer, nullable=False), - Column("ConnectionMode", Integer, nullable=False), - Column("ConnectionMultiplicity", Integer, nullable=False), - Column("ConnectionRequirement", Integer, nullable=False), - Column("DataTypeSpecialization", String, nullable=False), - Column("ConnectionDisplayName", String, nullable=False), -) + QuanChannelID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + QuanResultID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + Mass: Mapped[float] = mapped_column(Float, nullable=False) + Charge: Mapped[int] = mapped_column(Integer, nullable=False) + Area: Mapped[float | None] = mapped_column(Float) + 
RetentionTime: Mapped[float | None] = mapped_column(Float) + + +class PrecursorIonQuanResultsSearchSpectra(Base): + __tablename__ = "PrecursorIonQuanResultsSearchSpectra" + + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + QuanResultID: Mapped[int] = mapped_column( + Integer, primary_key=True, nullable=False, index=True + ) + SearchSpectrumID: Mapped[int | None] = mapped_column(Integer, index=True) + + +class ProcessingNodeConnectionPoint(Base): + __tablename__ = "ProcessingNodeConnectionPoints" + + ProcessingNodeID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + Interface: Mapped[str] = mapped_column(String, primary_key=True, nullable=False) + ConnectionDirection: Mapped[int] = mapped_column(Integer, nullable=False) + ConnectionMode: Mapped[int] = mapped_column(Integer, nullable=False) + ConnectionMultiplicity: Mapped[int] = mapped_column(Integer, nullable=False) + ConnectionRequirement: Mapped[int] = mapped_column(Integer, nullable=False) + DataTypeSpecialization: Mapped[str] = mapped_column(String, nullable=False) + ConnectionDisplayName: Mapped[str] = mapped_column(String, nullable=False) class ProcessingNodeExtension(Base): __tablename__ = "ProcessingNodeExtensions" - ExtensionID = Column(Integer, primary_key=True) - ProcessingNodeNumber = Column(Integer, nullable=False) - Guid = Column(String, nullable=False) - Purpose = Column(String, nullable=False) - PurposeDetail = Column(String) - MajorVersion = Column(Integer, nullable=False) - MinorVersion = Column(Integer, nullable=False) - Settings = Column(Text) + ExtensionID: Mapped[int | None] = mapped_column(Integer, primary_key=True) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer) + Guid: Mapped[str] = mapped_column(String) + Purpose: Mapped[str] = mapped_column(String) + PurposeDetail: Mapped[str | None] = mapped_column(String) + MajorVersion: Mapped[int] = mapped_column(Integer) + MinorVersion: Mapped[int] = 
mapped_column(Integer) + Settings: Mapped[str | None] = mapped_column(Text) class ProcessingNodeFilterParameter(Base): __tablename__ = "ProcessingNodeFilterParameters" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - FilterParameterName = Column(String, primary_key=True, nullable=False) - FilterModuleTypeID = Column(Integer, nullable=False) - FilterModuleNumber = Column(Integer, nullable=False) - ProcessingNodeID = Column(Integer, nullable=False) - FilterParameterValue = Column(Float, nullable=False) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + FilterParameterName: Mapped[str] = mapped_column(String, primary_key=True) + FilterModuleTypeID: Mapped[int] = mapped_column(Integer) + FilterModuleNumber: Mapped[int] = mapped_column(Integer) + ProcessingNodeID: Mapped[int] = mapped_column(Integer) + FilterParameterValue: Mapped[float] = mapped_column(Float) -t_ProcessingNodeInterfaces = Table( - "ProcessingNodeInterfaces", - metadata, - Column("ProcessingNodeID", Integer, nullable=False), - Column("InterfaceKind", Integer, nullable=False), - Column("InterfaceName", String, nullable=False), -) +class ProcessingNodeInterface(Base): + __tablename__ = "ProcessingNodeInterfaces" + + ProcessingNodeID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + InterfaceKind: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + InterfaceName: Mapped[str] = mapped_column(String, primary_key=True, nullable=False) class ProcessingNodeParameter(Base): __tablename__ = "ProcessingNodeParameters" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - ParameterName = Column(String, primary_key=True, nullable=False) - FriendlyName = Column(String, nullable=False) - ProcessingNodeID = Column(Integer, nullable=False) - IntendedPurpose = Column(Integer, nullable=False) - PurposeDetails = Column(String, nullable=False) - Hidden = Column(Boolean, nullable=False) - Advanced = 
Column(Boolean, nullable=False) - Category = Column(String, nullable=False) - Position = Column(Integer, nullable=False) - ParameterValue = Column(String, nullable=False) - ValueDisplayString = Column(String, nullable=False) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + ParameterName: Mapped[str] = mapped_column(String, primary_key=True) + FriendlyName: Mapped[str] = mapped_column(String) + ProcessingNodeID: Mapped[int] = mapped_column(Integer) + IntendedPurpose: Mapped[int] = mapped_column(Integer) + PurposeDetails: Mapped[str] = mapped_column(String) + Hidden: Mapped[bool] = mapped_column(Boolean) + Advanced: Mapped[bool] = mapped_column(Boolean) + Category: Mapped[str] = mapped_column(String) + Position: Mapped[int] = mapped_column(Integer) + ParameterValue: Mapped[str] = mapped_column(String) + ValueDisplayString: Mapped[str] = mapped_column(String) class ProcessingNodeScore(Base): __tablename__ = "ProcessingNodeScores" __table_args__ = (UniqueConstraint("ProcessingNodeID", "ScoreName"),) - ProcessingNodeID = Column(Integer, nullable=False) - ScoreID = Column(Integer, primary_key=True) - ScoreName = Column(String, nullable=False) - FriendlyName = Column(String, nullable=False) - Description = Column(String, nullable=False) - FormatString = Column(String, nullable=False) - ScoreCategory = Column(Integer, nullable=False) - Hidden = Column(Boolean, nullable=False) - IsMainScore = Column(Boolean, nullable=False) - ScoreGUID = Column(String, nullable=False) + ProcessingNodeID: Mapped[int] = mapped_column(Integer) + ScoreID: Mapped[int | None] = mapped_column(Integer, primary_key=True) + ScoreName: Mapped[str] = mapped_column(String) + FriendlyName: Mapped[str] = mapped_column(String) + Description: Mapped[str] = mapped_column(String) + FormatString: Mapped[str] = mapped_column(String) + ScoreCategory: Mapped[int] = mapped_column(Integer) + Hidden: Mapped[bool] = mapped_column(Boolean) + IsMainScore: Mapped[bool] = 
mapped_column(Boolean) + ScoreGUID: Mapped[str] = mapped_column(String) class ProcessingNode(Base): __tablename__ = "ProcessingNodes" - ProcessingNodeNumber = Column(Integer, primary_key=True) - ProcessingNodeID = Column(Integer, nullable=False) - ProcessingNodeParentNumber = Column(String, nullable=False) - NodeName = Column(String) - FriendlyName = Column(String, nullable=False) - MajorVersion = Column(Integer, nullable=False) - MinorVersion = Column(Integer, nullable=False) - NodeComment = Column(String) - NodeGUID = Column(String, nullable=False) - ProcessingNodeState = Column(Integer, nullable=False, server_default=text("0")) + ProcessingNodeNumber: Mapped[int | None] = mapped_column(Integer, primary_key=True) + ProcessingNodeID: Mapped[int] = mapped_column(Integer) + ProcessingNodeParentNumber: Mapped[str] = mapped_column(String) + NodeName: Mapped[str | None] = mapped_column(String) + FriendlyName: Mapped[str] = mapped_column(String) + MajorVersion: Mapped[int] = mapped_column(Integer) + MinorVersion: Mapped[int] = mapped_column(Integer) + NodeComment: Mapped[str | None] = mapped_column(String) + NodeGUID: Mapped[str] = mapped_column(String) + ProcessingNodeState: Mapped[int] = mapped_column(Integer, server_default=text("0")) class ProcessingNodesSpectra(Base): __tablename__ = "ProcessingNodesSpectra" - SendingProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - SpectrumID = Column(Integer, primary_key=True, nullable=False, index=True) + SendingProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + SpectrumID: Mapped[int] = mapped_column(Integer, primary_key=True, index=True) class ProteinAnnotation(Base): @@ -593,77 +630,76 @@ class ProteinAnnotation(Base): ), ) - ProteinAnnotationID = Column(Integer, primary_key=True) - ProteinID = Column(Integer, nullable=False) - DescriptionHashCode = Column(BigInteger, nullable=False) - Description = Column(Text, nullable=False) - TaxonomyID = Column(Integer, nullable=False, 
index=True) + ProteinAnnotationID: Mapped[int | None] = mapped_column(Integer, primary_key=True) + ProteinID: Mapped[int] = mapped_column(Integer) + DescriptionHashCode: Mapped[int] = mapped_column(BigInteger) + Description: Mapped[str] = mapped_column(Text) + TaxonomyID: Mapped[int] = mapped_column(Integer, index=True) class ProteinIdentificationGroup(Base): __tablename__ = "ProteinIdentificationGroups" - ProteinIdentificationGroupId = Column(Integer, primary_key=True, nullable=False) - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) + ProteinIdentificationGroupId: Mapped[int] = mapped_column(Integer, primary_key=True) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) class ProteinScore(Base): __tablename__ = "ProteinScores" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - ProteinID = Column(Integer, primary_key=True, nullable=False) - ProteinIdentificationGroupID = Column(Integer, nullable=False) - ProteinScore = Column(Float, nullable=False) - Coverage = Column(Float, nullable=False, server_default=text("0")) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + ProteinID: Mapped[int] = mapped_column(Integer, primary_key=True) + ProteinIdentificationGroupID: Mapped[int] = mapped_column(Integer) + ProteinScore: Mapped[float] = mapped_column(Float) + Coverage: Mapped[float] = mapped_column(Float, server_default=text("0")) class ProteinScoresDecoy(Base): __tablename__ = "ProteinScores_decoy" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - ProteinID = Column(Integer, primary_key=True, nullable=False) - ProteinIdentificationGroupID = Column(Integer, nullable=False) - ProteinScore = Column(Float, nullable=False) - Coverage = Column(Float, nullable=False, server_default=text("0")) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + ProteinID: Mapped[int] = mapped_column(Integer, primary_key=True) + 
ProteinIdentificationGroupID: Mapped[int] = mapped_column(Integer) + ProteinScore: Mapped[float] = mapped_column(Float) + Coverage: Mapped[float] = mapped_column(Float, server_default=text("0")) class Protein(Base): __tablename__ = "Proteins" - ProteinID = Column(Integer, primary_key=True) - Sequence = Column(Text, nullable=False) - SequenceHashCode = Column(BigInteger, nullable=False, index=True) - IsMasterProtein = Column(Boolean, nullable=False, server_default=text("0")) + ProteinID: Mapped[int | None] = mapped_column(Integer, primary_key=True) + Sequence: Mapped[str] = mapped_column(Text) + SequenceHashCode: Mapped[int] = mapped_column(BigInteger, index=True) + IsMasterProtein: Mapped[bool] = mapped_column(Boolean, server_default=text("0")) -t_ProteinsProteinGroups = Table( - "ProteinsProteinGroups", - metadata, - Column("ProteinID", Integer, nullable=False), - Column("ProteinGroupID", Integer, nullable=False), -) +class ProteinsProteinGroup(Base): + __tablename__ = "ProteinsProteinGroups" + + ProteinID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + ProteinGroupID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) class PtmAnnotationDatum(Base): __tablename__ = "PtmAnnotationData" - AnnotationType = Column(Integer, primary_key=True, nullable=False) - ProteinId = Column(Integer, primary_key=True, nullable=False) - AnnotationId = Column(Integer, primary_key=True, nullable=False) - Position = Column(Integer, primary_key=True, nullable=False) - Annotation = Column(String) + AnnotationType: Mapped[int] = mapped_column(Integer, primary_key=True) + ProteinId: Mapped[int] = mapped_column(Integer, primary_key=True) + AnnotationId: Mapped[int] = mapped_column(Integer, primary_key=True) + Position: Mapped[int] = mapped_column(Integer, primary_key=True) + Annotation: Mapped[str | None] = mapped_column(String) class ReferenceSpectra(Base): __tablename__ = "ReferenceSpectra" - ReferenceSpectrumId = Column(Integer, 
primary_key=True) - Sequence = Column(String, nullable=False) - SequenceHashCode = Column(BigInteger, nullable=False) - Spectrum = Column(String, nullable=False) - SpectrumHashCode = Column(BigInteger, nullable=False) - Comment = Column(Text) - CommentHashCode = Column(BigInteger, nullable=False) + ReferenceSpectrumId: Mapped[int | None] = mapped_column(Integer, primary_key=True) + Sequence: Mapped[str] = mapped_column(String) + SequenceHashCode: Mapped[int] = mapped_column(BigInteger) + Spectrum: Mapped[str] = mapped_column(String) + SpectrumHashCode: Mapped[int] = mapped_column(BigInteger) + Comment: Mapped[str | None] = mapped_column(Text) + CommentHashCode: Mapped[int] = mapped_column(BigInteger) class ReporterIonQuanResult(Base): @@ -676,84 +712,82 @@ class ReporterIonQuanResult(Base): ), ) - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - QuanChannelID = Column(Integer, primary_key=True, nullable=False) - SpectrumID = Column(Integer, primary_key=True, nullable=False) - Mass = Column(Float, nullable=False) - Height = Column(Float) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + QuanChannelID: Mapped[int] = mapped_column(Integer, primary_key=True) + SpectrumID: Mapped[int] = mapped_column(Integer, primary_key=True) + Mass: Mapped[float] = mapped_column(Float) + Height: Mapped[float | None] = mapped_column(Float) -t_ReporterIonQuanResultsSearchSpectra = Table( - "ReporterIonQuanResultsSearchSpectra", - metadata, - Column("ProcessingNodeNumber", Integer, nullable=False), - Column("SpectrumID", Integer, nullable=False), - Column("SearchSpectrumID", Integer, index=True), -) +class ReporterIonQuanResultsSearchSpectra(Base): + __tablename__ = "ReporterIonQuanResultsSearchSpectra" + + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + SpectrumID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + SearchSpectrumID: Mapped[int | None] = 
mapped_column(Integer, index=True) class ScanEvent(Base): __tablename__ = "ScanEvents" - ScanEventID = Column(Integer, primary_key=True) - MSLevel = Column(Integer, nullable=False) - Polarity = Column(Integer, nullable=False) - ScanType = Column(Integer, nullable=False) - Ionization = Column(Integer, nullable=False) - MassAnalyzer = Column(Integer, nullable=False) - ActivationType = Column(Integer, nullable=False) + ScanEventID: Mapped[int | None] = mapped_column(Integer, primary_key=True) + MSLevel: Mapped[int] = mapped_column(Integer) + Polarity: Mapped[int] = mapped_column(Integer) + ScanType: Mapped[int] = mapped_column(Integer) + Ionization: Mapped[int] = mapped_column(Integer) + MassAnalyzer: Mapped[int] = mapped_column(Integer) + ActivationType: Mapped[int] = mapped_column(Integer) class SchemaInfo(Base): __tablename__ = "SchemaInfo" - Version = Column(Integer, primary_key=True) - Kind = Column(String, nullable=False) - Date = Column(DateTime, nullable=False) - SoftwareVersion = Column(String, nullable=False) - Comment = Column(Text, nullable=False) + Version: Mapped[int | None] = mapped_column(Integer, primary_key=True) + Kind: Mapped[str] = mapped_column(String) + Date: Mapped[datetime] = mapped_column(DateTime) + SoftwareVersion: Mapped[str] = mapped_column(String) + Comment: Mapped[str] = mapped_column(Text) class Spectrum(Base): __tablename__ = "Spectra" - UniqueSpectrumID = Column(Integer, primary_key=True) - Spectrum = Column(String, nullable=False) - SpectrumHashCode = Column(BigInteger) + UniqueSpectrumID: Mapped[int | None] = mapped_column(Integer, primary_key=True) + Spectrum: Mapped[str] = mapped_column(String) + SpectrumHashCode: Mapped[int | None] = mapped_column(BigInteger) class SpectrumHeader(Base): __tablename__ = "SpectrumHeaders" - SpectrumID = Column(Integer, primary_key=True) - MassPeakID = Column(Integer) - ScanEventID = Column(Integer) - LastScan = Column(Integer) - FirstScan = Column(Integer) - RetentionTime = Column(Float) - Hidden 
= Column(Boolean, nullable=False, server_default=text("0")) - ScanNumbers = Column(String) - Charge = Column(SmallInteger) - Mass = Column(Float) - CreatingProcessingNodeNumber = Column(Integer, nullable=False) - UniqueSpectrumID = Column(Integer, nullable=False, server_default=text("0")) + SpectrumID: Mapped[int | None] = mapped_column(Integer, primary_key=True) + MassPeakID: Mapped[int | None] = mapped_column(Integer) + ScanEventID: Mapped[int | None] = mapped_column(Integer) + LastScan: Mapped[int | None] = mapped_column(Integer) + FirstScan: Mapped[int | None] = mapped_column(Integer) + RetentionTime: Mapped[float | None] = mapped_column(Float) + Hidden: Mapped[bool] = mapped_column(Boolean, server_default=text("0")) + ScanNumbers: Mapped[str | None] = mapped_column(String) + Charge: Mapped[int | None] = mapped_column(SmallInteger) + Mass: Mapped[float | None] = mapped_column(Float) + CreatingProcessingNodeNumber: Mapped[int] = mapped_column(Integer) + UniqueSpectrumID: Mapped[int] = mapped_column(Integer, server_default=text("0")) class SpectrumScore(Base): __tablename__ = "SpectrumScores" - ProcessingNodeNumber = Column(Integer, primary_key=True, nullable=False) - SpectrumID = Column(Integer, primary_key=True, nullable=False) - Score = Column(Float, nullable=False) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer, primary_key=True) + SpectrumID: Mapped[int] = mapped_column(Integer, primary_key=True) + Score: Mapped[float] = mapped_column(Float) -t_TaxonomyNames = Table( - "TaxonomyNames", - metadata, - Column("TaxonomyID", Integer, nullable=False, index=True), - Column("Name", String), - Column("NameCategory", Integer, nullable=False), -) +class TaxonomyName(Base): + __tablename__ = "TaxonomyNames" + + TaxonomyID: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False, index=True) + NameCategory: Mapped[int] = mapped_column(Integer, primary_key=True, nullable=False) + Name: Mapped[str | None] = mapped_column(String) class 
TaxonomyNode(Base): @@ -762,42 +796,37 @@ class TaxonomyNode(Base): Index("IX_TaxonomyNodes_LeftNodeIndex_RightNodeIndex", "LeftNodeIndex", "RightNodeIndex"), ) - TaxonomyID = Column(Integer, primary_key=True, unique=True) - ParentTaxonomyID = Column(Integer, nullable=False) - TaxonomyRank = Column(Integer, nullable=False) - LeftNodeIndex = Column(Integer, nullable=False) - RightNodeIndex = Column(Integer, nullable=False) - - -t_WorkflowInfo = Table( - "WorkflowInfo", - metadata, - Column("WorkflowName", String, nullable=False), - Column("WorkflowDescription", String, nullable=False), - Column("WorkflowState", Integer, nullable=False, server_default=text("0")), - Column("WorkflowStartDate", DateTime, nullable=False), - Column("WorkflowTemplate", String, nullable=False), - Column("User", String, nullable=False), - Column("WorkflowGUID", String, nullable=False), - Column("MachineGUID", String, nullable=False), - Column("MachineName", String, nullable=False), - Column("MergeSimilarIdentificationResults", Boolean, nullable=False), - Column("IsValid", Boolean, nullable=False), - Column("Version", Integer, nullable=False), -) + TaxonomyID: Mapped[int | None] = mapped_column(Integer, primary_key=True, unique=True) + ParentTaxonomyID: Mapped[int] = mapped_column(Integer) + TaxonomyRank: Mapped[int] = mapped_column(Integer) + LeftNodeIndex: Mapped[int] = mapped_column(Integer) + RightNodeIndex: Mapped[int] = mapped_column(Integer) -class WorkflowMessage(Base): - __tablename__ = "WorkflowMessages" +# TODO: Check which is primary key +class WorkflowInfo(Base): + __tablename__ = "WorkflowInfo" - MessageID = Column(Integer, primary_key=True) - ProcessingNodeID = Column(Integer, nullable=False) - ProcessingNodeNumber = Column(Integer, nullable=False) - Time = Column(BigInteger, nullable=False) - MessageKind = Column(Integer, nullable=False) - Message = Column(String, nullable=False) + WorkflowGUID: Mapped[str] = mapped_column(String, primary_key=True, nullable=False) + 
WorkflowName: Mapped[str] = mapped_column(String, nullable=False) + WorkflowDescription: Mapped[str] = mapped_column(String, nullable=False) + WorkflowState: Mapped[int] = mapped_column(Integer, nullable=False, server_default=text("0")) + WorkflowStartDate: Mapped[datetime] = mapped_column(DateTime, nullable=False) + WorkflowTemplate: Mapped[str] = mapped_column(String, nullable=False) + User: Mapped[str] = mapped_column(String, nullable=False) + MachineGUID: Mapped[str] = mapped_column(String, nullable=False) + MachineName: Mapped[str] = mapped_column(String, nullable=False) + MergeSimilarIdentificationResults: Mapped[bool] = mapped_column(Boolean, nullable=False) + IsValid: Mapped[bool] = mapped_column(Boolean, nullable=False) + Version: Mapped[int] = mapped_column(Integer, nullable=False) -t_sqlite_sequence = Table( - "sqlite_sequence", metadata, Column("name", NullType), Column("seq", NullType) -) +class WorkflowMessage(Base): + __tablename__ = "WorkflowMessages" + + MessageID: Mapped[int | None] = mapped_column(Integer, primary_key=True) + ProcessingNodeID: Mapped[int] = mapped_column(Integer) + ProcessingNodeNumber: Mapped[int] = mapped_column(Integer) + Time: Mapped[int] = mapped_column(BigInteger) + MessageKind: Mapped[int] = mapped_column(Integer) + Message: Mapped[str] = mapped_column(String) diff --git a/psm_utils/io/_utils.py b/psm_utils/io/_utils.py index e1572c2..01f714f 100644 --- a/psm_utils/io/_utils.py +++ b/psm_utils/io/_utils.py @@ -2,13 +2,14 @@ import sys -def set_csv_field_size_limit(): +def set_csv_field_size_limit() -> None: """ - Sets the maximum field size limit for reading CSV files. + Set the maximum field size limit for reading CSV files. - Note: - This function should be called before reading any CSV files to ensure that the field size - limit is properly set. + Notes + ----- + This function should be called before reading any CSV files to ensure that the field size + limit is properly set. 
""" max_int = sys.maxsize diff --git a/psm_utils/io/alphadia.py b/psm_utils/io/alphadia.py index 8f6e1b8..3b4c15b 100644 --- a/psm_utils/io/alphadia.py +++ b/psm_utils/io/alphadia.py @@ -4,7 +4,11 @@ import csv from abc import ABC -from typing import Iterable, Optional +from collections.abc import Iterator +from pathlib import Path +from typing import Any, cast + +import pandas as pd from psm_utils.io._base_classes import ReaderBase from psm_utils.io._utils import set_csv_field_size_limit @@ -14,7 +18,7 @@ set_csv_field_size_limit() # TODO: check -RESCORING_FEATURES = [ +RESCORING_FEATURES: list[str] = [ "rt_observed", "mobility_observed", "mz_observed", @@ -24,29 +28,37 @@ class AlphaDIAReader(ReaderBase, ABC): - def __init__(self, filename, *args, **kwargs): + """Reader for AlphaDIA TSV format.""" + + def __init__(self, filename: str | Path, *args: Any, **kwargs: Any) -> None: """ Reader for AlphaDIA ``precursor.tsv`` file. Parameters ---------- - filename : str or Path + filename Path to PSM file. + *args + Additional positional arguments for parent class. + **kwargs + Additional keyword arguments for parent class. 
""" super().__init__(filename, *args, **kwargs) - self.filename = filename - def __iter__(self) -> Iterable[PSM]: + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" with open(self.filename) as msms_in: reader = csv.DictReader(msms_in, delimiter="\t") for row in reader: - yield self._get_peptide_spectrum_match(row) + yield self._get_peptide_spectrum_match(row, self.filename) - def _get_peptide_spectrum_match(self, psm_dict) -> PSM: + @staticmethod + def _get_peptide_spectrum_match( + psm_dict: dict[str, Any], filename: str | Path | None = None + ) -> PSM: """Parse a single PSM from a AlphaDIA PSM file.""" - rescoring_features = {} + rescoring_features: dict[str, Any] = {} for ft in RESCORING_FEATURES: try: rescoring_features[ft] = psm_dict[ft] @@ -54,7 +66,7 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM: continue return PSM( - peptidoform=self._parse_peptidoform( + peptidoform=AlphaDIAReader._parse_peptidoform( psm_dict["sequence"], psm_dict["mods"], psm_dict["mod_sites"], psm_dict["charge"] ), spectrum_id=psm_dict["frame_start"], # TODO: needs to be checked @@ -70,20 +82,20 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM: protein_list=psm_dict["proteins"].split(";"), rank=int(psm_dict["rank"]) + 1, # AlphaDIA ranks are 0-based source="AlphaDIA", - provenance_data=({"alphadia_filename": str(self.filename)}), + provenance_data=({"alphadia_filename": str(filename)} if filename else {}), metadata={}, rescoring_features=rescoring_features, ) @staticmethod - def _parse_peptidoform(sequence: str, mods: str, mod_sites, charge: Optional[str]) -> str: + def _parse_peptidoform(sequence: str, mods: str, mod_sites: str, charge: str | None) -> str: """Parse a peptidoform from a AlphaDIA PSM file.""" # Parse modifications if mods: - sequence_list = [""] + list(sequence) + [""] # N-term, sequence, C-term - for mod, site in zip(mods.split(";"), mod_sites.split(";")): - site = int(site) - name = mod.split("@")[0] + 
sequence_list: list[str] = [""] + list(sequence) + [""] # N-term, sequence, C-term + for mod, site_str in zip(mods.split(";"), mod_sites.split(";")): + site: int = int(site_str) + name: str = mod.split("@")[0] # N-terminal modification if site == 0: sequence_list[0] = f"[{name}]-" @@ -102,11 +114,7 @@ def _parse_peptidoform(sequence: str, mods: str, mod_sites, charge: Optional[str return sequence @classmethod - def from_dataframe(cls, dataframe) -> PSMList: + def from_dataframe(cls, dataframe: pd.DataFrame) -> PSMList: """Create a PSMList from a AlphaDIA Pandas DataFrame.""" - return PSMList( - psm_list=[ - cls._get_peptide_spectrum_match(cls(""), entry) - for entry in dataframe.to_dict(orient="records") - ] - ) + records = cast(list[dict[str, Any]], dataframe.to_dict(orient="records")) + return PSMList(psm_list=[cls._get_peptide_spectrum_match(entry) for entry in records]) diff --git a/psm_utils/io/diann.py b/psm_utils/io/diann.py index 1d93183..2d46a5b 100644 --- a/psm_utils/io/diann.py +++ b/psm_utils/io/diann.py @@ -1,12 +1,11 @@ """ -Reader for PSM files from DIA-NN +Reader for PSM files from DIA-NN. Reads the '.tsv' file as defined on the `DIA-NN documentation page `_. Notes ----- - - DIA-NN calculates q-values at both the run and library level. The run-level q-value is used as the PSM q-value. - DIA-NN currently does not return precursor m/z values. 
@@ -18,7 +17,11 @@ import csv import re -from typing import Iterable, Optional +from collections.abc import Iterator +from pathlib import Path +from typing import Any, cast + +import pandas as pd from psm_utils.io._base_classes import ReaderBase from psm_utils.io._utils import set_csv_field_size_limit @@ -27,7 +30,7 @@ set_csv_field_size_limit() -RESCORING_FEATURES = [ +RESCORING_FEATURES: list[str] = [ "RT", "Predicted.RT", "iRT", @@ -42,7 +45,9 @@ class DIANNTSVReader(ReaderBase): - def __init__(self, filename, *args, **kwargs) -> None: + """Reader for DIA-NN TSV format.""" + + def __init__(self, filename: str | Path, *args: Any, **kwargs: Any) -> None: """ Reader for DIA-NN '.tsv' file. @@ -50,21 +55,27 @@ def __init__(self, filename, *args, **kwargs) -> None: ---------- filename : str or Path Path to PSM file. + *args + Additional positional arguments passed to the base class. + **kwargs + Additional keyword arguments passed to the base class. """ super().__init__(filename, *args, **kwargs) - self.filename = filename - def __iter__(self) -> Iterable[PSM]: + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" with open(self.filename) as msms_in: reader = csv.DictReader(msms_in, delimiter="\t") for row in reader: - yield self._get_peptide_spectrum_match(row) + yield self._get_peptide_spectrum_match(row, self.filename) - def _get_peptide_spectrum_match(self, psm_dict) -> PSM: + @staticmethod + def _get_peptide_spectrum_match( + psm_dict: dict[str, str], filename: str | Path | None = None + ) -> PSM: """Parse a single PSM from a DIA-NN PSM file.""" - rescoring_features = {} + rescoring_features: dict[str, Any] = {} for ft in RESCORING_FEATURES: try: rescoring_features[ft] = psm_dict[ft] @@ -72,7 +83,7 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM: continue return PSM( - peptidoform=self._parse_peptidoform( + peptidoform=DIANNTSVReader._parse_peptidoform( psm_dict["Modified.Sequence"], psm_dict["Precursor.Charge"] ), 
spectrum_id=psm_dict["MS2.Scan"], @@ -87,20 +98,20 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM: protein_list=psm_dict["Protein.Ids"].split(";"), source="diann", rank=None, - provenance_data=({"diann_filename": str(self.filename)}), + provenance_data=({"diann_filename": str(filename)} if filename else {}), rescoring_features=rescoring_features, metadata={}, ) @staticmethod - def _parse_peptidoform(peptide: str, charge: Optional[str]) -> str: + def _parse_peptidoform(peptide: str, charge: str | None) -> str: # Add charge if charge: peptide += f"/{int(float(charge))}" # Replace parentheses with square brackets and capitalize UniMod prefix - pattern = r"\(UniMod:(\d+)\)" - replacement = r"[UNIMOD:\1]" + pattern: str = r"\(UniMod:(\d+)\)" + replacement: str = r"[UNIMOD:\1]" peptide = re.sub(pattern, replacement, peptide) # Add hyphen for N-terminal modifications @@ -115,11 +126,7 @@ def _parse_peptidoform(peptide: str, charge: Optional[str]) -> str: return peptide @classmethod - def from_dataframe(cls, dataframe) -> PSMList: + def from_dataframe(cls, dataframe: pd.DataFrame) -> PSMList: """Create a PSMList from a DIA-NN Pandas DataFrame.""" - return PSMList( - ptm_list=[ - cls._get_peptide_spectrum_match(cls(""), entry) - for entry in dataframe.to_dict(orient="records") - ] - ) + records = cast(list[dict[str, str]], dataframe.to_dict(orient="records")) + return PSMList(psm_list=[cls._get_peptide_spectrum_match(entry) for entry in records]) diff --git a/psm_utils/io/flashlfq.py b/psm_utils/io/flashlfq.py index e2dac9f..5ca1a13 100644 --- a/psm_utils/io/flashlfq.py +++ b/psm_utils/io/flashlfq.py @@ -19,8 +19,9 @@ import csv import logging +from collections.abc import Iterator from pathlib import Path -from typing import Optional, Union +from typing import Any import numpy as np @@ -38,12 +39,16 @@ class FlashLFQReader(ReaderBase): """Reader for FlashLFQ TSV format.""" - required_columns = ["Full Sequence", "Precursor Charge"] + required_columns: list[str] = 
["Full Sequence", "Precursor Charge"] - def __iter__(self): + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" - with open(self.filename, "rt") as open_file: + with open(self.filename) as open_file: reader = csv.DictReader(open_file, delimiter="\t") + if not reader.fieldnames: + raise PSMUtilsIOException( + f"FlashLFQ TSV file '{self.filename}' is empty or has no valid header." + ) if not all(col in reader.fieldnames for col in self.required_columns): raise PSMUtilsIOException( f"FlashLFQ TSV file must contain the following columns: {self.required_columns}" @@ -51,7 +56,7 @@ def __iter__(self): for i, row in enumerate(reader): yield self._parse_entry(row, spectrum_id=str(i)) - def _parse_entry(self, entry: dict, spectrum_id) -> PSM: + def _parse_entry(self, entry: dict[str, Any], spectrum_id: str) -> PSM: """Parse single FlashLFQ TSV entry to :py:class:`~psm_utils.psm.PSM`.""" # Replace empty strings with None entry = {k: v if v else None for k, v in entry.items()} @@ -66,7 +71,7 @@ def _parse_entry(self, entry: dict, spectrum_id) -> PSM: ) @staticmethod - def _parse_protein_list(protein_accession: Optional[str]) -> list[str]: + def _parse_protein_list(protein_accession: str | None) -> list[str]: """Parse protein list string to list of protein accessions.""" if not protein_accession: return [] @@ -81,14 +86,24 @@ def _parse_protein_list(protein_accession: Optional[str]) -> list[str]: class FlashLFQWriter(WriterBase): """Reader for FlashLFQ TSV format.""" + _default_fieldnames: list[str] = [ + "File Name", + "Base Sequence", + "Full Sequence", + "Peptide Monoisotopic Mass", + "Scan Retention Time", + "Precursor Charge", + "Protein Accession", + ] + def __init__( self, - filename: Union[str, Path], - *args, + filename: str | Path, + *args: Any, fdr_threshold: float = 0.01, only_targets: bool = True, - **kwargs, - ): + **kwargs: Any, + ) -> None: """ Reader for psm_utils TSV format. 
@@ -96,40 +111,37 @@ def __init__( ---------- filename Path to PSM file. + *args + Additional positional arguments passed to the base class. fdr_threshold FDR threshold for filtering PSMs. only_targets If True, only target PSMs are written to file. If False, both target and decoy PSMs are written. + **kwargs + Additional keyword arguments passed to the base class. """ super().__init__(filename, *args, **kwargs) - self.fdr_threshold = fdr_threshold - self.only_targets = only_targets + self.fdr_threshold: float = fdr_threshold + self.only_targets: bool = only_targets - self._open_file = None - self._writer = None - self.fieldnames = None + self._open_file: Any = None + self._writer: Any = None + self.fieldnames: list[str] | None = None def __enter__(self) -> FlashLFQWriter: + """Open file for writing and return self.""" if Path(self.filename).is_file(): # Get fieldnames from existing file - with open(self.filename, "rt") as open_file: + with open(self.filename) as open_file: # Get fieldnames self.fieldnames = open_file.readline().strip().split("\t") - mode = "at" + mode: str = "at" else: - # Set default fieldnames - self.fieldnames = [ - "File Name", - "Base Sequence", - "Full Sequence", - "Peptide Monoisotopic Mass", - "Scan Retention Time", - "Precursor Charge", - "Protein Accession", - ] + # Set default fieldnames; avoiding mutation of class variable + self.fieldnames = self._default_fieldnames[:] mode = "wt" # Open file and writer @@ -146,12 +158,14 @@ def __enter__(self) -> FlashLFQWriter: return self - def __exit__(self, *args, **kwargs) -> None: - self._open_file.close() + def __exit__(self, *args: Any, **kwargs: Any) -> None: + """Close file and writer.""" + if self._open_file is not None: + self._open_file.close() self._open_file = None self._writer = None - def write_psm(self, psm: PSM): + def write_psm(self, psm: PSM) -> None: """ Write a single PSM to new or existing PSM file. 
@@ -168,14 +182,14 @@ def write_psm(self, psm: PSM): entry = self._psm_to_entry(psm) try: - self._writer.writerow(entry) + self._writer.writerow(entry) # type: ignore[union-attr] except AttributeError as e: raise PSMUtilsIOException( f"`write_psm` method can only be called if `{self.__class__.__qualname__}`" "is opened in context (i.e., using the `with` statement)." ) from e - def write_file(self, psm_list: PSMList): + def write_file(self, psm_list: PSMList) -> None: """ Write an entire PSMList to a new PSM file. @@ -188,8 +202,8 @@ def write_file(self, psm_list: PSMList): # Filter out decoys if self.only_targets: # Accept both None and False - target_mask = np.array([not psm.is_decoy for psm in psm_list]) - LOGGER.debug(f"Skipping {~target_mask.sum()} decoy PSMs for FlashLFQ file.") + target_mask = np.array([not psm.is_decoy for psm in psm_list], dtype=bool) + LOGGER.debug(f"Skipping {(~target_mask).sum()} decoy PSMs for FlashLFQ file.") else: target_mask = np.ones(len(psm_list), dtype=bool) @@ -198,15 +212,18 @@ def write_file(self, psm_list: PSMList): LOGGER.warning( "Not all PSMs have a q-value. Skipping FDR filtering for FlashLFQ file." 
) - fdr_mask = np.ones(len(psm_list), dtype=bool) + fdr_mask: np.ndarray[Any, np.dtype[np.bool_]] = np.ones(len(psm_list), dtype=bool) else: fdr_mask = psm_list["qvalue"] <= self.fdr_threshold - filtered_by_fdr = (~fdr_mask & target_mask).sum() + filtered_by_fdr: int = (~fdr_mask & target_mask).sum() LOGGER.debug(f"Skipping {filtered_by_fdr} PSMs above FDR threshold for FlashLFQ file.") - filtered_psm_list = psm_list[target_mask & fdr_mask] + filtered_psm_list: PSMList = psm_list[target_mask & fdr_mask] - with open(self.filename, "wt", newline="") as f: + with open(self.filename, "w", newline="") as f: + if not self.fieldnames: + # Set default fieldnames; avoiding mutation of class variable + self.fieldnames = self._default_fieldnames[:] writer = csv.DictWriter( f, fieldnames=self.fieldnames, delimiter="\t", extrasaction="ignore" ) @@ -215,7 +232,7 @@ def write_file(self, psm_list: PSMList): writer.writerow(self._psm_to_entry(psm)) @staticmethod - def _psm_to_entry(psm: PSM) -> dict: + def _psm_to_entry(psm: PSM) -> dict[str, Any]: """Convert :py:class:`~psm_utils.psm.PSM` to FlashLFQ TSV entry.""" return { "File Name": psm.run, diff --git a/psm_utils/io/fragpipe.py b/psm_utils/io/fragpipe.py index fc07395..badbdc5 100644 --- a/psm_utils/io/fragpipe.py +++ b/psm_utils/io/fragpipe.py @@ -6,7 +6,6 @@ Notes ----- - - Decoy PSMs and q-values are not returned by FragPipe. 
""" @@ -15,8 +14,11 @@ import csv from abc import ABC +from collections.abc import Iterator from pathlib import Path -from typing import Iterable, Optional +from typing import Any, cast + +import pandas as pd from psm_utils.io._base_classes import ReaderBase from psm_utils.io._utils import set_csv_field_size_limit @@ -25,7 +27,7 @@ set_csv_field_size_limit() -RESCORING_FEATURES = [ +RESCORING_FEATURES: list[str] = [ "Peptide Length", "Retention", "Observed Mass", @@ -38,12 +40,17 @@ class FragPipeReader(ReaderBase, ABC): + """Reader for FragPipe TSV format.""" + + use_calibrated_mz: bool + _mz_key: str + def __init__( self, - filename, + filename: str | Path, use_calibrated_mz: bool = True, - *args, - **kwargs, + *args: Any, + **kwargs: Any, ) -> None: """ Reader for MSFragger ``psm.tsv`` file. @@ -55,22 +62,25 @@ def __init__( use_calibrated_mz Whether to use ``Calibrated Observed M/Z`` (true) or non-calibrated ``Observed m/z`` (false), by default True. + *args + Additional positional arguments passed to the base class. + **kwargs + Additional keyword arguments passed to the base class. 
""" super().__init__(filename, *args, **kwargs) - self.filename = filename self.use_calibrated_mz = use_calibrated_mz self._mz_key = "Calibrated Observed M/Z" if use_calibrated_mz else "Observed M/Z" - def __iter__(self) -> Iterable[PSM]: + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" with open(self.filename) as msms_in: reader = csv.DictReader(msms_in, delimiter="\t") for row in reader: yield self._get_peptide_spectrum_match(row) - def _get_peptide_spectrum_match(self, psm_dict) -> PSM: + def _get_peptide_spectrum_match(self, psm_dict: dict[str, Any]) -> PSM: """Parse a single PSM from a FragPipe PSM file.""" rescoring_features = {ft: psm_dict[ft] for ft in RESCORING_FEATURES if ft in psm_dict} @@ -98,7 +108,7 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM: ) @staticmethod - def _parse_peptidoform(mod_peptide: str, peptide: str, charge: Optional[str]) -> str: + def _parse_peptidoform(mod_peptide: str, peptide: str, charge: str | None) -> str: """Parse the peptidoform from the modified peptide, peptide, and charge columns.""" if mod_peptide: peptide = mod_peptide @@ -117,14 +127,14 @@ def _parse_peptidoform(mod_peptide: str, peptide: str, charge: Optional[str]) -> @staticmethod def _parse_spectrum_id(spectrum: str) -> str: - """Extract scan number from spectrum ID: ``(file name).(scan #).(scan #).(charge).``""" + """Extract scan number from spectrum ID: ``(file name).(scan #).(scan #).(charge).``.""" try: return spectrum.split(".")[-2] except IndexError: return spectrum @staticmethod - def _parse_protein_list(razor_protein: str, mapped_proteins) -> list[str]: + def _parse_protein_list(razor_protein: str, mapped_proteins: str | None) -> list[str]: """Combine razor protein and mapped proteins into a single list.""" if mapped_proteins: mapped_proteins_list = mapped_proteins.split(", ") @@ -144,11 +154,14 @@ def _parse_run(spectrum_file: str) -> str: return Path(spectrum_file).stem @classmethod - def 
from_dataframe(cls, dataframe) -> PSMList: - """Create a PSMList from a pandas DataFrame.""" + def from_dataframe(cls, dataframe: pd.DataFrame) -> PSMList: + """Create a PSMList from a Pandas DataFrame.""" + # Create a temporary reader instance to access the parsing method + temp_reader = cls(filename="") + return PSMList( - ptm_list=[ - cls._get_peptide_spectrum_match(cls(""), entry) + psm_list=[ + temp_reader._get_peptide_spectrum_match(cast(dict[str, Any], entry)) for entry in dataframe.to_dict(orient="records") ] ) diff --git a/psm_utils/io/idxml.py b/psm_utils/io/idxml.py index c993d85..953b27b 100644 --- a/psm_utils/io/idxml.py +++ b/psm_utils/io/idxml.py @@ -1,27 +1,27 @@ """ Interface with OpenMS idXML PSM files. - Notes ----- - * idXML supports multiple peptide hits (identifications) per spectrum. Each peptide hit is parsed as an individual :py:class:`~psm_utils.psm.PSM` object. """ + from __future__ import annotations import logging import re -from warnings import filterwarnings +from collections.abc import Iterator from pathlib import Path -from typing import Iterable, List, Tuple, Union +from typing import Any, cast +from warnings import filterwarnings -from psm_utils.exceptions import PSMUtilsException from psm_utils.io._base_classes import ReaderBase, WriterBase +from psm_utils.io.exceptions import PSMUtilsIOException +from psm_utils.peptidoform import Peptidoform from psm_utils.psm import PSM from psm_utils.psm_list import PSMList -from psm_utils.peptidoform import Peptidoform filterwarnings( "ignore", @@ -31,14 +31,24 @@ ) try: - import pyopenms as oms #noqa: E402 + import pyopenms as oms # type: ignore[import] + + _has_openms = True except ImportError: _has_openms = False -else: - _has_openms = True + oms = None # type: ignore[assignment] logger = logging.getLogger(__name__) +DEFAULT_SCORE_TYPE = "search_engine_score" +TARGET_DECOY_KEY = "target_decoy" +QVALUE_KEY = "q-value" +PEP_KEY = "PEP" +SPECTRUM_REFERENCE_KEY = "spectrum_reference" 
+ID_MERGE_INDEX_KEY = "id_merge_index" +SPECTRA_DATA_KEY = "spectra_data" +ION_MOBILITY_KEY = "IM" + # Patterns to match open and closed round/square brackets MOD_PATTERN = re.compile(r"\(((?:[^)(]+|\((?:[^)(]+|\([^)(]*\))*\))*)\)") MOD_PATTERN_NTERM = re.compile(r"^\.\[((?:[^][]+|\[(?:[^][]+|\[[^][]*\])*\])*)\]") @@ -47,7 +57,8 @@ # Extracted from the OpenMS PSMFeatureExtractor, which adds and manipulates features that will be given to percolator # https://github.com/OpenMS/OpenMS/blob/342f6524e76a2bab3dcb428ba2f4aa2d6bfe8483/src/topp/PSMFeatureExtractor.cpp RESCORING_FEATURE_LIST = [ - "isotope_error" "MS:1002049", # MSGFPlus unchanged RawScore + "isotope_error", + "MS:1002049", # MSGFPlus unchanged RawScore "MS:1002050", # MSGFPlus unchanged DeNovoScore "MSGF:ScoreRatio", "MSGF:Energy", @@ -62,8 +73,8 @@ "XTANDEM:hyperscore", "XTANDEM:deltascore", "MS:1001330", # expect_score - "hyperscore", # MSfragger - "nextscore", # MSfragger + "hyperscore", # MSFragger + "nextscore", # MSFragger "COMET:deltaCn", # recalculated deltaCn = (current_XCorr - 2nd_best_XCorr) / max(current_XCorr, 1) "COMET:deltaLCn", # deltaLCn = (current_XCorr - worst_XCorr) / max(current_XCorr, 1) "COMET:lnExpect", # log(E-value) @@ -76,7 +87,8 @@ "MASCOT:delta_score", # delta score based on mScore "CONCAT:lnEvalue", "CONCAT:deltaLnEvalue", - "SAGE:ln(-poisson)" "SAGE:ln(delta_best)", + "SAGE:ln(-poisson)", + "SAGE:ln(delta_best)", "SAGE:ln(delta_next)", "SAGE:ln(matched_intensity_pct)", "SAGE:longest_b", @@ -88,7 +100,14 @@ class IdXMLReader(ReaderBase): - def __init__(self, filename: Union[Path, str], *args, **kwargs) -> None: + """Reader for idXML files with comprehensive type safety and error handling.""" + + protein_ids: Any # list[oms.ProteinIdentification] + peptide_ids: Any # list[oms.PeptideIdentification] + user_params_metadata: list[str] + rescoring_features: list[str] + + def __init__(self, filename: Path | str, *args: Any, **kwargs: Any) -> None: """ Reader for idXML files. 
@@ -96,34 +115,50 @@ def __init__(self, filename: Union[Path, str], *args, **kwargs) -> None: ---------- filename: str, pathlib.Path Path to idXML file. + *args + Additional positional arguments passed to the base class. + **kwargs + Additional keyword arguments passed to the base class. Examples -------- >>> from psm_utils.io import IdXMLReader >>> reader = IdXMLReader("example.idXML") >>> psm_list = [psm for psm in reader] + """ super().__init__(filename, *args, **kwargs) if not _has_openms: raise ImportError("pyOpenMS is required to read idXML files") + self.protein_ids, self.peptide_ids = self._parse_idxml() self.user_params_metadata = self._get_userparams_metadata(self.peptide_ids[0].getHits()[0]) self.rescoring_features = self._get_rescoring_features(self.peptide_ids[0].getHits()[0]) - def __iter__(self) -> Iterable[PSM]: - """ - Iterate over file and return PSMs one-by-one. - """ + def __iter__(self) -> Iterator[PSM]: + """Iterate over file and return PSMs one-by-one.""" for peptide_id in self.peptide_ids: for peptide_hit in peptide_id.getHits(): yield self._parse_psm(self.protein_ids, peptide_id, peptide_hit) - def _parse_idxml(self) -> Tuple[oms.ProteinIdentification, oms.PeptideIdentification]: + def _parse_idxml(self) -> tuple[Any, Any]: """ Parse idXML using pyopenms and perform sanity checks to make sure the file is not empty. 
+ + Returns + ------- + tuple of (Any, Any) + Tuple containing (ProteinIdentification, PeptideIdentification) lists + + Raises + ------ + IdXMLReaderEmptyListException + If the idXML file contains no data to parse + """ - protein_ids, peptide_ids = [], [] - oms.IdXMLFile().load(str(self.filename), protein_ids, peptide_ids) + protein_ids: Any = [] # list[oms.ProteinIdentification] + peptide_ids: Any = [] # list[oms.PeptideIdentification] + oms.IdXMLFile().load(str(self.filename), protein_ids, peptide_ids) # type: ignore if len(protein_ids) == 0: raise IdXMLReaderEmptyListException( @@ -145,11 +180,24 @@ def _parse_peptidoform(sequence: str, charge: int) -> str: """ Parse idXML peptide to :py:class:`~psm_utils.peptidoform.Peptidoform`. + Parameters + ---------- + sequence + Peptide sequence in idXML format + charge + Precursor charge state + + Returns + ------- + str + Peptide sequence in Peptidoform format with charge + Notes ----- Implemented according to the documentation on `github.com/OpenMS/OpenMS `_ . The differentiation between square- and round bracket notation is removed after parsing. + """ sequence = MOD_PATTERN.sub(r"[\1]", sequence) if sequence[:2] == ".[": @@ -163,9 +211,9 @@ def _parse_peptidoform(sequence: str, charge: int) -> str: def _parse_psm( self, - protein_ids: oms.ProteinIdentification, - peptide_id: oms.PeptideIdentification, - peptide_hit: oms.PeptideHit, + protein_ids: Any, + peptide_id: Any, + peptide_hit: Any, ) -> PSM: """ Parse idXML :py:class:`~pyopenms.PeptideHit` to :py:class:`~psm_utils.psm.PSM`. @@ -173,6 +221,21 @@ def _parse_psm( Uses additional information from :py:class:`~pyopenms.ProteinIdentification` and :py:class:`~pyopenms.PeptideIdentification` to annotate parameters of the :py:class:`~psm_utils.psm.PSM` object. 
+ + Parameters + ---------- + protein_ids + List of ProteinIdentification objects + peptide_id + PeptideIdentification object + peptide_hit + PeptideHit object + + Returns + ------- + PSM + Parsed PSM object with all available information + """ peptidoform = self._parse_peptidoform( peptide_hit.getSequence().toString(), peptide_hit.getCharge() @@ -188,13 +251,13 @@ def _parse_psm( } return PSM( peptidoform=peptidoform, - spectrum_id=peptide_id.getMetaValue("spectrum_reference"), + spectrum_id=peptide_id.getMetaValue(SPECTRUM_REFERENCE_KEY), run=self._get_run(protein_ids, peptide_id), is_decoy=self._is_decoy(peptide_hit), score=peptide_hit.getScore(), precursor_mz=peptide_id.getMZ(), retention_time=peptide_id.getRT(), - ion_mobility=float(im) if (im := peptide_id.getMetaValue("IM")) is not None else None, + ion_mobility=self._get_ion_mobility(peptide_hit), protein_list=[ accession.decode() for accession in peptide_hit.extractProteinAccessionsSet() ], @@ -204,66 +267,96 @@ def _parse_psm( # to original sequence in writer provenance_data={str(peptidoform): peptide_hit.getSequence().toString()}, # Store metadata of PeptideIdentification and PeptideHit objects - metadata= {**peptide_id_metadata, **peptide_hit_metadata}, + metadata={**peptide_id_metadata, **peptide_hit_metadata}, rescoring_features={ - key: float(peptide_hit.getMetaValue(key)) for key in self.rescoring_features + key: float(peptide_hit.getMetaValue(key)) # type: ignore + for key in self.rescoring_features }, ) @staticmethod - def _get_run( - protein_ids: oms.ProteinIdentification, peptide_id: oms.PeptideIdentification - ) -> str: + def _get_run(protein_ids: Any, peptide_id: Any) -> str | None: """ Get run name from idXML using pyopenms. If the idXML file contains a merge index, use it to annotate the run name without file extension. 
""" - if peptide_id.metaValueExists("id_merge_index"): - run = Path( - protein_ids[0] - .getMetaValue("spectra_data")[peptide_id.getMetaValue("id_merge_index")] - .decode() - ).stem - elif protein_ids[0].metaValueExists("spectra_data"): - run = Path(protein_ids[0].getMetaValue("spectra_data")[0].decode()).stem + # Check if spectra_data is available + if not protein_ids[0].metaValueExists(SPECTRA_DATA_KEY): + return None + + spectra_data = cast(list[bytes], protein_ids[0].getMetaValue(SPECTRA_DATA_KEY)) + + # Determine index to use + if peptide_id.metaValueExists(ID_MERGE_INDEX_KEY): + index = cast(int, peptide_id.getMetaValue(ID_MERGE_INDEX_KEY)) else: - run = None + index = 0 - # Convert back to None value (see writer) - if run == "None": - run = None + # Extract run path + try: + run_path = Path(spectra_data[index].decode()).stem + except (IndexError, UnicodeDecodeError): + return None + + # Handle the special case where run path is the string "None" + return None if run_path == "None" else run_path + + @staticmethod + def _get_ion_mobility(peptide_hit: Any) -> float | None: + """ + Get ion mobility from PeptideHit. 
- return run + Parameters + ---------- + peptide_hit + PeptideHit object + + Returns + ------- + float or None + Ion mobility value or None if not available or invalid + + """ + if not peptide_hit.metaValueExists(ION_MOBILITY_KEY): + return None - def _get_userparams_metadata(self, peptide_hit: oms.PeptideHit) -> List[str]: + im_value = peptide_hit.getMetaValue(ION_MOBILITY_KEY) + try: + return float(im_value) # type: ignore[arg-type] + except (ValueError, TypeError): + return None + + def _get_userparams_metadata(self, peptide_hit: Any) -> list[str]: """Get list of string type UserParams attached to each PeptideHit.""" # Fill the key list with all the keys from the PeptideHit # Empty list is required for the Cython wrapper to work correctly - keys = [] + keys: list[bytes] = [] peptide_hit.getKeys(keys) - keys = [ + + return [ key.decode() for key in keys if not self._is_float(peptide_hit.getMetaValue(key.decode())) ] - return keys - def _get_rescoring_features(self, peptide_hit: oms.PeptideHit) -> List[str]: + def _get_rescoring_features(self, peptide_hit: Any) -> list[str]: """Get list of rescoring features in UserParams attached to each PeptideHit.""" - keys = [] + keys: list[bytes] = [] peptide_hit.getKeys(keys) - keys = [ + + return [ key.decode() for key in keys - if self._is_float(peptide_hit.getMetaValue(key.decode())) - and key.decode() in RESCORING_FEATURE_LIST + if ( + self._is_float(peptide_hit.getMetaValue(key.decode())) + and key.decode() in RESCORING_FEATURE_LIST + ) ] - return keys @staticmethod - def _is_float(element: any) -> bool: + def _is_float(element: Any) -> bool: """Check if element can be coerced to a float.""" if element is None: return False @@ -274,22 +367,27 @@ def _is_float(element: any) -> bool: return False @staticmethod - def _is_decoy(peptide_hit: oms.PeptideHit) -> bool: + def _is_decoy(peptide_hit: Any) -> bool | None: """Check if PSM is target or decoy.""" - if peptide_hit.metaValueExists("target_decoy"): - return 
peptide_hit.getMetaValue("target_decoy") == "decoy" + if peptide_hit.metaValueExists(TARGET_DECOY_KEY): + return peptide_hit.getMetaValue(TARGET_DECOY_KEY) == "decoy" else: return None class IdXMLWriter(WriterBase): + """Writer for idXML files with comprehensive error handling.""" + + protein_ids: Any | None + peptide_ids: Any | None + def __init__( self, - filename: Union[str, Path], - protein_ids=None, - peptide_ids=None, - *args, - **kwargs, + filename: str | Path, + *args: Any, + protein_ids: Any | None = None, + peptide_ids: Any | None = None, + **kwargs: Any, ) -> None: """ Writer for idXML files. @@ -298,10 +396,14 @@ def __init__( ---------- filename Path to PSM file. + *args + Additional positional arguments passed to the base class. protein_ids - Optional :py:class:`~pyopenms.ProteinIdentification` object to be written to the idXML file. + Optional list of :py:class:`~pyopenms.ProteinIdentification` objects to be written to the idXML file. peptide_ids - Optional :py:class:`~pyopenms.PeptideIdentification` object to be written to the idXML file. + Optional list of :py:class:`~pyopenms.PeptideIdentification` objects to be written to the idXML file. + **kwargs + Additional keyword arguments passed to the base class. Notes ----- @@ -335,14 +437,16 @@ def __init__( super().__init__(filename, *args, **kwargs) if not _has_openms: raise ImportError("pyOpenMS is required to write idXML files") + self.protein_ids = protein_ids self.peptide_ids = peptide_ids - self._writer = None def __enter__(self) -> IdXMLWriter: + """Open file for writing and return self.""" return self def __exit__(self, *args, **kwargs) -> None: + """Close file and writer.""" pass def write_psm(self, psm: PSM): @@ -351,6 +455,11 @@ def write_psm(self, psm: PSM): This method is currently not supported (see Notes). 
+ Parameters + ---------- + psm + PSM object to write + Raises ------ NotImplementedError @@ -366,6 +475,11 @@ def write_file(self, psm_list: PSMList) -> None: If `self.protein_ids` and `self.peptide_ids` are not None, the PSM list scores, ranks, and rescoring features will first be merged with the existing IDs from those objects. + Parameters + ---------- + psm_list + List of PSM objects to write to file + """ psm_dict = psm_list.get_psm_dict() @@ -381,7 +495,9 @@ def write_file(self, psm_list: PSMList) -> None: else: self._create_new_ids(psm_dict) - def _update_existing_ids(self, psm_dict: dict) -> None: + def _update_existing_ids( + self, psm_dict: dict[str | None, dict[str, dict[str, list[PSM]]]] + ) -> None: """ Update an existing idXML file with info from the PSM list or write a new one. @@ -389,20 +505,27 @@ def _update_existing_ids(self, psm_dict: dict) -> None: :py:class:`~pyopenms.PeptideIdentification` objects with new features from the PSMList or create new ones. """ + if not self.protein_ids or not self.peptide_ids: + raise IdXMLException( + "Both protein_ids and peptide_ids must be provided to update existing idXML." 
+ ) # Access run name(s) from ProteinIdentification spectrum_files = [ - Path(run.decode()).stem for run in self.protein_ids[0].getMetaValue("spectra_data") + Path(run.decode()).stem + for run in cast(list[bytes], self.protein_ids[0].getMetaValue(SPECTRA_DATA_KEY)) ] for peptide_id in self.peptide_ids: if len(spectrum_files) > 1: - run = spectrum_files[peptide_id.getMetaValue("id_merge_index")] + id_merge_index = cast(int, peptide_id.getMetaValue(ID_MERGE_INDEX_KEY)) + run = spectrum_files[id_merge_index] else: run = spectrum_files[0] # Get PSM objects associated from runs since we are writing a merged idXML # NOTE: Collections with multiple protein_ids and peptide_ids is not supported try: - psms = psm_dict[None][run][peptide_id.getMetaValue("spectrum_reference")] + spectrum_ref = cast(str, peptide_id.getMetaValue(SPECTRUM_REFERENCE_KEY)) + psms = psm_dict[None][run][spectrum_ref] except KeyError as e: raise IdXMLException( "Multiple collections are not supported when parsing single pyopenms protein " @@ -410,7 +533,11 @@ def _update_existing_ids(self, psm_dict: dict) -> None: ) from e # Dict of UNIMOD peptide sequence and PSM object - hit_dict = {psm.provenance_data[str(psm.peptidoform)]: psm for psm in psms} + hit_dict = { + (psm.provenance_data or {})[str(psm.peptidoform)]: psm + for psm in psms + if psm.provenance_data and str(psm.peptidoform) in psm.provenance_data + } # Update PeptideHits according to the PSM objects updated_peptide_hits = [] for peptide_hit in peptide_id.getHits(): @@ -421,112 +548,153 @@ def _update_existing_ids(self, psm_dict: dict) -> None: peptide_id.setHits(updated_peptide_hits) - oms.IdXMLFile().store(str(self.filename), self.protein_ids, self.peptide_ids) + oms.IdXMLFile().store(str(self.filename), self.protein_ids, self.peptide_ids) # type: ignore - def _update_peptide_hit(self, peptide_hit: oms.PeptideHit, psm: PSM) -> None: - """ - Inplace update of :py:class:`~pyopenms.PeptideHit` with novel predicted features - information from 
:py:class:`~psm_utils.psm.PSM`. - """ + def _update_peptide_hit(self, peptide_hit: Any, psm: PSM) -> None: + """Inplace update of PeptideHit with novel predicted features information from PSM.""" + # Update core PSM attributes if psm.score is not None: peptide_hit.setScore(psm.score) if psm.rank is not None: peptide_hit.setRank(psm.rank - 1) # 1-based to 0-based if psm.qvalue is not None: - peptide_hit.setMetaValue("q-value", psm.qvalue) + peptide_hit.setMetaValue(QVALUE_KEY, psm.qvalue) if psm.pep is not None: - peptide_hit.setMetaValue("PEP", psm.pep) + peptide_hit.setMetaValue(PEP_KEY, psm.pep) - for feature, value in psm.rescoring_features.items(): - if feature not in RESCORING_FEATURE_LIST: - # Convert numpy objects to floats since pyopenms does not support numpy objects to be added - peptide_hit.setMetaValue(feature, float(value)) + # Add rescoring features (only those not in the standard list) + if psm.rescoring_features: + for feature, value in psm.rescoring_features.items(): + if feature not in RESCORING_FEATURE_LIST: + # Convert numpy objects to floats as pyopenms does not support numpy objects + peptide_hit.setMetaValue(feature, float(value)) - def _create_new_ids(self, psm_dict: dict) -> None: - """ - Create new ProteinIdentification and PeptideIdentification objects with new features from - the PSMList. 
- """ + def _create_new_ids(self, psm_dict: dict[str | None, dict[str, dict[str, list[PSM]]]]) -> None: + """Create new ProteinIdentification and PeptideIdentification objects with new features.""" for collection, runs in psm_dict.items(): - self.protein_ids = oms.ProteinIdentification() - self.peptide_ids = [] - - # Set msrun filename with spectra_data meta value - msrun_reference = [str(run).encode() for run in runs.keys()] - self.protein_ids.setMetaValue("spectra_data", msrun_reference) - - protein_list = [] - for run, psm_dict_run in runs.items(): - for spectrum_id, psms in psm_dict_run.items(): - protein_list.append( - [accession for psm in psms for accession in psm.protein_list] - ) - - # Fill PeptideIdentification object with PeptideHits - peptide_id = oms.PeptideIdentification() - peptide_id.setMetaValue("spectrum_reference", spectrum_id) - peptide_id.setMetaValue("id_merge_index", msrun_reference.index(str(run).encode())) - if psms[0].score is not None: - peptide_id.setScoreType("search_engine_score") - if psms[0].precursor_mz is not None: - peptide_id.setMZ(psms[0].precursor_mz) - if psms[0].retention_time is not None: - peptide_id.setRT(psms[0].retention_time) - - # Fill PeptideHits object - peptide_hits = [] - for psm in psms: - peptide_hit = oms.PeptideHit() - peptide_hit.setSequence( - oms.AASequence.fromString( - self._convert_proforma_to_unimod(psm.peptidoform) - ) - ) - peptide_hit.setCharge(psm.peptidoform.precursor_charge) - peptide_hit.setMetaValue( - "target_decoy", - "" - if psm.is_decoy is None - else ("decoy" if psm.is_decoy else "target"), - ) - if psm.qvalue is not None: - peptide_hit.setMetaValue("q-value", psm.qvalue) - if psm.pep is not None: - peptide_hit.setMetaValue("PEP", psm.pep) - if psm.rank is not None: - peptide_hit.setRank(psm.rank - 1) # 1-based to 0-based - self._add_meta_values_from_dict(peptide_hit, psm.metadata) - self._add_meta_values_from_dict(peptide_hit, psm.provenance_data) - 
self._add_meta_values_from_dict(peptide_hit, psm.rescoring_features) - - if psm.protein_list is not None: - for protein in psm.protein_list: - peptide_evidence = oms.PeptideEvidence() - peptide_evidence.setProteinAccession(protein) - peptide_hit.addPeptideEvidence(peptide_evidence) - - peptide_hits.append(peptide_hit) - - peptide_id.setHits(peptide_hits) - self.peptide_ids.append(peptide_id) - - # Get unique protein accessions - protein_list = list( - set([accession for proteins in protein_list for accession in proteins]) - ) - protein_hits = [] - for accession in protein_list: - protein_hit = oms.ProteinHit() - protein_hit.setAccession(accession) - protein_hits.append(protein_hit) - self.protein_ids.setHits(protein_hits) - - # Write an idXML file for each collection - oms.IdXMLFile().store( - "/".join(filter(None, [collection, str(self.filename)])), - [self.protein_ids], - self.peptide_ids, + self._create_ids_for_collection(collection, runs) + + def _create_ids_for_collection( + self, collection: str | None, runs: dict[str, dict[str, list[PSM]]] + ) -> None: + """Create ProteinIdentification and PeptideIdentification objects for a single collection.""" + self.protein_ids = [oms.ProteinIdentification()] # type: ignore + self.peptide_ids = [] + + # Set msrun filename with spectra_data meta value + msrun_reference = [str(run).encode() for run in runs.keys()] + self.protein_ids[0].setMetaValue(SPECTRA_DATA_KEY, msrun_reference) + + protein_list: list[list[str]] = [] + + for run, psm_dict_run in runs.items(): + for spectrum_id, psms in psm_dict_run.items(): + # Collect protein accessions + protein_list.append( + [accession for psm in psms for accession in (psm.protein_list or [])] + ) + + # Create PeptideIdentification + peptide_id = self._create_peptide_identification( + spectrum_id, run, msrun_reference, psms + ) + + # Create PeptideHits + peptide_hits = [self._create_peptide_hit(psm) for psm in psms] + peptide_id.setHits(peptide_hits) + 
self.peptide_ids.append(peptide_id) + + # Create protein hits + self._create_protein_hits(protein_list) + + # Write idXML file + filename = "/".join(filter(None, [collection, str(self.filename)])) + oms.IdXMLFile().store(filename, self.protein_ids, self.peptide_ids) # type: ignore + + def _create_peptide_identification( + self, + spectrum_id: str, + run: str, + msrun_reference: list[bytes], + psms: list[PSM], + ) -> Any: + """Create a PeptideIdentification object for a spectrum.""" + peptide_id = oms.PeptideIdentification() # type: ignore + peptide_id.setMetaValue(SPECTRUM_REFERENCE_KEY, spectrum_id) + peptide_id.setMetaValue(ID_MERGE_INDEX_KEY, msrun_reference.index(str(run).encode())) + + # Set properties from first PSM + first_psm = psms[0] + if first_psm.score is not None: + peptide_id.setScoreType(DEFAULT_SCORE_TYPE) + if first_psm.precursor_mz is not None: + peptide_id.setMZ(first_psm.precursor_mz) + if first_psm.retention_time is not None: + peptide_id.setRT(first_psm.retention_time) + + return peptide_id + + def _create_peptide_hit(self, psm: PSM) -> Any: + """Create a PeptideHit object from a PSM.""" + peptide_hit = oms.PeptideHit() # type: ignore + + # Set sequence + peptide_hit.setSequence( + oms.AASequence.fromString( # type: ignore + self._convert_proforma_to_unimod(psm.peptidoform) ) + ) + + # Set charge + if psm.peptidoform.precursor_charge is not None: + peptide_hit.setCharge(psm.peptidoform.precursor_charge) + + # Set target/decoy information + target_decoy_value = ( + "" if psm.is_decoy is None else ("decoy" if psm.is_decoy else "target") + ) + peptide_hit.setMetaValue(TARGET_DECOY_KEY, target_decoy_value) + + # Set optional values + if psm.qvalue is not None: + peptide_hit.setMetaValue(QVALUE_KEY, psm.qvalue) + if psm.pep is not None: + peptide_hit.setMetaValue(PEP_KEY, psm.pep) + if psm.rank is not None: + peptide_hit.setRank(psm.rank - 1) # 1-based to 0-based + + # Add metadata and features + if psm.metadata: + 
self._add_meta_values_from_dict(peptide_hit, psm.metadata) + if psm.provenance_data: + self._add_meta_values_from_dict(peptide_hit, psm.provenance_data) + if psm.rescoring_features: + self._add_meta_values_from_dict(peptide_hit, psm.rescoring_features) + + # Add protein evidence + if psm.protein_list is not None: + for protein in psm.protein_list: + peptide_evidence = oms.PeptideEvidence() # type: ignore + peptide_evidence.setProteinAccession(protein) + peptide_hit.addPeptideEvidence(peptide_evidence) + + return peptide_hit + + def _create_protein_hits(self, protein_list: list[list[str]]) -> None: + """Create protein hits from collected protein accessions.""" + # Get unique protein accessions + unique_proteins = list( + {accession for protein_sublist in protein_list for accession in protein_sublist} + ) + + protein_hits = [] + for accession in unique_proteins: + protein_hit = oms.ProteinHit() # type: ignore + protein_hit.setAccession(accession) + protein_hits.append(protein_hit) + + if self.protein_ids and len(self.protein_ids) > 0: + self.protein_ids[0].setHits(protein_hits) def _convert_proforma_to_unimod(self, peptidoform: Peptidoform) -> str: """Convert a peptidoform sequence in proforma notation to UNIMOD notation.""" @@ -546,23 +714,29 @@ def _convert_proforma_to_unimod(self, peptidoform: Peptidoform) -> str: return sequence - def _add_meta_values_from_dict(self, peptide_hit: oms.PeptideHit, d: dict) -> None: + def _add_meta_values_from_dict(self, peptide_hit: Any, d: dict[str, Any] | None) -> None: """Add meta values inplace to :py:class:`~pyopenms.PeptideHit` from a dictionary.""" - if d is not None: - for key, value in d.items(): - # Convert numpy objects to floats since pyopenms does not support numpy objects to be added - if not isinstance(value, str): + if d is None: + return + + for key, value in d.items(): + # Convert numpy objects to floats since pyopenms does not support numpy objects + if not isinstance(value, str): + try: value = float(value) - 
peptide_hit.setMetaValue(key, value) + except (ValueError, TypeError): + # Skip values that cannot be converted + continue + peptide_hit.setMetaValue(key, value) -class IdXMLException(PSMUtilsException): - """Exception in psm_utils.io.IdXML""" +class IdXMLException(PSMUtilsIOException): + """Exception in psm_utils.io.IdXML.""" pass -class IdXMLReaderEmptyListException(PSMUtilsException): - """Exception in psm_utils.io.IdXMLReader""" +class IdXMLReaderEmptyListException(PSMUtilsIOException): + """Exception in psm_utils.io.IdXMLReader.""" pass diff --git a/psm_utils/io/ionbot.py b/psm_utils/io/ionbot.py index 43ad511..79b0e9b 100644 --- a/psm_utils/io/ionbot.py +++ b/psm_utils/io/ionbot.py @@ -8,15 +8,14 @@ import csv import re +from collections.abc import Iterator from pathlib import Path -from typing import Dict, Iterable, Union from psm_utils.io._base_classes import ReaderBase +from psm_utils.io._utils import set_csv_field_size_limit from psm_utils.io.exceptions import PSMUtilsIOException from psm_utils.peptidoform import Peptidoform from psm_utils.psm import PSM -from psm_utils.psm_list import PSMList -from psm_utils.io._utils import set_csv_field_size_limit set_csv_field_size_limit() @@ -36,6 +35,8 @@ class IonbotReader(ReaderBase): + """Reader for ionbot PSM files.""" + def __init__( self, filename: str | Path, @@ -47,12 +48,15 @@ def __init__( Parameters ---------- - filename: str, pathlib.Path + filename Path to PSM file. + *args + Additional positional arguments passed to parent class. + **kwargs + Additional keyword arguments passed to parent class. 
Examples -------- - IonbotReader supports iteration: >>> from psm_utils.io.ionbot import IonbotReader @@ -70,68 +74,115 @@ def __init__( """ super().__init__(filename, *args, **kwargs) - self.filename = filename - def __iter__(self) -> Iterable[PSM]: - """Iterate over file and return PSMs one-by-one.""" - with open(self.filename, "rt") as open_file: + def __iter__(self) -> Iterator[PSM]: + """ + Iterate over file and return PSMs one-by-one. + + Yields + ------ + PSM + Individual PSM objects from the ionbot CSV file. + + Raises + ------ + FileNotFoundError + If the specified file does not exist. + csv.Error + If there are issues reading the CSV file. + InvalidIonbotModificationError + If modification parsing fails. + + """ + with open(self.filename) as open_file: reader = csv.DictReader(open_file, delimiter=",") for row in reader: yield self._get_peptide_spectrum_match(row) - def read_file(self) -> PSMList: - """Read full PSM file into a PSMList object.""" - return PSMList(psm_list=[psm for psm in self]) - - def _get_peptide_spectrum_match(self, psm_dict: Dict[str, str | float]) -> PSM: - return PSM( - peptidoform=self._parse_peptidoform( - psm_dict["matched_peptide"], - psm_dict["modifications"], - psm_dict["charge"], - ), - spectrum_id=psm_dict["spectrum_title"], - run=psm_dict["spectrum_file"], - is_decoy=( - True - if psm_dict["database"] == "D" - else False if psm_dict["database"] == "T" else None - ), - score=float(psm_dict["psm_score"]), - precursor_mz=float(psm_dict["m/z"]), - retention_time=float(psm_dict["observed_retention_time"]), - protein_list=psm_dict["proteins"].split("||"), - source="ionbot", - qvalue=float(psm_dict["q-value"]), - pep=float(psm_dict["PEP"]), - provenance_data=({"ionbot_filename": str(self.filename)}), - metadata={ - col: str(psm_dict[col]) for col in psm_dict.keys() if col not in REQUIRED_COLUMNS - }, - ) + def _get_peptide_spectrum_match(self, psm_dict: dict[str, str]) -> PSM: + """Convert a dictionary row from ionbot CSV to a PSM 
object.""" + try: + return PSM( + peptidoform=self._parse_peptidoform( + psm_dict["matched_peptide"], + psm_dict["modifications"], + psm_dict["charge"], + ), + spectrum_id=psm_dict["spectrum_title"], + run=psm_dict["spectrum_file"], + is_decoy=( + True + if psm_dict["database"] == "D" + else False + if psm_dict["database"] == "T" + else None + ), + score=float(psm_dict["psm_score"]), + precursor_mz=float(psm_dict["m/z"]), + retention_time=float(psm_dict["observed_retention_time"]), + protein_list=psm_dict["proteins"].split("||"), + source="ionbot", + qvalue=float(psm_dict["q-value"]), + pep=float(psm_dict["PEP"]), + provenance_data={"ionbot_filename": str(self.filename)}, + metadata={ + col: str(psm_dict[col]) + for col in psm_dict.keys() + if col not in REQUIRED_COLUMNS + }, + ) + except KeyError as e: + raise PSMUtilsIOException(f"Missing required column in ionbot file: {e}") from e + except ValueError as e: + raise PSMUtilsIOException(f"Error parsing numeric value in ionbot file: {e}") from e @staticmethod - def _parse_peptidoform( - peptide: str, modifications: str, charge: Union[str, int] - ) -> Peptidoform: + def _parse_peptidoform(peptide: str, modifications: str, charge: str | int) -> Peptidoform: """Parse peptide, modifications, and charge to Peptidoform.""" # Split peptide into list of amino acids with termini - peptide = peptide = [""] + list(peptide) + [""] + peptide_list: list[str] = [""] + list(peptide) + [""] # Add modifications - pattern = re.compile(r"^(?P\[\S*?\])?(?P.*?)(?P\[\S*?\])?$") - for position, label in zip(modifications.split("|")[::2], modifications.split("|")[1::2]): - mod_match = pattern.search(label) - if mod_match.group("U"): - parsed_label = "U:" + mod_match.group("U")[1:-1] - else: - parsed_label = mod_match.group("mod") - peptide[int(position)] += f"[{parsed_label}]" + pattern: re.Pattern[str] = re.compile(r"^(?P\[\S*?\])?(?P.*?)(?P\[\S*?\])?$") + + if modifications: # Handle empty modifications string + mod_parts = 
modifications.split("|") + if len(mod_parts) % 2 != 0: + raise InvalidIonbotModificationError( + f"Invalid modification string format: '{modifications}'. " + "Expected even number of parts (position|label pairs)." + ) + + for position_str, label in zip(mod_parts[::2], mod_parts[1::2]): + mod_match = pattern.search(label) + if not mod_match: + raise InvalidIonbotModificationError( + f"Invalid modification format '{label}' at position {position_str} in " + f"'{modifications}'." + ) + + try: + position = int(position_str) + except ValueError as e: + raise InvalidIonbotModificationError( + f"Invalid position '{position_str}' in modifications '{modifications}'" + ) from e + + if position < 0 or position >= len(peptide_list): + raise InvalidIonbotModificationError( + f"Position {position} out of range for peptide '{peptide}' (length {len(peptide_list) - 2})" + ) + + if mod_match.group("U"): + parsed_label = "U:" + mod_match.group("U")[1:-1] + else: + parsed_label = mod_match.group("mod") + peptide_list[position] += f"[{parsed_label}]" # Add terminal modifications - peptide[0] = peptide[0] + "-" if peptide[0] else "" - peptide[-1] = "-" + peptide[-1] if peptide[-1] else "" - proforma_seq = "".join(peptide) + peptide_list[0] = peptide_list[0] + "-" if peptide_list[0] else "" + peptide_list[-1] = "-" + peptide_list[-1] if peptide_list[-1] else "" + proforma_seq = "".join(peptide_list) # Add charge state proforma_seq += f"/{charge}" @@ -140,6 +191,13 @@ def _parse_peptidoform( class InvalidIonbotModificationError(PSMUtilsIOException): - """Invalid Peptide Record modification.""" + """ + Exception raised when ionbot modification parsing fails. 
+ + This exception is raised when: + - Modification format is invalid + - Position values are out of range + - Modification string structure is malformed + """ pass diff --git a/psm_utils/io/maxquant.py b/psm_utils/io/maxquant.py index f51aaf6..46f8907 100644 --- a/psm_utils/io/maxquant.py +++ b/psm_utils/io/maxquant.py @@ -5,6 +5,7 @@ import csv import logging import re +from collections.abc import Iterator, Sequence from itertools import compress from pathlib import Path @@ -12,9 +13,9 @@ from psm_utils.exceptions import PSMUtilsException from psm_utils.io._base_classes import ReaderBase +from psm_utils.io._utils import set_csv_field_size_limit from psm_utils.peptidoform import Peptidoform from psm_utils.psm import PSM -from psm_utils.io._utils import set_csv_field_size_limit set_csv_field_size_limit() @@ -44,19 +45,20 @@ def __init__( **kwargs, ) -> None: """ - Reader for MaxQuant msms.txt PSM files. + Initialize reader for MaxQuant msms.txt PSM files. Parameters ---------- - filename: str, pathlib.Path - Path to PSM file. - decoy_prefix: str, optional - Protein name prefix used to denote decoy protein entries. Default: - ``"DECOY_"``. + filename + Path to the MaxQuant msms.txt PSM file. + *args + Additional positional arguments passed to parent class. + **kwargs + Additional keyword arguments passed to parent class. Examples -------- - :py:class:`MSMSReader` supports iteration: + MSMSReader supports iteration: >>> from psm_utils.io.maxquant import MSMSReader >>> for psm in MSMSReader("msms.txt"): @@ -66,20 +68,18 @@ def __init__( GANLGEMTNAGIPVPPGFC[+57.022]VTAEAYK ... 
- Or a full file can be read at once into a - :py:class:`~psm_utils.psm_list.PSMList` object: + Or a full file can be read at once into a :py:class:`~psm_utils.psm_list.PSMList` + object: >>> reader = MSMSReader("msms.txt") >>> psm_list = reader.read_file() """ - super().__init__(filename, *args, **kwargs) - self._validate_msms() - def __iter__(self): - """Iterate over file and return PSMs one-by-one""" + def __iter__(self) -> Iterator[PSM]: + """Iterate over file and return PSMs one-by-one.""" with open(self.filename) as msms_in: reader = csv.DictReader(msms_in, delimiter="\t") for psm_dict in reader: @@ -87,13 +87,16 @@ def __iter__(self): yield psm def _validate_msms(self) -> None: - with open(self.filename, "r") as msms_file: + """Validate that the msms.txt file contains required columns.""" + with open(self.filename) as msms_file: msms_reader = csv.DictReader(msms_file, delimiter="\t") self._evaluate_columns(msms_reader.fieldnames) @staticmethod - def _evaluate_columns(columns) -> bool: - """Case insensitive column evaluation msms file.""" + def _evaluate_columns(columns: Sequence[str] | None) -> None: + """Case insensitive column evaluation for msms file.""" + if columns is None: + raise MSMSParsingError("MSMS file does not contain any columns.") columns = list(map(lambda col: col.lower(), columns)) column_check = [True if col.lower() in columns else False for col in MSMS_REQUIRED_COLUMNS] if not all(column_check): @@ -101,11 +104,12 @@ def _evaluate_columns(columns) -> bool: f"Missing columns: {list(compress(MSMS_REQUIRED_COLUMNS, list(~np.array(column_check))))}" ) - def _get_peptide_spectrum_match(self, psm_dict: dict[str, str | float]) -> PSM: + def _get_peptide_spectrum_match(self, psm_dict: dict[str, str]) -> PSM: """Return a PSM object from MaxQuant msms.txt PSM file.""" - psm = PSM( - peptidoform=self._parse_peptidoform(psm_dict["Modified sequence"], psm_dict["Charge"]), + peptidoform=self._parse_peptidoform( + psm_dict["Modified sequence"], 
int(psm_dict["Charge"]) + ), spectrum_id=psm_dict["Scan number"], run=psm_dict["Raw file"], is_decoy=psm_dict["Reverse"] == "+", @@ -126,8 +130,7 @@ def _get_peptide_spectrum_match(self, psm_dict: dict[str, str | float]) -> PSM: @staticmethod def _parse_peptidoform(modified_seq: str, charge: int) -> Peptidoform: - """Parse modified sequence to :py:class:`~psm_utils.peptidoform.Peptidoform`.""" - + """Parse modified sequence to Peptidoform.""" # pattern to match open and closed round brackets pattern = re.compile(r"\(((?:[^)(]+|\((?:[^)(]+|\([^)(]*\))*\))*)\)") modified_seq = modified_seq.strip("_") diff --git a/psm_utils/io/msamanda.py b/psm_utils/io/msamanda.py index 67d9ccb..d329ac3 100644 --- a/psm_utils/io/msamanda.py +++ b/psm_utils/io/msamanda.py @@ -5,19 +5,20 @@ import csv import logging import re +from collections.abc import Iterator, Sequence from itertools import compress from pathlib import Path import numpy as np -from psm_utils.exceptions import PSMUtilsException from psm_utils.io._base_classes import ReaderBase -from psm_utils.psm import PSM, Peptidoform from psm_utils.io._utils import set_csv_field_size_limit +from psm_utils.io.exceptions import PSMUtilsIOException +from psm_utils.psm import PSM, Peptidoform set_csv_field_size_limit() -logger = logging.getLogger(__name__) +LOGGER = logging.getLogger(__name__) # Minimal set of required columns @@ -49,18 +50,31 @@ class MSAmandaReader(ReaderBase): - """Reader for psm_utils TSV format.""" + """Reader for MS Amanda CSV result files.""" def __init__(self, filename: str | Path, *args, **kwargs) -> None: + """ + Initialize reader for MS Amanda CSV result files. + + Parameters + ---------- + filename + Path to the MS Amanda CSV file. + *args + Additional positional arguments passed to parent class. + **kwargs + Additional keyword arguments passed to parent class. 
+ + """ super().__init__(filename, *args, **kwargs) - self._present_columns = REQUIRED_COLUMNS.copy() - self._rescoring_feature_columns = [] - self._metadata_columns = [] - self._has_rank_column = None + self._present_columns: list[str] = REQUIRED_COLUMNS.copy() + self._rescoring_feature_columns: list[str] = [] + self._metadata_columns: list[str] = [] + self._has_rank_column: bool | None = None - def __iter__(self): + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" - with open(self.filename, "rt") as open_file: + with open(self.filename) as open_file: if not next(open_file).startswith("#"): open_file.seek(0) reader = csv.DictReader(open_file, delimiter="\t") @@ -68,8 +82,11 @@ def __iter__(self): for psm_dict in reader: yield self._get_peptide_spectrum_match(psm_dict) - def _evaluate_columns(self, columns) -> bool: - """Column evaluation for MS Amanda file.""" + def _evaluate_columns(self, columns: Sequence[str] | None) -> None: + """Evaluate and validate columns from MS Amanda file header.""" + if columns is None: + raise MSAmandaParsingError("MS Amanda file does not contain any columns.") + # Check if required columns are present column_check = [True if col in columns else False for col in REQUIRED_COLUMNS] if not all(column_check): @@ -91,7 +108,7 @@ def _evaluate_columns(self, columns) -> bool: if col not in self._present_columns + self._rescoring_feature_columns ] - def _get_peptide_spectrum_match(self, psm_dict: dict[str, str | float]) -> PSM: + def _get_peptide_spectrum_match(self, psm_dict: dict[str, str]) -> PSM: """Return a PSM object from MS Amanda CSV PSM file.""" psm = PSM( peptidoform=self._parse_peptidoform( @@ -120,8 +137,8 @@ def _get_peptide_spectrum_match(self, psm_dict: dict[str, str | float]) -> PSM: return psm @staticmethod - def _parse_peptidoform(seq, modifications, charge): - "Parse MSAmanda sequence, modifications and charge to proforma sequence" + def _parse_peptidoform(seq: str, modifications: 
str, charge: str) -> Peptidoform: + """Parse MSAmanda sequence, modifications and charge to ProForma sequence.""" peptide = [""] + [aa.upper() for aa in seq] + [""] pattern = re.compile( r"(?:(?:(?P[A-Z])(?P\d+))|(?P[CN]-Term))\((?P[^|()]+)\|(?P[-0-9.]+)\|(?Pvariable|fixed)\);?" @@ -129,12 +146,12 @@ def _parse_peptidoform(seq, modifications, charge): for match in pattern.finditer(modifications): if match.group("term") == "N-Term": - peptide[0] = peptide[0] + f'[{match.group("mod_name")}]' + peptide[0] = peptide[0] + f"[{match.group('mod_name')}]" elif match.group("term") == "C-Term": - peptide[-1] = peptide[-1] + f'[{match.group("mod_name")}]' + peptide[-1] = peptide[-1] + f"[{match.group('mod_name')}]" if match.group("loc") is not None: peptide[int(match.group("loc"))] = ( - peptide[int(match.group("loc"))] + f'[{match.group("mod_name")}]' + peptide[int(match.group("loc"))] + f"[{match.group('mod_name')}]" ) peptide[0] = peptide[0] + "-" if peptide[0] else "" @@ -144,7 +161,7 @@ def _parse_peptidoform(seq, modifications, charge): return Peptidoform(proforma_seq + f"/{charge}") -class MSAmandaParsingError(PSMUtilsException): - """Error while parsing MS Amanda CSV PSM file.""" +class MSAmandaParsingError(PSMUtilsIOException): + """Error in parsing MS Amanda CSV file.""" pass diff --git a/psm_utils/io/mzid.py b/psm_utils/io/mzid.py index e10883b..9a5ca50 100644 --- a/psm_utils/io/mzid.py +++ b/psm_utils/io/mzid.py @@ -1,5 +1,5 @@ """ -Reader and writers for the HUPO-PSI mzIdentML format. +Reader and writer for HUPO-PSI mzIdentML format PSM files. See `psidev.info/mzidentml `_ for more info on the format. 
@@ -11,11 +11,12 @@ import logging import re import xml.etree.ElementTree as ET +from collections.abc import Iterator from pathlib import Path -from typing import Union +from typing import Any, cast -from psims.mzid import MzIdentMLWriter -from pyteomics import mzid, proforma +from psims.mzid import MzIdentMLWriter # type: ignore[import] +from pyteomics import mzid # type: ignore[import] from rich.progress import Progress from psm_utils import __version__ @@ -89,21 +90,28 @@ class MzidReader(ReaderBase): - def __init__(self, filename: str | Path, *args, score_key: str = None, **kwargs) -> None: + """Reader for HUPO-PSI mzIdentML format PSM files.""" + + def __init__( + self, filename: str | Path, *args: Any, score_key: str | None = None, **kwargs: Any + ) -> None: """ - Reader for mzIdentML PSM files. + Reader for HUPO-PSI mzIdentML format PSM files. Parameters ---------- - filename: str, pathlib.Path + filename Path to PSM file. - score_key: str, optional + *args + Additional positional arguments passed to parent class. + score_key Name of the score metric to use as PSM score. If not provided, the score metric is inferred from the file if one of the child parameters of ``MS:1001143`` is present. + **kwargs + Additional keyword arguments passed to parent class. 
Examples -------- - MzidReader supports iteration: >>> from psm_utils.io.mzid import MzidReader @@ -134,31 +142,39 @@ def __init__(self, filename: str | Path, *args, score_key: str = None, **kwargs) super().__init__(filename, *args, **kwargs) self._non_metadata_keys = ["ContactRole", "passThreshold"] self._score_key = score_key - self._rt_key = None - self._spectrum_rt_key = None - self._qvalue_key = None - self._pep_key = None - self._im_key = None + self._rt_key: str | None = None + self._spectrum_rt_key: str | None = None + self._qvalue_key: str | None = None + self._pep_key: str | None = None + self._im_key: str | None = None self._source = self._infer_source() - def __iter__(self): - """Iterate over file and return PSMs one-by-one.""" - with mzid.read(str(self.filename)) as reader: + def __iter__(self) -> Iterator[PSM]: + """Iterate over mzIdentML file and return PSMs one-by-one.""" + with mzid.MzIdentML(str(self.filename)) as reader: + first_entry = next(reader) # Parse spectrum metadata - self._get_toplevel_non_metadata_keys(reader[0].keys()) + self._get_toplevel_non_metadata_keys(first_entry.keys()) # Parse PSM non-metadata keys, rt key and score key - self._get_non_metadata_keys(reader[0]["SpectrumIdentificationItem"][0].keys()) + self._get_non_metadata_keys(first_entry["SpectrumIdentificationItem"][0].keys()) + with mzid.MzIdentML(str(self.filename)) as reader: for spectrum in reader: # Parse spectrum metadata spectrum_id = spectrum["spectrumID"] - spectrum_title = ( - spectrum["spectrum title"] if "spectrum title" in spectrum else None + spectrum_title = spectrum.get("spectrum title") + run = Path(spectrum["location"]).stem if spectrum.get("location") else None + rt = ( + float(spectrum[self._spectrum_rt_key]) + if self._spectrum_rt_key and self._spectrum_rt_key in spectrum + else None + ) + ion_mobility = ( + float(spectrum[self._im_key]) + if self._im_key and self._im_key in spectrum + else None ) - run = Path(spectrum["location"]).stem if "location" 
in spectrum else None - rt = float(spectrum[self._spectrum_rt_key]) if self._spectrum_rt_key else None - ion_mobility = float(spectrum[self._im_key]) if self._im_key else None # Parse PSMs from spectrum for entry in spectrum["SpectrumIdentificationItem"]: @@ -167,24 +183,29 @@ def __iter__(self): ) @staticmethod - def _get_xml_namespace(root_tag): - """Get the namespace of the xml root.""" + def _get_xml_namespace(root_tag: str) -> str: + """Extract XML namespace from root tag.""" m = re.match(r"\{.*\}", root_tag) return m.group(0) if m else "" - def _infer_source(self): - """Get the source of the mzid file.""" + def _infer_source(self) -> str | None: + """Infer search engine source from mzIdentML file metadata.""" mzid_xml = ET.parse(self.filename) root = mzid_xml.getroot() name_space = self._get_xml_namespace(root.tag) + software = root.find(f".//{name_space}AnalysisSoftware") + if software is None: + return None try: - return root.find(f".//{name_space}AnalysisSoftware").attrib["name"] + return software.attrib["name"] except KeyError: return None @staticmethod - def _parse_peptidoform(seq: str, modification_list: list[dict], charge: Union[int, None]): - """Parse mzid sequence and modifications to Peptidoform.""" + def _parse_peptidoform( + seq: str, modification_list: list[dict[str, Any]], charge: int | None + ) -> Peptidoform: + """Parse mzIdentML sequence and modifications into Peptidoform object.""" peptide = [""] + list(seq) + [""] # Add modification labels @@ -203,51 +224,47 @@ def _parse_peptidoform(seq: str, modification_list: list[dict], charge: Union[in return Peptidoform(proforma_seq) @staticmethod - def _parse_peptide_evidence_ref(peptide_evidence_list: list[dict]): + def _parse_peptide_evidence_ref( + peptide_evidence_list: list[dict[str, Any]], + ) -> tuple[bool, list[str]]: """ - Parse PeptideEvidence list of PSM. 
- - Notes - ----- - If multiple PeptideEvidence entries are associated with the PSM, the PSM is only considered - a decoy entry if ALL PeptideEvidence entries are decoy entries. If a target PeptideEvidence - entry is present, it should get priority over decoy entries. In theory, no overlap between - target and decoy peptide sequence should be present in the search space, although this - might not have been filtered for by the search engine. + Parse PeptideEvidence list to determine decoy status and protein accessions. + PSM is considered decoy only if ALL PeptideEvidence entries are decoy. """ - isdecoy = all( - [entry["isDecoy"] if "isDecoy" in entry else None for entry in peptide_evidence_list] - ) - protein_list = [d["accession"] for d in peptide_evidence_list if "accession" in d.keys()] - return isdecoy, protein_list + is_decoy = all(entry.get("isDecoy", False) for entry in peptide_evidence_list) + protein_list = [ + accession + for d in peptide_evidence_list + if (accession := d.get("accession")) is not None + ] + return is_decoy, protein_list def _get_peptide_spectrum_match( self, spectrum_id: str, - spectrum_title: Union[str, None], - run: Union[str, None], - rt: Union[float, None], - ion_mobility: Union[float, None], - spectrum_identification_item: dict[str, str | float | list], + spectrum_title: str | None, + run: str | None, + rt: float | None, + ion_mobility: float | None, + spectrum_identification_item: dict[str, Any], ) -> PSM: - """Parse single mzid entry to :py:class:`~psm_utils.peptidoform.Peptidoform`.""" + """Parse single mzIdentML SpectrumIdentificationItem into PSM object.""" sii = spectrum_identification_item - try: - modifications = sii["Modification"] - except KeyError: - modifications = [] - sequence = sii["PeptideSequence"] - charge = sii["chargeState"] if "chargeState" in sii else None + modifications = cast(list[dict[str, Any]], sii.get("Modification", [])) + sequence = cast(str, sii["PeptideSequence"]) + charge_value = 
sii.get("chargeState") + charge = int(charge_value) if charge_value is not None else None peptidoform = self._parse_peptidoform(sequence, modifications, charge) - is_decoy, protein_list = self._parse_peptide_evidence_ref(sii["PeptideEvidenceRef"]) - try: - precursor_mz = sii["experimentalMassToCharge"] - except KeyError: - precursor_mz = None + is_decoy, protein_list = self._parse_peptide_evidence_ref( + cast(list[dict[str, Any]], sii["PeptideEvidenceRef"]) + ) + + precursor_mz_value = sii.get("experimentalMassToCharge") + precursor_mz = float(precursor_mz_value) if precursor_mz_value is not None else None # Override spectrum-level RT if present at PSM level - if self._rt_key: + if self._rt_key and self._rt_key in sii: rt = float(sii[self._rt_key]) metadata = {col: str(sii[col]) for col in sii.keys() if col not in self._non_metadata_keys} @@ -259,30 +276,45 @@ def _get_peptide_spectrum_match( else: psm_spectrum_id = spectrum_id - try: - score = sii[self._score_key] - except KeyError: - score = None + score = None + if self._score_key: + score_value = sii.get(self._score_key) + score = float(score_value) if score_value is not None else None + + # Calculate qvalue and pep with cleaner logic + qvalue = None + if self._qvalue_key: + qvalue_raw = sii.get(self._qvalue_key) + qvalue = float(qvalue_raw) if qvalue_raw is not None else None + + pep = None + if self._pep_key: + pep_raw = sii.get(self._pep_key) + pep = float(pep_raw) if pep_raw is not None else None + + rank_value = sii.get("rank") + rank = int(rank_value) if rank_value is not None else None + psm = PSM( peptidoform=peptidoform, spectrum_id=psm_spectrum_id, run=run, is_decoy=is_decoy, score=score, - qvalue=sii[self._qvalue_key] if self._qvalue_key else None, - pep=sii[self._pep_key] if self._pep_key else None, + qvalue=qvalue, + pep=pep, precursor_mz=precursor_mz, retention_time=rt, ion_mobility=ion_mobility, protein_list=protein_list, - rank=sii["rank"] if "rank" in sii else None, + rank=rank, 
source=self._source, provenance_data={"mzid_filename": str(self.filename)}, metadata=metadata, ) return psm - def _get_non_metadata_keys(self, keys: list): + def _get_non_metadata_keys(self, keys: list[str]) -> None: """Gather all the keys at PSM-level that should not be written to metadata.""" # All keys required to create PSM object default_keys = [ @@ -323,8 +355,8 @@ def _get_non_metadata_keys(self, keys: list): # Keys that are not necessary for metadata self._non_metadata_keys.extend(default_keys) - def _get_toplevel_non_metadata_keys(self, keys: list): - """Gather all keys at spectrum-level that should not be written to metadata.""" + def _get_toplevel_non_metadata_keys(self, keys: list[str]) -> None: + """Identify spectrum-level keys that should not be written to PSM metadata.""" # Check if RT is encoded in spectrum metadata for key in ["retention time", "scan start time"]: if key in keys: @@ -340,30 +372,29 @@ def _get_toplevel_non_metadata_keys(self, keys: list): break @staticmethod - def _infer_score_name(keys) -> str: - """Infer the score from the list of known PSM scores.""" + def _infer_score_name(keys: list[str]) -> str | None: + """Infer search engine score name from available PSM keys.""" lower_keys = {key.lower(): key for key in keys} for score in STANDARD_SEARCHENGINE_SCORES: if score in lower_keys: return lower_keys[score] + return None @staticmethod - def _infer_qvalue_name(keys) -> Union[str, None]: - """Infer the q-value term from the list of known terms.""" + def _infer_qvalue_name(keys: list[str]) -> str | None: + """Infer q-value field name from available PSM keys.""" for qvalue in Q_VALUE_TERMS: if qvalue in keys: return qvalue - else: - return None + return None @staticmethod - def _infer_pep_name(keys) -> Union[str, None]: - """Infer the PEP term from the list of known terms.""" + def _infer_pep_name(keys: list[str]) -> str | None: + """Infer PEP (Posterior Error Probability) field name from available PSM keys.""" for pep in PEP_TERMS: 
if pep in keys: return pep - else: - return None + return None class MzidWriter(WriterBase): @@ -372,10 +403,10 @@ class MzidWriter(WriterBase): def __init__( self, filename: str | Path, - *args, + *args: Any, show_progressbar: bool = False, - **kwargs, - ): + **kwargs: Any, + ) -> None: """ Writer for mzIdentML PSM files. @@ -383,8 +414,12 @@ def __init__( ---------- filename: str, Pathlib.Path Path to PSM file. + *args + Additional positional argument passed to parent class. show_progressbar: bool, optional Show progress bar for conversion process. (default: False) + **kwargs + Additional keyword arguments passed to parent class. Notes ----- @@ -404,9 +439,11 @@ def __init__( self._writer = None def __enter__(self) -> MzidWriter: + """Open file for writing and return self.""" return self def __exit__(self, *args, **kwargs) -> None: + """Close file and writer.""" pass def write_psm(self, psm: PSM): @@ -423,8 +460,8 @@ def write_psm(self, psm: PSM): """ raise NotImplementedError("MzidWriter currently does not support write_psm.") - def write_file(self, psm_list: PSMList): - """Write entire PSMList to mzid file.""" + def write_file(self, psm_list: PSMList) -> None: + """Write entire PSMList to mzIdentML file.""" file = open(self.filename, "wb") with Progress(disable=not self.show_progressbar) as progress: with MzIdentMLWriter(file, close=True) as writer: @@ -526,10 +563,12 @@ def write_file(self, psm_list: PSMList): ) @staticmethod - def _create_peptide_object(peptidoform): + def _create_peptide_object(peptidoform: Peptidoform) -> dict[str, Any]: """Create mzid peptide object from Peptidoform.""" - def parse_modifications(modifications: list[proforma.TagBase], location: int): + def parse_modifications( + modifications: list[Any] | None, location: int + ) -> list[dict[str, Any]]: modification_list = [] if modifications: for mod in modifications: @@ -537,15 +576,15 @@ def parse_modifications(modifications: list[proforma.TagBase], location: int): 
modification_list.append( { "location": location, - "name": mod.name, - "monoisotopic_mass_delta": mod.mass, + "name": mod.name, # type: ignore[attr-defined] + "monoisotopic_mass_delta": mod.mass, # type: ignore[attr-defined] } ) except AttributeError: modification_list.append( { "location": location, - "monoisotopic_mass_delta": mod.mass, + "monoisotopic_mass_delta": mod.mass, # type: ignore[attr-defined] } ) return modification_list @@ -554,9 +593,11 @@ def parse_modifications(modifications: list[proforma.TagBase], location: int): modifications = [] for loc, (aa, mods) in enumerate(peptidoform.parsed_sequence, start=1): modifications.extend(parse_modifications(mods, loc)) - modifications.extend(parse_modifications(peptidoform.properties["n_term"], 0)) + modifications.extend(parse_modifications(peptidoform.properties.get("n_term"), 0)) modifications.extend( - parse_modifications(peptidoform.properties["c_term"], len(peptidoform.sequence) + 1) + parse_modifications( + peptidoform.properties.get("c_term"), len(peptidoform.sequence) + 1 + ) ) peptide_object = { @@ -567,7 +608,7 @@ def parse_modifications(modifications: list[proforma.TagBase], location: int): return peptide_object - def _transform_search_database(self): + def _transform_search_database(self) -> dict[str, Any]: """Create mzid database object.""" # TODO: Create this and link with protein object when fasta file is provided return { @@ -579,15 +620,17 @@ def _transform_search_database(self): } @staticmethod - def _transform_spectra_data(spec_id_dict: dict): - """Get all the unique spectra data from PSMList spectrum id dict.""" - collection_run_id_dict = {} + def _transform_spectra_data( + spec_id_dict: dict[str, Any], + ) -> tuple[list[dict[str, Any]], dict[str, int]]: + """Get all unique spectra data from PSMList spectrum id dict.""" + collection_run_id_dict: dict[str, int] = {} spectra_data = [] i = 1 - for collection in spec_id_dict.keys(): - for run in spec_id_dict[collection].keys(): + for collection 
in spec_id_dict: + for run in spec_id_dict[collection]: collection_run_id = "/".join(filter(None, [collection, run])) - if collection_run_id not in collection_run_id_dict.keys(): + if collection_run_id not in collection_run_id_dict: collection_run_id_dict[collection_run_id] = i spectra_data_object = { "id": i, @@ -595,28 +638,30 @@ def _transform_spectra_data(spec_id_dict: dict): "spectrum_id_format": "multiple peak list nativeID format", # 'file_format': #TODO can we infer this? } - spectra_data.append(spectra_data_object) + spectra_data.append(spectra_data_object) + i += 1 return spectra_data, collection_run_id_dict @staticmethod - def _transform_spectrum_identification_item(candidate_psm): + def _transform_spectrum_identification_item( + candidate_psm: dict[str, Any], + ) -> list[dict[str, Any]]: """Create SpectrumIdentificationItem for each candidate PSM.""" peptide = candidate_psm["peptidoform"].proforma - if candidate_psm["metadata"]: - params = [{k: v} for k, v in candidate_psm["metadata"].items()] - else: - params = [] + params = list(candidate_psm["metadata"].items()) if candidate_psm.get("metadata") else [] + params = [{k: v} for k, v in params] for key, label, unit in [ ("retention_time", "retention time", "second"), ("qvalue", "PSM-level q-value", None), ("pep", "PSM-level local FDR", None), ]: - if candidate_psm[key]: + value = candidate_psm.get(key) + if value is not None: + param = {"name": label, "value": value} if unit: - params.append({"name": label, "value": candidate_psm[key], "unit_name": unit}) - else: - params.append({"name": label, "value": candidate_psm[key]}) + param["unit_name"] = unit + params.append(param) candidate_psm_dict = { "charge_state": candidate_psm["peptidoform"].precursor_charge, @@ -637,8 +682,10 @@ def _transform_spectrum_identification_item(candidate_psm): items.append(dict(candidate_psm_dict, **protein_specific_items)) return items - def _transform_spectrum_identification_result(self, spec_id, identified_psms, 
spectra_data_id): - """Create mzid SpectrumIdentificationResult object for PSMs that match the same spectrum.""" + def _transform_spectrum_identification_result( + self, spec_id: str, identified_psms: list[dict[str, Any]], spectra_data_id: int + ) -> dict[str, Any]: + """Create mzid SpectrumIdentificationResult object for spectrum PSMs.""" spectrum_id_result = { "id": f"SIR_{spec_id}", "spectrum_id": spec_id, diff --git a/psm_utils/io/parquet.py b/psm_utils/io/parquet.py index 755cfcc..f53bac1 100644 --- a/psm_utils/io/parquet.py +++ b/psm_utils/io/parquet.py @@ -9,11 +9,11 @@ from __future__ import annotations +from collections.abc import Iterator from pathlib import Path -from typing import Union -import pyarrow as pa -import pyarrow.parquet as pq +import pyarrow as pa # type: ignore[import-untyped] +import pyarrow.parquet as pq # type: ignore[import-untyped] from pydantic import ValidationError from psm_utils.io._base_classes import ReaderBase, WriterBase @@ -23,20 +23,27 @@ class ParquetReader(ReaderBase): - def __init__(self, path: Union[str, Path], *args, **kwargs): + """Reader for Parquet files.""" + + def __init__(self, filename: str | Path, *args, **kwargs) -> None: """ Reader for Parquet files. Parameters ---------- - path : Union[str, Path] + filename Path to the Parquet file. + *args + Additional positional arguments passed to the base class. + **kwargs + Additional keyword arguments passed to the base class. 
""" - self.path = path + super().__init__(filename, *args, **kwargs) - def __iter__(self): - with pq.ParquetFile(self.path) as reader: + def __iter__(self) -> Iterator[PSM]: + """Iterate over the Parquet file and return PSMs one-by-one.""" + with pq.ParquetFile(self.filename) as reader: for batch in reader.iter_batches(): for row in batch.to_pylist(): # Convert map columns (rendered as lists of tuples) to dictionaries @@ -52,39 +59,49 @@ def __iter__(self): class ParquetWriter(WriterBase): - def __init__(self, path: Union[str, Path], chunk_size: int = 1e6, *args, **kwargs): + """Writer for Parquet files.""" + + def __init__(self, filename: str | Path, *args, chunk_size: int = int(1e6), **kwargs): """ Writer for Parquet files. Parameters ---------- - path : Union[str, Path] + filename Path to the Parquet file. - chunk_size : int + *args + Additional positional arguments passed to the base class. + chunk_size Number of PSMs to write in a single batch. Default is 1e6. + **kwargs + Additional keyword arguments passed to the base class. 
""" - self.path = path + super().__init__(filename, *args, **kwargs) + self.chunk_size = chunk_size - self._writer = None - self._psm_cache = [] + self._writer: pq.ParquetWriter | None = None + self._psm_cache: list[dict] = [] - def __enter__(self): - self._writer = pq.ParquetWriter(self.path, schema=SCHEMA) + def __enter__(self) -> ParquetWriter: + """Open the Parquet writer in a context manager.""" + self._writer = pq.ParquetWriter(self.filename, schema=SCHEMA) return self - def __exit__(self, *args, **kwargs): - self._flush() - self._writer.close() + def __exit__(self, *args, **kwargs) -> None: + """Close the Parquet writer.""" + if self._writer is not None: + self._flush() + self._writer.close() - def write_psm(self, psm: PSM): + def write_psm(self, psm: PSM) -> None: """Write a single PSM to the Parquet file.""" self._psm_cache.append(self._psm_to_entry(psm)) if len(self._psm_cache) > self.chunk_size: self._flush() - def write_file(self, psm_list: PSMList): + def write_file(self, psm_list: PSMList) -> None: """Write a list of PSMs to the Parquet file.""" with self: for psm in psm_list: @@ -97,10 +114,12 @@ def _psm_to_entry(psm: PSM) -> dict: psm_dict["peptidoform"] = str(psm.peptidoform) return psm_dict - def _flush(self): + def _flush(self) -> None: """Write the cached PSMs to the Parquet file.""" if not self._psm_cache: return + if self._writer is None: + raise PSMUtilsIOException("ParquetWriter must be opened in a context manager.") table = pa.Table.from_pylist(self._psm_cache, schema=SCHEMA) self._writer.write_table(table) self._psm_cache = [] diff --git a/psm_utils/io/peptide_record.py b/psm_utils/io/peptide_record.py index 8afb97f..d3ba916 100644 --- a/psm_utils/io/peptide_record.py +++ b/psm_utils/io/peptide_record.py @@ -55,113 +55,78 @@ from __future__ import annotations import csv -from collections import namedtuple +from collections.abc import Iterator from pathlib import Path -from typing import Iterable, NamedTuple, Optional +from typing import 
Any, TextIO import pandas as pd +from pydantic import BaseModel, ConfigDict from psm_utils.io._base_classes import ReaderBase, WriterBase +from psm_utils.io._utils import set_csv_field_size_limit from psm_utils.io.exceptions import PSMUtilsIOException from psm_utils.peptidoform import Peptidoform from psm_utils.psm import PSM from psm_utils.psm_list import PSMList -from psm_utils.io._utils import set_csv_field_size_limit set_csv_field_size_limit() -class _PeptideRecord: - """Helper class for handling Peptide Record files.""" - - required_columns = ["spec_id", "peptide", "modifications"] - optional_columns = [ - "charge", - "observed_retention_time", - "predicted_retention_time", - "label", - "score", - ] - - def __init__( - self, - filename: str | Path, - required_columns: list[str] = None, - optional_columns: list[str] = None, - ) -> None: - """ - Helper class for handling Peptide Record files. - - Upon initialization, the separator inferred and presence of required columns - is checked. +_REQUIRED_COLUMNS = ["spec_id", "peptide", "modifications"] +_OPTIONAL_COLUMNS = [ + "charge", + "observed_retention_time", + "predicted_retention_time", + "label", + "score", +] + + +def _analyze_peprec_file(filename: str | Path) -> tuple[str, list[str]]: + """Analyze Peptide Record file to determine separator and validate columns.""" + separator = "" + header: list[str] = [] + + with open(filename) as f: + line = f.readline().strip() + for sep in ["\t", ",", ";", " "]: + cols = line.split(sep) + if all(rc in cols for rc in _REQUIRED_COLUMNS): + separator = sep + header = cols + break + else: + raise InvalidPeprecError( + "Could not infer separator. Please validate the Peptide Record " + "header and/or the required columns." + ) - Parameters - ---------- - filename: str, pathlib.Path - Path to PSM file. - required_columns: list[str] - Override default columns. - optional_columns: list[str] - Override default columns. 
+ # Validate required columns + for rc in _REQUIRED_COLUMNS: + if rc not in header: + raise InvalidPeprecError(f"Required column missing: `{rc}`") - Attributes - ---------- - separator: str - Separator (delimiter) used in Peptide Record file. - header: list[str] - Column names used in Peptide Record file. + return separator, header - Raises - ------ - InvalidPeprecError - If Peptide Record separator cannot be inferred from header. - """ - self.filename = filename - self.separator = None - self.header = None +class _PeprecEntry(BaseModel): + """Typed entry for Peptide Record data.""" - if required_columns: - self.required_columns = required_columns - else: - self.required_columns = self.required_columns.copy() # Copy from class - if optional_columns: - self.optional_columns = optional_columns - else: - self.optional_columns = self.optional_columns.copy() # Copy from class - - self._infer_separator() - self._validate_required_columns() - - def __repr__(self) -> str: - return f"_PeptideRecord('{self.filename}')" - - def _infer_separator(self) -> None: - """Infer separator used in Peptide Record file.""" - with open(self.filename, "rt") as f: - line = f.readline().strip() - for sep in ["\t", ",", ";", " "]: - cols = line.split(sep) - if all(rc in cols for rc in self.required_columns): - self.separator = sep - break - else: - raise InvalidPeprecError( - "Could not infer separator. Please validate the Peptide Record " - "header and/or the `required_columns` setting." 
- ) + spec_id: str + peptide: str + modifications: str + charge: str | None = None + observed_retention_time: str | None = None + predicted_retention_time: str | None = None + label: str | None = None + score: str | None = None - def _validate_required_columns(self) -> None: - """Raise InvalidPeprecError if not all required columns are present.""" - with open(self.filename, "rt") as f: - reader = csv.reader(f, delimiter=self.separator) - self.header = next(reader) - for rc in self.required_columns: - if rc not in self.header: - raise InvalidPeprecError(f"Required column missing: `{rc}`") + model_config = ConfigDict(extra="ignore") class PeptideRecordReader(ReaderBase): + """Reader for Peptide Record PSM files.""" + def __init__( self, filename: str | Path, @@ -173,12 +138,15 @@ def __init__( Parameters ---------- - filename: str, pathlib.Path + filename Path to PSM file. + *args + Additional positional arguments passed to parent class. + **kwargs + Additional keyword arguments passed to parent class. 
Examples -------- - PeptideRecordReader supports iteration: >>> from psm_utils.io.peptide_record import PeptideRecordReader @@ -188,36 +156,30 @@ def __init__( AC[Carbamidomethyl]DEFGR [Acetyl]-AC[Carbamidomethyl]DEFGHIK - Or a full file can be read at once into a :py:class:`~psm_utils.psm_list.PSMList` - object: + Or a full file can be read at once into a :py:class:`~psm_utils.psm_list.PSMList` object: >>> peprec_reader = PeptideRecordReader("peprec.txt") >>> psm_list = peprec_reader.read_file() """ - super().__init__(filename, *args, **kwargs) - self._peprec = _PeptideRecord(self.filename) + self._separator, self._header = _analyze_peprec_file(self.filename) - # Define named tuple for single Peptide Record entries, based on - # configured columns - columns = self._peprec.required_columns + self._peprec.optional_columns - self.PeprecEntry = namedtuple("PeprecEntry", columns, defaults=[None for _ in columns]) - - def __iter__(self) -> Iterable[PSM]: + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" - with open(self.filename, "rt") as open_file: - reader = csv.DictReader(open_file, delimiter=self._peprec.separator) + with open(self.filename) as open_file: + reader = csv.DictReader(open_file, delimiter=self._separator) for row in reader: - entry = self.PeprecEntry(**row) + entry = _PeprecEntry(**row) psm = self._entry_to_psm(entry, filename=self.filename) yield psm @staticmethod - def _entry_to_psm(entry: NamedTuple, filename: Optional[str] = None) -> PSM: - """Parse single Peptide Record entry to `PSM`.""" + def _entry_to_psm(entry: _PeprecEntry, filename: str | Path) -> PSM: + """Parse single Peptide Record entry to PSM.""" # Parse sequence and modifications - proforma = peprec_to_proforma(entry.peptide, entry.modifications, entry.charge) + charge = int(entry.charge) if entry.charge else None + proforma = peprec_to_proforma(entry.peptide, entry.modifications, charge) # Parse decoy label if entry.label: @@ -226,7 +188,8 @@ 
def _entry_to_psm(entry: NamedTuple, filename: Optional[str] = None) -> PSM: is_decoy = is_decoy_map[entry.label] except (ValueError, KeyError) as e: raise InvalidPeprecError( - f"Could not parse value for `label` {entry.label}. Should be `1` or `-1`." + f"Could not parse value for `label` {entry.label}. " + "Should be `1` or `-1`." ) from e else: is_decoy = None @@ -243,35 +206,43 @@ def _entry_to_psm(entry: NamedTuple, filename: Optional[str] = None) -> PSM: class PeptideRecordWriter(WriterBase): - def __init__(self, filename, *args, **kwargs): + """Writer for Peptide Record PSM files.""" + + def __init__(self, filename: str | Path, *args, **kwargs) -> None: """ Writer for Peptide Record PSM files. Parameters ---------- - filename: str, Path - Path to PSM file + filename + Path to PSM file. + *args + Additional positional arguments passed to parent class. + **kwargs + Additional keyword arguments passed to parent class. """ super().__init__(filename, *args, **kwargs) - self._open_file = None - self._writer = None + self._open_file: TextIO | None = None + self._writer: csv.DictWriter | None = None def __enter__(self) -> PeptideRecordWriter: + """Open file for writing and prepare CSV writer.""" + # If file exists, analyze it to determine separator and header if Path(self.filename).is_file(): - peprec = _PeptideRecord(self.filename) - self._open_file = open(self.filename, "at", newline="") + separator, header = _analyze_peprec_file(self.filename) + self._open_file = open(self.filename, "a", newline="") self._writer = csv.DictWriter( self._open_file, - fieldnames=peprec.header, + fieldnames=header, extrasaction="ignore", - delimiter=peprec.separator, + delimiter=separator, ) else: - self._open_file = open(self.filename, "wt", newline="") + self._open_file = open(self.filename, "w", newline="") self._writer = csv.DictWriter( self._open_file, - fieldnames=_PeptideRecord.required_columns + _PeptideRecord.optional_columns, + fieldnames=_REQUIRED_COLUMNS + 
_OPTIONAL_COLUMNS, extrasaction="ignore", delimiter=" ", ) @@ -279,12 +250,15 @@ def __enter__(self) -> PeptideRecordWriter: return self def __exit__(self, *args, **kwargs) -> None: - self._open_file.close() + """Close file when exiting context.""" + if self._open_file is not None: + self._open_file.close() self._open_file = None self._writer = None @staticmethod - def _psm_to_entry(psm: PSM) -> dict: + def _psm_to_entry(psm: PSM) -> dict[str, Any]: + """Convert PSM to Peptide Record entry dictionary.""" sequence, modifications, charge = proforma_to_peprec(psm.peptidoform) return { "spec_id": psm.spectrum_id, @@ -296,77 +270,77 @@ def _psm_to_entry(psm: PSM) -> dict: "score": psm.score, } - def write_psm(self, psm: PSM): + def write_psm(self, psm: PSM) -> None: """ Write a single PSM to new or existing Peptide Record PSM file. Parameters ---------- - psm: PSM + psm PSM object to write. Examples -------- - To write single PSMs to a file, :py:class:`PeptideRecordWriter` must be opened - as a context manager. Then, within the context, :py:func:`write_psm` can be - called: + To write single PSMs to a file, :py:class:`PeptideRecordWriter` must be opened as + a context manager. Then, within the context, :py:func:`write_psm` can be called: >>> with PeptideRecordWriter("peprec.txt") as writer: >>> writer.write_psm(psm) """ + if self._writer is None: + raise PSMUtilsIOException( + f"`write_psm` method can only be called if `{self.__class__.__qualname__}` " + "is opened in context (i.e., using the `with` statement)." + ) entry = self._psm_to_entry(psm) try: self._writer.writerow(entry) except AttributeError as e: raise PSMUtilsIOException( - f"`write_psm` method can only be called if `{self.__class__.__qualname__}`" + f"`write_psm` method can only be called if `{self.__class__.__qualname__}` " "is opened in context (i.e., using the `with` statement)." ) from e - # TODO: Support appending to existing file? 
- def write_file(self, psm_list: PSMList): + def write_file(self, psm_list: PSMList) -> None: """ Write an entire PSMList to a new Peptide Record PSM file. Parameters ---------- - psm_list: PSMList + psm_list PSMList object to write to file. Examples -------- - >>> writer = PeptideRecordWriter("peprec.txt") >>> writer.write_file(psm_list) """ - with open(self.filename, "wt", newline="") as f: - fieldnames = _PeptideRecord.required_columns + _PeptideRecord.optional_columns + with open(self.filename, "w", newline="") as f: + fieldnames = _REQUIRED_COLUMNS + _OPTIONAL_COLUMNS writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter=" ") writer.writeheader() for psm in psm_list: writer.writerow(self._psm_to_entry(psm)) -def peprec_to_proforma( - peptide: str, modifications: str, charge: Optional[int] = None -) -> Peptidoform: +def peprec_to_proforma(peptide: str, modifications: str, charge: int | None = None) -> Peptidoform: """ Convert Peptide Record notation to :py:class:`~psm_utils.peptidoform.Peptidoform`. Parameters ---------- - peptide: str + peptide Stripped peptide sequence. - modifications: str + modifications Modifications in Peptide Record notation (e.g., ``4|Oxidation``) - charge: int, optional + charge Precursor charge state Returns ------- - peptidoform: psm_utils.peptidoform.Peptidoform + peptidoform Peptidoform Raises @@ -376,26 +350,26 @@ def peprec_to_proforma( """ # List of peptide sequence with added terminal positions - peptide = [""] + list(peptide) + [""] + peptide_list = [""] + list(peptide) + [""] # Add modification labels for position, label in zip(modifications.split("|")[::2], modifications.split("|")[1::2]): try: - peptide[int(position)] += f"[{label}]" + peptide_list[int(position)] += f"[{label}]" except ValueError as e: raise InvalidPeprecModificationError( f"Could not parse PEPREC modification `{modifications}`." 
) from e except IndexError as e: raise InvalidPeprecModificationError( - f"PEPREC modification has invalid position {position} in " - f"peptide `{''.join(peptide)}`." + f"PEPREC modification has invalid position {position} in peptide " + f"`{''.join(peptide_list)}`." ) from e # Add dashes between residues and termini, and join sequence - peptide[0] = peptide[0] + "-" if peptide[0] else "" - peptide[-1] = "-" + peptide[-1] if peptide[-1] else "" - proforma_seq = "".join(peptide) + peptide_list[0] = peptide_list[0] + "-" if peptide_list[0] else "" + peptide_list[-1] = "-" + peptide_list[-1] if peptide_list[-1] else "" + proforma_seq = "".join(peptide_list) # Add charge state if charge: @@ -404,21 +378,22 @@ def peprec_to_proforma( return Peptidoform(proforma_seq) -def proforma_to_peprec(peptidoform: Peptidoform) -> tuple(str, str, Optional[int]): +def proforma_to_peprec(peptidoform: Peptidoform) -> tuple[str, str, int | None]: """ Convert :py:class:`~psm_utils.peptidoform.Peptidoform` to Peptide Record notation. Parameters ---------- - peptidoform: psm_utils.peptidoform.Peptidoform + peptidoform + Input peptidoform object. Returns ------- - peptide: str + peptide Stripped peptide sequence - modifications: str + modifications Modifications in Peptide Record notation - charge: int, optional + charge Precursor charge state, if available, else :py:const:`None` Notes @@ -430,8 +405,8 @@ def proforma_to_peprec(peptidoform: Peptidoform) -> tuple(str, str, Optional[int """ - def _mod_to_ms2pip(mod_list: list, location: int): - """Proforma modification site (list) to MS²PIP modification.""" + def _mod_to_ms2pip(mod_list: list, location: int) -> str: + """Convert Proforma modification site to MS²PIP modification.""" if len(mod_list) > 1: raise InvalidPeprecModificationError( "Multiple modifications per site not supported in Peptide Record format." 
@@ -463,23 +438,20 @@ def from_dataframe(peprec_df: pd.DataFrame) -> PSMList: Parameters ---------- - peprec_df: pandas.DataFrame + peprec_df Peptide Record DataFrame Returns ------- - psm_list: PSMList + psm_list PSMList object """ - PeprecEntry = namedtuple( - "PeprecEntry", peprec_df.columns, defaults=[None for _ in peprec_df.columns] - ) psm_list = [] for _, row in peprec_df.iterrows(): - entry = PeprecEntry(**row) - psm_list.append(PeptideRecordReader._entry_to_psm(entry)) - return PSMList(psm_list) + entry = _PeprecEntry(**row.to_dict()) + psm_list.append(PeptideRecordReader._entry_to_psm(entry, filename="")) + return PSMList(psm_list=psm_list) def to_dataframe(psm_list: PSMList) -> pd.DataFrame: @@ -488,15 +460,16 @@ def to_dataframe(psm_list: PSMList) -> pd.DataFrame: Parameters ---------- - psm_list: PSMList + psm_list + Input PSMList object. Returns ------- pd.DataFrame + Peptide Record DataFrame. Examples -------- - >>> psm_list = PeptideRecordReader("peprec.csv").read_file() >>> psm_utils.io.peptide_record.to_dataframe(psm_list) spec_id peptide modifications charge label ... diff --git a/psm_utils/io/pepxml.py b/psm_utils/io/pepxml.py index 42e5b99..56cdbab 100644 --- a/psm_utils/io/pepxml.py +++ b/psm_utils/io/pepxml.py @@ -4,10 +4,11 @@ import logging from collections import defaultdict +from collections.abc import Iterator from pathlib import Path -from typing import List, Optional, Union +from typing import Any -from pyteomics import mass, pepxml, proforma +from pyteomics import mass, pepxml, proforma # type: ignore[import] from psm_utils.io._base_classes import ReaderBase from psm_utils.peptidoform import Peptidoform @@ -28,23 +29,31 @@ class PepXMLReader(ReaderBase): - def __init__(self, filename: Union[str, Path], *args, score_key: str = None, **kwargs) -> None: + """Reader for pepXML PSM files.""" + + def __init__( + self, filename: str | Path, *args: Any, score_key: str | None = None, **kwargs: Any + ) -> None: """ Reader for pepXML PSM files. 
Parameters ---------- - filename: str, pathlib.Path + filename Path to PSM file. - score_key: str, optional + *args + Additional positional arguments passed to parent class. + score_key Name of the score metric to use as PSM score. If not provided, the score metric is inferred from a list of known search engine scores. + **kwargs + Additional keyword arguments passed to parent class. """ super().__init__(filename, *args, **kwargs) self.score_key = score_key or self._infer_score_name() - def __iter__(self): + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" with pepxml.read(str(self.filename)) as reader: for spectrum_query in reader: @@ -53,33 +62,39 @@ def __iter__(self): for search_hit in spectrum_query["search_hit"]: yield self._parse_psm(spectrum_query, search_hit) - def _infer_score_name(self) -> str: + def _infer_score_name(self) -> str | None: """Infer the score from the list of known PSM scores.""" # Get scores from first PSM with pepxml.read(str(self.filename)) as reader: for spectrum_query in reader: score_keys = spectrum_query["search_hit"][0]["search_score"].keys() break + else: + score_keys = [] # Infer score name if not score_keys: logger.warning("No pepXML scores found.") return None - else: - for score in STANDARD_SEARCHENGINE_SCORES: # Check for known scores - if score in score_keys: - logger.debug(f"Using known pepXML score `{score}`.") - return score - else: - logger.warning(f"No known pepXML scores found. Defaulting to `{score_keys[0]}`.") - return score_keys[0] # Default to the first one if nothing found + + for score in STANDARD_SEARCHENGINE_SCORES: # Check for known scores + if score in score_keys: + logger.debug(f"Using known pepXML score `{score}`.") + return score + + # Default to the first one if nothing found + logger.warning(f"No known pepXML scores found. 
Defaulting to `{score_keys[0]}`.") + return score_keys[0] @staticmethod - def _parse_peptidoform(peptide: str, modifications: List[dict], charge: Optional[int] = None): - """Parse pepXML peptide to :py:class:`~psm_utils.peptidoform.Peptidoform`.""" - modifications_dict = defaultdict(list) - n_term = [] - c_term = [] + def _parse_peptidoform( + peptide: str, modifications: list[dict[str, Any]], charge: int | None = None + ) -> Peptidoform: + """Parse pepXML peptide to Peptidoform.""" + modifications_dict: dict[int, list[Any]] = defaultdict(list) + n_term: list[Any] = [] + c_term: list[Any] = [] + for mod in modifications: # Round mass modification to 6 decimal places, precision from UniMod if mod["position"] == 0: @@ -110,8 +125,22 @@ def _parse_peptidoform(peptide: str, modifications: List[dict], charge: Optional } return Peptidoform(proforma.ProForma(sequence, properties)) - def _parse_psm(self, spectrum_query: dict, search_hit: dict) -> PSM: - """Parse pepXML PSM to :py:class:`~psm_utils.psm.PSM`.""" + def _parse_psm(self, spectrum_query: dict[str, Any], search_hit: dict[str, Any]) -> PSM: + """Parse pepXML PSM to PSM.""" + # Build metadata dictionary properly + metadata = { + "num_matched_ions": str(search_hit["num_matched_ions"]), + "tot_num_ions": str(search_hit["tot_num_ions"]), + "num_missed_cleavages": str(search_hit["num_missed_cleavages"]), + } + # Add search scores to metadata + metadata.update( + { + f"search_score_{key.lower()}": str(search_hit["search_score"][key]) + for key in search_hit["search_score"] + } + ) + return PSM( peptidoform=self._parse_peptidoform( search_hit["peptide"], @@ -129,12 +158,8 @@ def _parse_psm(self, spectrum_query: dict, search_hit: dict) -> PSM: precursor_mz=mass_to_mz( spectrum_query["precursor_neutral_mass"], spectrum_query["assumed_charge"] ), - retention_time=spectrum_query["retention_time_sec"] - if "retention_time_sec" in spectrum_query - else None, - ion_mobility=spectrum_query["ion_mobility"] - if "ion_mobility" in 
spectrum_query - else None, + retention_time=spectrum_query.get("retention_time_sec"), + ion_mobility=spectrum_query.get("ion_mobility"), protein_list=[p["protein"] for p in search_hit["proteins"]], rank=search_hit["hit_rank"], source=None, @@ -143,15 +168,6 @@ def _parse_psm(self, spectrum_query: dict, search_hit: dict) -> PSM: "start_scan": str(spectrum_query["start_scan"]), "end_scan": str(spectrum_query["end_scan"]), }, - metadata={ - "num_matched_ions": str(search_hit["num_matched_ions"]), - "tot_num_ions": str(search_hit["tot_num_ions"]), - "num_missed_cleavages": str(search_hit["num_missed_cleavages"]), - }.update( - { - f"search_score_{key.lower()}": str(search_hit["search_score"][key]) - for key in search_hit["search_score"] - } - ), - rescoring_features=dict(), + metadata=metadata, + rescoring_features={}, ) diff --git a/psm_utils/io/percolator.py b/psm_utils/io/percolator.py index 045d09c..07919ad 100644 --- a/psm_utils/io/percolator.py +++ b/psm_utils/io/percolator.py @@ -6,7 +6,6 @@ Notes ----- - * While :py:class:`PercolatorTabReader` supports reading the peptide notation with preceding and following amino acids (e.g. ``R.ACDEK.F``), these amino acids are not stored and are not written by :py:class:`PercolatorTabWriter`. 
@@ -18,8 +17,9 @@ import csv import logging import re +from collections.abc import Iterator from pathlib import Path -from typing import Iterable, List, Optional, Tuple, Union +from typing import Any from psm_utils.io._base_classes import ReaderBase, WriterBase from psm_utils.io._utils import set_csv_field_size_limit @@ -33,14 +33,16 @@ class PercolatorTabReader(ReaderBase): + """Reader for Percolator Tab PIN/POUT format.""" + def __init__( self, filename: str | Path, - score_column=None, - retention_time_column=None, - mz_column=None, - *args, - **kwargs, + *args: Any, + score_column: str | None = None, + retention_time_column: str | None = None, + mz_column: str | None = None, + **kwargs: Any, ) -> None: """ Reader for Percolator Tab PIN/POUT PSM file. @@ -48,21 +50,23 @@ def __init__( As the score, retention time, and precursor m/z are often embedded as feature columns, but not with a fixed column name, their respective column names need to be provided as parameters to the class. If not provided, these properties will - not be added to the resulting :py:class:`~psm_utils.psm.PSM`. - Nevertheless, they will still be added to its - :py:attr:`~psm_utils.psm.PSM.rescoring_features` property - dictionary, along with the other features. + not be added to the resulting PSM. Nevertheless, they will still be added to its + rescoring_features property dictionary, along with the other features. Parameters ---------- - filename: str, pathlib.Path + filename Path to PSM file. - score_column: str, optional + *args + Additional positional arguments passed to parent class. + score_column Name of the column that holds the primary PSM score. - retention_time_column: str, optional + retention_time_column Name of the column that holds the retention time. - mz_column: str, optional + mz_column Name of the column that holds the precursor m/z. + **kwargs + Additional keyword arguments passed to parent class. 
""" super().__init__(filename, *args, **kwargs) @@ -99,31 +103,28 @@ def __init__( for col in [self.score_column, self.rt_column, self.mz_column]: if col and col.lower() not in self.fieldnames: raise PercolatorIOException( - f"Column `{col}` not found in header of Percolator Tab file " - f"`{self.filename}`." + f"Column `{col}` not found in header of Percolator Tab file `{self.filename}`." ) - def __iter__(self) -> Iterable[PSM]: + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" with _PercolatorTabIO( self.filename, "rt", protein_separator=self._protein_separator ) as open_file: reader = csv.DictReader(open_file, delimiter="\t") for entry in reader: - if entry[self.id_column] == "DefaultDirection": - pass - else: - psm = self._parse_entry(entry) - yield psm + if entry[self.id_column] != "DefaultDirection": + yield self._parse_entry(entry) @staticmethod - def _read_header(filename): - with open(filename, "rt") as f: + def _read_header(filename: str | Path) -> list[str]: + """Read header line and return fieldnames.""" + with open(filename) as f: fieldnames = f.readline().strip().lower().split("\t") return fieldnames @staticmethod - def _infer_charge_columns(fieldnames): + def _infer_charge_columns(fieldnames: list[str]) -> tuple[str | None, dict[int, str]]: """Infer columns that hold the precursor charge from the header fieldnames.""" # Infer single charge column charge_column = None @@ -142,7 +143,7 @@ def _infer_charge_columns(fieldnames): return charge_column, charge_onehot_columns @staticmethod - def _parse_peptidoform(percolator_peptide, charge): + def _parse_peptidoform(percolator_peptide: str, charge: int | None) -> Peptidoform: """Parse Percolator TSV peptide notation to Peptidoform.""" # Remove leading and trailing amino acids match = re.match(r"^(?:[A-Z-])?\.(.+)\.(?:[A-Z-])?$", percolator_peptide) @@ -151,7 +152,7 @@ def _parse_peptidoform(percolator_peptide, charge): peptidoform += f"/{charge}" return 
Peptidoform(peptidoform) - def _parse_charge(self, entry) -> int | None: + def _parse_charge(self, entry: dict[str, str]) -> int | None: """Parse charge state from single or one-hot encoded charge state.""" if self.charge_column: return int(entry["charge"]) @@ -159,32 +160,32 @@ def _parse_charge(self, entry) -> int | None: for charge_state, column_name in self.charge_onehot_columns.items(): if entry[column_name] == "1": return charge_state - else: - return None + return None - def _parse_entry(self, entry): + def _parse_entry(self, entry: dict[str, str]) -> PSM: """Parse Percolator TSV entry to PSM.""" - label = entry["label"] if "label" in entry else None + label = entry.get("label") is_decoy = True if label == "-1" else False if label == "1" else None rescoring_features = { k: str(v) for k, v in entry.items() if k not in self.non_feature_columns } charge = self._parse_charge(entry) peptidoform = self._parse_peptidoform(entry["peptide"], charge) - protein_list = ( - entry["proteins"].split(self._protein_separator) - if "proteins" in entry - else entry["proteinids"].split(self._protein_separator) - if "proteinids" in entry - else None - ) - psm = PSM( + + # Get protein list + protein_list = None + if "proteins" in entry: + protein_list = entry["proteins"].split(self._protein_separator) + elif "proteinids" in entry: + protein_list = entry["proteinids"].split(self._protein_separator) + + return PSM( peptidoform=peptidoform, spectrum_id=entry[self.id_column], is_decoy=is_decoy, score=float(entry[self.score_column.lower()]) if self.score_column else None, - qvalue=entry["q-value"] if "q-value" in entry else None, - pep=entry["posterior_error_prob"] if "posterior_error_prob" in entry else None, + qvalue=entry.get("q-value"), + pep=entry.get("posterior_error_prob"), precursor_mz=float(entry[self.mz_column.lower()]) if self.mz_column else None, retention_time=float(entry[self.rt_column.lower()]) if self.rt_column else None, protein_list=protein_list, @@ -192,43 +193,47 
@@ def _parse_entry(self, entry): provenance_data={"filename": str(self.filename)}, rescoring_features=rescoring_features, ) - return psm class PercolatorTabWriter(WriterBase): + """Writer for Percolator Tab PIN/POUT format.""" + def __init__( self, filename: str | Path, - style: Optional[str] = None, - feature_names: Optional[list[str]] = None, + *args: Any, + style: str | None = None, + feature_names: list[str] | None = None, add_basic_features: bool = False, - *args, - **kwargs, + **kwargs: Any, ) -> None: """ Writer for Percolator TSV "PIN" and "POUT" PSM files. Parameters ---------- - filename: str, pathlib.Path + filename Path to PSM file. - style: str, optional + *args + Additional positional arguments passed to parent class. + style Percolator Tab style. One of {``pin``, ``pout``}. If ``pin``, the columns ``SpecId``, ``Label``, ``ScanNr``, ``ChargeN``, ``PSMScore``, ``Peptide``, and ``Proteins`` are written alongside the requested feature names (see ``feature_names``). If ``pout``, the columns ``PSMId``, ``Label``, ``score``, ``q-value``, ``posterior_error_prob``, ``peptide``, and ``proteinIds`` are written. By default, the style is inferred from the file name extension. - feature_names: list[str], optional + feature_names List of feature names to extract from PSMs and write to file. List values - should correspond to keys in the - :py:class:`~psm_utils.psm.PSM.rescoring_features` property. - If :py:const:`None`, no rescoring features will be written to the file. If appending to + should correspond to keys in the rescoring_features property. + If None, no rescoring features will be written to the file. If appending to an existing file, the existing header will be used to determine the feature names. Only has effect with ``pin`` style. - add_basic_features: bool, optional - If :py:const:`True`, add ``PSMScore`` and ``ChargeN`` features to the file. Only has - effect with ``pin`` style. Default is :py:const:`False`. 
+ add_basic_features + If True, add ``PSMScore`` and ``ChargeN`` features to the file. Only has + effect with ``pin`` style. + **kwargs + Additional keyword arguments passed to parent class. """ super().__init__(filename, *args, **kwargs) @@ -270,8 +275,8 @@ def __init__( else: raise ValueError("Invalid Percolator Tab style. Should be one of {`pin`, `pout`}.") - self._open_file = None - self._writer = None + self._open_file: _PercolatorTabIO | None = None + self._writer: csv.DictWriter[str] | None = None self._protein_separator = "|||" self._current_scannr = 0 @@ -283,7 +288,7 @@ def __enter__(self) -> PercolatorTabWriter: self.filename, mode, newline="", protein_separator=self._protein_separator ) if file_existed: - fieldnames, self._current_scannr = self._parse_existing_file(self.filename) + fieldnames, self._current_scannr = self._parse_existing_file(self.filename, self.style) else: fieldnames = self._columns self._current_scannr = -1 @@ -298,27 +303,27 @@ def __enter__(self) -> PercolatorTabWriter: return self def __exit__(self, *args, **kwargs) -> None: - self._open_file.close() + """Close file and writer.""" + if self._open_file is not None: + self._open_file.close() self._open_file = None self._writer = None - self._current_scannr = None + self._current_scannr = 0 - def write_psm(self, psm: PSM): + def write_psm(self, psm: PSM) -> None: """Write a single PSM to the PSM file.""" + if self._writer is None: + raise PSMUtilsIOException( + f"`write_psm` method can only be called if `{self.__class__.__qualname__}`" + " is opened in context (i.e., using the `with` statement)." + ) entry = self._psm_to_entry(psm) self._current_scannr += 1 entry["ScanNr"] = self._current_scannr - try: - self._writer.writerow(entry) - except AttributeError as e: - raise PSMUtilsIOException( - f"`write_psm` method can only be called if `{self.__class__.__qualname__}`" - "is opened in context (i.e., using the `with` statement)." 
- ) from e - else: - self._current_scannr = entry["ScanNr"] + self._writer.writerow(entry) + self._current_scannr = entry["ScanNr"] - def write_file(self, psm_list: PSMList): + def write_file(self, psm_list: PSMList) -> None: """Write an entire PSMList to the PSM file.""" with _PercolatorTabIO( self.filename, "wt", newline="", protein_separator=self._protein_separator @@ -330,10 +335,10 @@ def write_file(self, psm_list: PSMList): for psm in psm_list: writer.writerow(self._psm_to_entry(psm)) - def _psm_to_entry(self, psm: PSM): + def _psm_to_entry(self, psm: PSM) -> dict[str, Any]: """Parse PSM to Percolator Tab entry.""" if self.style == "pin": - entry = { + entry: dict[str, Any] = { "SpecId": psm.spectrum_id, "Label": None if psm.is_decoy is None else -1 if psm.is_decoy else 1, "Peptide": "." + re.sub(r"/\d+$", "", psm.peptidoform.proforma) + ".", @@ -343,10 +348,8 @@ def _psm_to_entry(self, psm: PSM): } if self.add_basic_features: entry.update({"ChargeN": psm.peptidoform.precursor_charge, "PSMScore": psm.score}) - try: + if psm.rescoring_features is not None: entry.update(psm.rescoring_features) - except TypeError: - pass elif self.style == "pout": entry = { "PSMId": psm.spectrum_id, @@ -359,15 +362,15 @@ def _psm_to_entry(self, psm: PSM): if psm.protein_list else None, } + else: + raise ValueError("Invalid Percolator Tab style. 
Should be one of {`pin`, `pout`}.") return entry @staticmethod - def _parse_existing_file( - filename: Union[str, Path], style: str - ) -> Tuple[List[str], Optional[int]]: + def _parse_existing_file(filename: str | Path, style: str) -> tuple[list[str], int]: """Parse existing Percolator Tab file to determine fieldnames and last ScanNr.""" # Get fieldnames - with open(filename, "rt") as open_file: + with open(filename) as open_file: for line in open_file: fieldnames = line.strip().split("\t") break @@ -382,7 +385,7 @@ def _parse_existing_file( # Get last ScanNr last_scannr = None - with open(filename, "rt") as open_file: + with open(filename) as open_file: # Read last line open_file.seek(0) last_line = None @@ -407,25 +410,25 @@ def _parse_existing_file( class _PercolatorTabIO: - def __init__(self, *args, protein_separator="|||", **kwargs) -> None: + def __init__(self, *args: Any, protein_separator: str = "|||", **kwargs: Any) -> None: """File reader and writer for Percolator Tab files with fixed Proteins tab.""" self._open_file = open(*args, **kwargs) self.protein_separator = protein_separator - def __enter__(self, *args, **kwargs) -> _PercolatorTabIO: + def __enter__(self) -> _PercolatorTabIO: return self - def __exit__(self, *args, **kwargs): + def __exit__(self, *args: Any) -> None: self.close() - def __iter__(self): - """Yield lines in file with Proteins tab replaced by separator.""" - number_of_columns = None + def __iter__(self) -> Iterator[str]: + """Iterate over lines in file with Proteins tab replaced by separator.""" + number_of_columns = 0 for i, line in enumerate(self._open_file): if i == 0: number_of_columns = len(line.split("\t")) yield line.lower() - elif i == 1 & line.startswith("DefaultDirection"): + elif i == 1 and line.startswith("DefaultDirection"): yield line else: r = line.strip().split("\t") @@ -435,16 +438,16 @@ def __iter__(self): line = "\t".join(row_columns) + "\n" yield line - def close(self): + def close(self) -> None: 
self._open_file.close() - def write(self, __s: str): + def write(self, __s: str) -> None: """Write line to file with Proteins separator replaced by tab.""" __s = __s.replace(self.protein_separator, "\t") self._open_file.write(__s) -def _fieldnames_are_valid(fieldnames: List[str], style: str) -> bool: +def _fieldnames_are_valid(fieldnames: list[str], style: str) -> bool: """Check if fieldnames are valid for Percolator Tab style.""" if style == "pin": required_columns = ["SpecId", "Label", "ScanNr"] @@ -459,15 +462,18 @@ def join_pout_files( target_filename: str | Path, decoy_filename: str | Path, output_filename: str | Path, -): +) -> None: """ Join target and decoy Percolator Out (POUT) files into single PercolatorTab file. Parameters ---------- - target_filename: str, Path - decoy_filename: str, Path - output_filename: str, Path + target_filename + Path to target POUT file. + decoy_filename + Path to decoy POUT file. + output_filename + Path to output combined POUT file. """ target_reader = PercolatorTabReader(target_filename, score_column="score") diff --git a/psm_utils/io/proteome_discoverer.py b/psm_utils/io/proteome_discoverer.py index edb7a5e..f3de36d 100644 --- a/psm_utils/io/proteome_discoverer.py +++ b/psm_utils/io/proteome_discoverer.py @@ -1,14 +1,39 @@ -"""Reader for Proteome Discoverer MSF PSM files.""" +""" +Reader for Proteome Discoverer MSF PSM files. + +This module provides functionality to read PSM data from Proteome Discoverer MSF SQLite database +files. + +The reader supports both target and decoy peptides, handles various modification types (amino acid +and terminal modifications), and extracts complete scoring information from the MSF database +structure. + +Examples +-------- +>>> from psm_utils.io.proteome_discoverer import MSFReader +>>> reader = MSFReader("results.msf") +>>> psm_list = reader.read_file() +>>> for psm in reader: +... 
print(f"{psm.peptidoform} - Score: {psm.score}") + +Notes +----- +MSF file versions 79, 53, and 8 are currently supported. + +""" + +from __future__ import annotations import logging import re from collections import defaultdict +from collections.abc import Iterator from pathlib import Path -from typing import Dict, List, Tuple, Union +from typing import Any -import pyteomics.proforma as proforma -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker +import pyteomics.proforma as proforma # type: ignore[import-untyped] +from sqlalchemy import create_engine, func, select +from sqlalchemy.orm import Session import psm_utils.io._pd_msf_tables as msf from psm_utils import PSM, Peptidoform @@ -16,42 +41,75 @@ logger = logging.getLogger(__name__) -COMPATIBLE_VERSIONS = [79] +COMPATIBLE_VERSIONS = [79, 53, 8] class MSFReader(ReaderBase): - """Reader for Proteome Discoverer MSF files.""" + """ + Reader for Proteome Discoverer MSF files. + + This reader processes SQLite-based MSF database files from Proteome Discoverer, extracting + peptide-spectrum matches with complete modification information, scoring data, and metadata. + Supports both target and decoy peptides. + + Examples + -------- + >>> reader = MSFReader("experiment.msf") + >>> psm_list = reader.read_file() + >>> len(reader) # Get total number of PSMs + 1234 + >>> for psm in reader: # Iterate over all PSMs + ... if psm.qvalue and psm.qvalue < 0.01: + ... print(f"High-confidence PSM: {psm.peptidoform}") + + """ def __init__( self, - filename: Union[str, Path], + filename: str | Path, *args, **kwargs, ) -> None: """ - Reader for Proteome Discoverer MSF file. + Initialize MSF reader with database connection and version validation. Parameters ---------- - filename: str, pathlib.Path - Path to MSF file. + filename + Path to Proteome Discoverer MSF file. + *args + Additional positional arguments passed to parent class. + **kwargs + Additional keyword arguments passed to parent class. 
""" super().__init__(filename, *args, **kwargs) self._engine = create_engine(f"sqlite:///{self.filename.as_posix()}") - self._session = sessionmaker(bind=self._engine)() + self._session = Session(self._engine) self._check_version() - def __len__(self): - """Return number of PSMs in file.""" - return sum( - self._session.query(peptide).count() for peptide in [msf.Peptide, msf.PeptideDecoy] + def __len__(self) -> int: + """Return total number of PSMs in file.""" + peptide_count = ( + self._session.execute(select(func.count()).select_from(msf.Peptide)).scalar() or 0 ) + decoy_count = ( + self._session.execute(select(func.count()).select_from(msf.PeptideDecoy)).scalar() or 0 + ) + return peptide_count + decoy_count - def __iter__(self): - """Iterate over file and return PSMs one-by-one.""" + def __iter__(self) -> Iterator[PSM]: + """ + Iterate over file and return PSMs one-by-one. + + Yields + ------ + PSM + Individual PSM objects with complete modification and scoring information. + + """ for is_decoy in [False, True]: modifications = self._get_modifications(is_decoy) terminal_modifications = self._get_terminal_modifications(is_decoy) @@ -60,7 +118,8 @@ def __iter__(self): secondary_scores = self._get_secondary_scores(is_decoy) for entry in self._iter_peptides(is_decoy): - peptide_id = entry.PeptideDecoy.PeptideID if is_decoy else entry.Peptide.PeptideID + peptide = entry[0] # First element is Peptide or PeptideDecoy + peptide_id = peptide.PeptideID yield self._parse_entry( entry, modifications[peptide_id], @@ -71,36 +130,77 @@ def __iter__(self): is_decoy, ) - def _check_version(self): - """Check if MSF file version is compatible.""" - version = self._session.query(msf.SchemaInfo.Version).first()[0] + def __enter__(self) -> ReaderBase: + """Enter context manager for MSFReader.""" + return super().__enter__() + + def __exit__(self, *args, **kwargs) -> None: + """Exit context manager for MSFReader.""" + self._session.close() + return super().__exit__(*args, **kwargs) + 
+ def _check_version(self) -> None: + """Check MSF file version compatibility.""" + first_result = self._session.execute(select(msf.SchemaInfo.Version)).first() + if first_result is None: + logger.warning( + "MSF file does not contain version information. " + "Assuming it is compatible with this reader." + ) + return None + version = first_result[0] if version not in COMPATIBLE_VERSIONS: logger.warning( f"MSF file version {version} might not be compatible with this reader. " f"Checked versions are: {COMPATIBLE_VERSIONS}." ) - def _iter_peptides(self, is_decoy: bool): - """Iterate over peptides in MSF file.""" - Peptide = msf.PeptideDecoy if is_decoy else msf.Peptide - for entry in ( - self._session.query(Peptide, msf.SpectrumHeader, msf.MassPeak, msf.FileInfo) - .select_from(Peptide) - .join(msf.SpectrumHeader, Peptide.SpectrumID == msf.SpectrumHeader.SpectrumID) + def _iter_peptides(self, is_decoy: bool) -> Iterator[Any]: + """ + Iterate over peptides in MSF file. + + Parameters + ---------- + is_decoy : bool + Whether to iterate over decoy peptides instead of target peptides. + + Yields + ------ + Any + SQLAlchemy Row object containing joined Peptide, SpectrumHeader, MassPeak, and + FileInfo data. The Row object has attributes like row[0] (Peptide/PeptideDecoy), + row[1] (SpectrumHeader), row[2] (MassPeak), and row[3] (FileInfo). + + Notes + ----- + This method performs a complex join across multiple MSF database tables to gather + all necessary information for PSM construction. The returned rows contain all + spectral and identification metadata needed for downstream processing. 
+ + """ + # Select appropriate peptide table based on decoy flag + peptide_table = msf.PeptideDecoy if is_decoy else msf.Peptide + + # Build and execute query - same structure for both target and decoy + stmt = ( + select(peptide_table, msf.SpectrumHeader, msf.MassPeak, msf.FileInfo) + .select_from(peptide_table) + .join(msf.SpectrumHeader, peptide_table.SpectrumID == msf.SpectrumHeader.SpectrumID) .join(msf.MassPeak, msf.MassPeak.MassPeakID == msf.SpectrumHeader.MassPeakID) .join(msf.FileInfo, msf.FileInfo.FileID == msf.MassPeak.FileID) - ): - yield entry + ) - def _get_modifications(self, is_decoy: bool) -> Dict[int, Tuple[int, int]]: - """Get all modifications per peptide ID.""" + yield from self._session.execute(stmt) + + def _get_modifications(self, is_decoy: bool) -> dict[int, list[tuple[int, int]]]: + """Get amino acid modifications per peptide ID.""" PeptidesAminoAcidModification = ( msf.PeptidesAminoAcidModificationsDecoy if is_decoy else msf.PeptidesAminoAcidModification ) - query = ( - self._session.query( + stmt = ( + select( PeptidesAminoAcidModification.PeptideID, PeptidesAminoAcidModification.Position, msf.AminoAcidModification.UnimodAccession, @@ -112,19 +212,19 @@ def _get_modifications(self, is_decoy: bool) -> Dict[int, Tuple[int, int]]: == msf.AminoAcidModification.AminoAcidModificationID, ) ) - modifications_by_peptide = defaultdict(list) - for peptide_id, position, unimod_accession in query: + modifications_by_peptide: dict[int, list[tuple[int, int]]] = defaultdict(list) + for peptide_id, position, unimod_accession in self._session.execute(stmt): modifications_by_peptide[peptide_id].append((position, unimod_accession)) return modifications_by_peptide - def _get_terminal_modifications(self, is_decoy: bool) -> Dict[int, Tuple[int, int]]: - """Get terminal modifications for a peptide.""" + def _get_terminal_modifications(self, is_decoy: bool) -> dict[int, list[tuple[int, int]]]: + """Get terminal modifications per peptide ID.""" 
PeptidesTerminalModification = ( msf.PeptidesTerminalModification if is_decoy else msf.PeptidesTerminalModificationDecoy ) - query = ( - self._session.query( + stmt = ( + select( PeptidesTerminalModification.PeptideID, msf.AminoAcidModification.PositionType, msf.AminoAcidModification.UnimodAccession, @@ -136,32 +236,32 @@ def _get_terminal_modifications(self, is_decoy: bool) -> Dict[int, Tuple[int, in == msf.AminoAcidModification.AminoAcidModificationID, ) ) - terminal_modifications = defaultdict(list) - for peptide_id, position_type, unimod_accession in query: + terminal_modifications: dict[int, list[tuple[int, int]]] = defaultdict(list) + for peptide_id, position_type, unimod_accession in self._session.execute(stmt): terminal_modifications[peptide_id].append((position_type, unimod_accession)) return terminal_modifications - def _get_protein_entries(self, is_decoy: bool) -> Dict[int, List[str]]: - """Get protein descriptions or a peptide.""" + def _get_protein_entries(self, is_decoy: bool) -> dict[int, list[str]]: + """Get protein descriptions per peptide ID.""" PeptidesProtein = msf.PeptidesProteinDecoy if is_decoy else msf.PeptidesProtein - query = ( - self._session.query(PeptidesProtein.PeptideID, msf.ProteinAnnotation.Description) + stmt = ( + select(PeptidesProtein.PeptideID, msf.ProteinAnnotation.Description) .select_from(PeptidesProtein) .join( msf.ProteinAnnotation, PeptidesProtein.ProteinID == msf.ProteinAnnotation.ProteinID, ) ) - proteins = defaultdict(list) - for peptide_id, description in query: + proteins: dict[int, list[str]] = defaultdict(list) + for peptide_id, description in self._session.execute(stmt): proteins[peptide_id].append(re.sub(r"^>", "", description)) return proteins - def _get_main_score(self, is_decoy: bool) -> Dict[int, Tuple[float, str]]: - """Get main score and its name for a peptide.""" + def _get_main_score(self, is_decoy: bool) -> dict[int, tuple[float, str]]: + """Get main score and name per peptide ID.""" PeptideScore = 
msf.PeptideScoreDecoy if is_decoy else msf.PeptideScore - query = ( - self._session.query( + stmt = ( + select( PeptideScore.PeptideID, PeptideScore.ScoreValue, msf.ProcessingNodeScore.ScoreName ) .select_from(PeptideScore) @@ -171,16 +271,16 @@ def _get_main_score(self, is_decoy: bool) -> Dict[int, Tuple[float, str]]: ) .filter(msf.ProcessingNodeScore.IsMainScore == True) # noqa: E712 ) - scores = dict() - for peptide_id, score_value, score_name in query: + scores: dict[int, tuple[float, str]] = {} + for peptide_id, score_value, score_name in self._session.execute(stmt): scores[peptide_id] = (score_value, score_name) return scores - def _get_secondary_scores(self, is_decoy: bool) -> Dict[int, Dict[str, float]]: - """Get secondary scores and their names for a peptide.""" + def _get_secondary_scores(self, is_decoy: bool) -> dict[int, dict[str, float]]: + """Get secondary scores per peptide ID.""" PeptideScore = msf.PeptideScoreDecoy if is_decoy else msf.PeptideScore - query = ( - self._session.query( + stmt = ( + select( PeptideScore.PeptideID, PeptideScore.ScoreValue, msf.ProcessingNodeScore.ScoreName ) .select_from(PeptideScore) @@ -190,8 +290,8 @@ def _get_secondary_scores(self, is_decoy: bool) -> Dict[int, Dict[str, float]]: ) .filter(msf.ProcessingNodeScore.IsMainScore == False) # noqa: E712 ) - scores = defaultdict(dict) - for peptide_id, score_value, score_name in query: + scores: dict[int, dict[str, float]] = defaultdict(dict) + for peptide_id, score_value, score_name in self._session.execute(stmt): scores[peptide_id][score_name] = score_value return scores @@ -199,8 +299,8 @@ def _compile_peptidoform( self, sequence: str, charge: int, - modifications: List[Tuple[int, int]], - terminal_modifications: List[Tuple[int, int]], + modifications: list[tuple[int, int]], + terminal_modifications: list[tuple[int, int]], ) -> Peptidoform: """ Compile a peptidoform from a sequence, charge, and list of (terminal) modifications. 
@@ -238,7 +338,7 @@ def _compile_peptidoform( if position_type in [2, 4] # Position types 'Any C-term' or 'Protein C-term' ] - sequence = [(aa, modifications_dict[i] or None) for i, aa in enumerate(sequence)] + parsed_sequence = [(aa, modifications_dict[i] or None) for i, aa in enumerate(sequence)] properties = { "n_term": n_term, "c_term": c_term, @@ -251,48 +351,85 @@ def _compile_peptidoform( "group_ids": [], } - return Peptidoform(proforma.ProForma(sequence, properties)) + return Peptidoform(proforma.ProForma(parsed_sequence, properties)) def _parse_entry( self, - entry: Tuple[msf.Peptide, msf.SpectrumHeader, msf.MassPeak, msf.FileInfo], - modifications: List[Tuple[int, int]], - terminal_modifications: List[Tuple[int, int]], - protein_entries: List[str], - main_score: Tuple[float, str], - secondary_scores: Dict[str, float], + entry: Any, # SQLAlchemy Row[tuple[Peptide|PeptideDecoy, SpectrumHeader, MassPeak, FileInfo]] + modifications: list[tuple[int, int]], + terminal_modifications: list[tuple[int, int]], + protein_entries: list[str], + main_score: tuple[float, str], + secondary_scores: dict[str, float], is_decoy: bool, ) -> PSM: - """Parse an entry from the MSF file.""" - peptide = entry.PeptideDecoy if is_decoy else entry.Peptide + """ + Parse an entry from the MSF file into a PSM object. + + Parameters + ---------- + entry : Any + SQLAlchemy Row object containing joined peptide, spectrum, and file information. + Accessed by index: entry[0] (Peptide/PeptideDecoy), entry[1] (SpectrumHeader), + entry[2] (MassPeak), entry[3] (FileInfo). + modifications : list[tuple[int, int]] + List of tuples containing (position, UNIMOD accession) for amino acid modifications. + terminal_modifications : list[tuple[int, int]] + List of tuples containing (position_type, UNIMOD accession) for terminal modifications. + protein_entries : list[str] + List of protein descriptions associated with this peptide. 
+ main_score : tuple[float, str] + Tuple containing (score_value, score_name) for the main search engine score. + secondary_scores : dict[str, float] + Dictionary mapping score names to values for secondary scores. + is_decoy : bool + Whether this PSM is from a decoy search. + + Returns + ------- + PSM + Complete PSM object with all available metadata and scoring information. + + Notes + ----- + This method constructs a complete PSM object by: + - Creating a peptidoform from sequence and modifications + - Extracting spectrum identification and precursor information + - Including all available scoring metrics + - Adding proteome discoverer-specific metadata + + """ + peptide = entry[0] # First element is Peptide or PeptideDecoy + spectrum_header = entry[1] # Second element is SpectrumHeader + mass_peak = entry[2] # Third element is MassPeak + file_info = entry[3] # Fourth element is FileInfo + return PSM( peptidoform=self._compile_peptidoform( peptide.Sequence, - entry.SpectrumHeader.Charge, + spectrum_header.Charge, modifications, terminal_modifications, ), - spectrum_id=entry.SpectrumHeader.LastScan, - run=Path(entry.FileInfo.FileName).stem, + spectrum_id=spectrum_header.LastScan, + run=Path(file_info.FileName).stem, is_decoy=is_decoy, score=main_score[0], qvalue=None, pep=None, - precursor_mz=entry.MassPeak.Mass, - retention_time=entry.SpectrumHeader.RetentionTime, + precursor_mz=mass_peak.Mass, + retention_time=spectrum_header.RetentionTime, ion_mobility=None, protein_list=protein_entries, rank=peptide.SearchEngineRank, source="proteome_discoverer", provenance_data={ - "scan_numbers": entry.SpectrumHeader.ScanNumbers, + "scan_numbers": spectrum_header.ScanNumbers, }, metadata={ - "ms1_intensity": str(entry.MassPeak.Intensity), - "ms1_percent_isolation_interference": str( - entry.MassPeak.PercentIsolationInterference - ), - "ms1_ion_inject_time": str(entry.MassPeak.IonInjectTime), + "ms1_intensity": str(mass_peak.Intensity), + 
"ms1_percent_isolation_interference": str(mass_peak.PercentIsolationInterference), + "ms1_ion_inject_time": str(mass_peak.IonInjectTime), "main_score_name": main_score[1], **secondary_scores, }, diff --git a/psm_utils/io/proteoscape.py b/psm_utils/io/proteoscape.py index ddc4386..1b0dcb3 100644 --- a/psm_utils/io/proteoscape.py +++ b/psm_utils/io/proteoscape.py @@ -2,18 +2,19 @@ import logging import re +from collections.abc import Iterator from pathlib import Path -from typing import Union +from typing import Any import numpy as np import pandas as pd -import pyarrow.parquet as pq +import pyarrow.parquet as pq # type: ignore[import] -from psm_utils.psm import PSM -from psm_utils.psm_list import PSMList from psm_utils.io._base_classes import ReaderBase from psm_utils.io.exceptions import PSMUtilsIOException from psm_utils.peptidoform import format_number_as_string +from psm_utils.psm import PSM +from psm_utils.psm_list import PSMList logger = logging.getLogger(__name__) @@ -25,26 +26,30 @@ class ProteoScapeReader(ReaderBase): def __init__( self, - filename: Union[str, Path], - *args, - **kwargs, + filename: str | Path, + *args: Any, + **kwargs: Any, ) -> None: """ Reader for ProteoScape Parquet files. Parameters ---------- - filename: str, pathlib.Path - Path to MSF file. + filename + Path to ProteoScape Parquet file. + *args + Additional positional arguments passed to the base class. + **kwargs + Additional keyword arguments passed to the base class. 
""" - self.filename = filename + super().__init__(filename, *args, **kwargs) - def __len__(self): + def __len__(self) -> int: """Return number of PSMs in file.""" return pq.read_metadata(self.filename).num_rows - def __iter__(self): + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" with pq.ParquetFile(self.filename) as reader: for batch in reader.iter_batches(): @@ -54,36 +59,36 @@ def __iter__(self): except Exception as e: raise PSMUtilsIOException(f"Error while parsing row {row}:\n{e}") from e - @classmethod - def from_dataframe(cls, dataframe: pd.DataFrame) -> PSMList: + @staticmethod + def from_dataframe(dataframe: pd.DataFrame) -> PSMList: """Create a PSMList from a ProteoScape Pandas DataFrame.""" return PSMList( - psm_list=[ - cls._get_peptide_spectrum_match(cls(""), entry) - for entry in dataframe.to_dict(orient="records") - ] + psm_list=[_parse_entry(entry) for entry in dataframe.to_dict(orient="records")] # type: ignore[arg-type] ) def _parse_peptidoform( - stripped_peptide: str, ptms: np.ndarray, ptm_locations: np.ndarray, precursor_charge: int + stripped_peptide: str, + ptms: np.ndarray[Any, Any], + ptm_locations: np.ndarray[Any, Any], + precursor_charge: int, ) -> str: """Parse peptide sequence and modifications to ProForma.""" peptidoform = list(stripped_peptide) n_term = "" c_term = "" - for ptm, ptm_location in zip(ptms, ptm_locations): - ptm = format_number_as_string(ptm) + for ptm, ptm_location in zip(ptms, ptm_locations, strict=True): + ptm_str = format_number_as_string(ptm) if ptm_location == -1: - n_term = f"[{ptm}]-" + n_term = f"[{ptm_str}]-" elif ptm_location == len(peptidoform): - c_term = f"-[{ptm}]" + c_term = f"-[{ptm_str}]" else: - peptidoform[ptm_location] = f"{peptidoform[ptm_location]}[{ptm}]" + peptidoform[ptm_location] = f"{peptidoform[ptm_location]}[{ptm_str}]" return f"{n_term}{''.join(peptidoform)}{c_term}/{precursor_charge}" -def _parse_entry(entry: dict) -> PSM: +def _parse_entry(entry: 
dict[str, Any]) -> PSM: """Parse a single entry from ProteoScape Parquet file to PSM object.""" return PSM( peptidoform=_parse_peptidoform( @@ -93,7 +98,7 @@ def _parse_entry(entry: dict) -> PSM: entry["precursor_charge"], ), spectrum_id=entry["ms2_id"], - run=entry.get("run", None), + run=entry.get("run"), is_decoy=all(DECOY_PATTERN.match(p) for p in entry["locus_name"]), score=entry["x_corr_score"], precursor_mz=entry["precursor_mz"], diff --git a/psm_utils/io/sage.py b/psm_utils/io/sage.py index 2d62682..1111087 100644 --- a/psm_utils/io/sage.py +++ b/psm_utils/io/sage.py @@ -10,11 +10,13 @@ import csv from abc import ABC, abstractmethod +from collections.abc import Iterator from pathlib import Path -from typing import Iterable, Optional +from typing import Any -import pyarrow.parquet as pq -from pyteomics import mass +import pandas as pd +import pyarrow.parquet as pq # type: ignore[import] +from pyteomics import mass # type: ignore[import] from psm_utils.io._base_classes import ReaderBase from psm_utils.io._utils import set_csv_field_size_limit @@ -26,38 +28,45 @@ class _SageReaderBase(ReaderBase, ABC): def __init__( - self, filename, score_column: str = "sage_discriminant_score", *args, **kwargs + self, + filename: str | Path, + *args: Any, + score_column: str = "sage_discriminant_score", + **kwargs: Any, ) -> None: """ - Reader for Sage ``results.sage.tsv`` file. + Reader for Sage results file. Parameters ---------- - filename : str or Path + filename Path to PSM file. - score_column: str, optional + *args + Additional positional arguments passed to parent class. + score_column Name of the column that holds the primary PSM score. Default is ``sage_discriminant_score``, ``hyperscore`` could also be used. + **kwargs + Additional keyword arguments passed to parent class. 
""" super().__init__(filename, *args, **kwargs) - self.filename = filename self.score_column = score_column @abstractmethod - def __iter__(self) -> Iterable[PSM]: + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" raise NotImplementedError("Use `SageTSVReader` or `SageParquetReader` instead.") - def _get_peptide_spectrum_match(self, psm_dict) -> PSM: + def _get_peptide_spectrum_match(self, psm_dict: dict[str, Any]) -> PSM: """Parse a single PSM from a sage PSM file.""" - rescoring_features = {} + rescoring_features: dict[str, Any] = {} for ft in RESCORING_FEATURES: try: rescoring_features[ft] = psm_dict[ft] except KeyError: continue - + ion_mobility_features = self._extract_ion_mobility_features(psm_dict) rescoring_features.update(ion_mobility_features) @@ -73,36 +82,34 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM: score=float(psm_dict[self.score_column]), precursor_mz=self._parse_precursor_mz(psm_dict["expmass"], psm_dict["charge"]), retention_time=float(psm_dict["rt"]), - ion_mobility=rescoring_features.get("ion_mobility", None), + ion_mobility=rescoring_features.get("ion_mobility"), protein_list=psm_dict["proteins"].split(";"), source="sage", rank=int(float(psm_dict["rank"])), - provenance_data=({"sage_filename": str(self.filename)}), + provenance_data={"sage_filename": self.filename.as_posix()}, rescoring_features=rescoring_features, metadata={}, ) @staticmethod - def _parse_peptidoform(peptide: str, charge: Optional[str]) -> str: + def _parse_peptidoform(peptide: str, charge: str | None) -> str: + """Parse peptide sequence and charge to peptidoform string.""" if charge: peptide += f"/{int(float(charge))}" return peptide @staticmethod - def _parse_precursor_mz(expmass: str, charge: Optional[str]) -> Optional[float]: + def _parse_precursor_mz(expmass: str, charge: str | None) -> float | None: + """Parse experimental mass and charge to precursor m/z.""" if charge: - charge = float(charge) - expmass = 
float(expmass) - return (expmass + (mass.nist_mass["H"][1][0] * charge)) / charge - else: - return None - + charge_val = float(charge) + expmass_val = float(expmass) + return (expmass_val + (mass.nist_mass["H"][1][0] * charge_val)) / charge_val + return None + @staticmethod - def _extract_ion_mobility_features(psm_dict: dict) -> dict: - """ - Extract ion mobility features from the PSM dictionary if present and non-zero. - Returns a dict with the relevant keys or an empty dict. - """ + def _extract_ion_mobility_features(psm_dict: dict[str, Any]) -> dict[str, float]: + """Extract ion mobility features from the PSM dictionary if present and non-zero.""" try: ion_mob = float(psm_dict["ion_mobility"]) if ion_mob: @@ -116,33 +123,37 @@ def _extract_ion_mobility_features(psm_dict: dict) -> dict: return {} @classmethod - def from_dataframe(cls, dataframe) -> PSMList: + def from_dataframe(cls, dataframe: pd.DataFrame) -> PSMList: """Create a PSMList from a Sage Pandas DataFrame.""" return PSMList( psm_list=[ - cls._get_peptide_spectrum_match(cls(""), entry) + cls._get_peptide_spectrum_match(cls(""), entry) # type: ignore[arg-type] for entry in dataframe.to_dict(orient="records") ] ) class SageTSVReader(_SageReaderBase): - def __iter__(self) -> Iterable[PSM]: + """Reader for Sage TSV results files.""" + + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" - with open(self.filename, "r") as open_file: + with open(self.filename) as open_file: reader = csv.DictReader(open_file, delimiter="\t") for row in reader: row["is_decoy"] = ( True if row["label"] == "-1" else False if row["label"] == "1" else None ) - yield self._get_peptide_spectrum_match(row) + SageReader = SageTSVReader # Alias for backwards compatibility class SageParquetReader(_SageReaderBase): - def __iter__(self) -> Iterable[PSM]: + """Reader for Sage Parquet results files.""" + + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" with 
pq.ParquetFile(self.filename) as pq_file: for batch in pq_file.iter_batches(): diff --git a/psm_utils/io/tsv.py b/psm_utils/io/tsv.py index 268cf96..9fd66a3 100644 --- a/psm_utils/io/tsv.py +++ b/psm_utils/io/tsv.py @@ -51,8 +51,9 @@ import ast import csv import logging +from collections.abc import Iterator from pathlib import Path -from typing import Optional +from typing import Any, TextIO from pydantic import ValidationError @@ -70,9 +71,9 @@ class TSVReader(ReaderBase): """Reader for psm_utils TSV format.""" - def __iter__(self): + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" - with open(self.filename, "rt") as open_file: + with open(self.filename) as open_file: reader = csv.DictReader(open_file, delimiter="\t") failed_rows = 0 for row in reader: @@ -91,8 +92,8 @@ def __iter__(self): failed_rows = 0 @staticmethod - def _parse_entry(entry: dict) -> dict: - """Parse single TSV entry to :py:class:`~psm_utils.psm.PSM`.""" + def _parse_entry(entry: dict[str, str | None]) -> dict[str, Any]: + """Parse single TSV entry to PSM dict.""" # Replace empty strings with None entry = {k: v if v else None for k, v in entry.items()} @@ -106,17 +107,17 @@ def _parse_entry(entry: dict) -> dict: ) from e # Extract dict properties - parsed_entry = {} - provenance_data = {} - metadata = {} - rescoring_features = {} + parsed_entry: dict[str, Any] = {} + provenance_data: dict[str, str | None] = {} + metadata: dict[str, str | None] = {} + rescoring_features: dict[str, str | None] = {} for k, v in entry.items(): if k.startswith("provenance:"): - provenance_data[k[11:]] = str(v) + provenance_data[k[11:]] = v elif k.startswith("meta:"): - metadata[k[5:]] = str(v) + metadata[k[5:]] = v elif k.startswith("rescoring:"): - rescoring_features[k[10:]] = str(v) + rescoring_features[k[10:]] = v else: parsed_entry[k] = v @@ -132,23 +133,25 @@ def _parse_entry(entry: dict) -> dict: class TSVWriter(WriterBase): - """Reader for psm_utils TSV format.""" + 
"""Writer for psm_utils TSV format.""" def __init__( self, filename: str | Path, - example_psm: Optional[PSM] = None, - *args, - **kwargs, - ): + *args: Any, + example_psm: PSM | None = None, + **kwargs: Any, + ) -> None: """ - Reader for psm_utils TSV format. + Writer for psm_utils TSV format. Parameters ---------- - filename: str, Pathlib.Path + filename Path to PSM file. - example_psm: psm_utils.psm.PSM, optional + *args + Additional positional arguments passed to the base class. + example_psm Example PSM, required to extract the column names when writing to a new file. Should contain all fields that are to be written to the PSM file, i.e., all items in the :py:attr:`provenance_data`, :py:attr:`metadata`, and @@ -156,23 +159,27 @@ def __init__( not present in the example PSM will not be written to the file, even though they are present in other PSMs passed to :py:meth:`write_psm` or :py:meth:`write_file`. + **kwargs + Additional keyword arguments passed to the base class. + """ super().__init__(filename, *args, **kwargs) - self._open_file = None - self._writer = None + self._open_file: TextIO | None = None + self._writer: csv.DictWriter[str] | None = None if example_psm: - self.fieldnames = self._psm_to_entry(example_psm).keys() + self.fieldnames: list[str] | None = list(self._psm_to_entry(example_psm).keys()) else: self.fieldnames = None def __enter__(self) -> TSVWriter: + """Enter context manager for file writing.""" if Path(self.filename).is_file(): - with open(self.filename, "rt") as open_file: + with open(self.filename) as open_file: # Get fieldnames self.fieldnames = open_file.readline().strip().split("\t") - self._open_file = open(self.filename, "at", newline="") + self._open_file = open(self.filename, "a", newline="") self._writer = csv.DictWriter( self._open_file, fieldnames=self.fieldnames, @@ -182,7 +189,7 @@ def __enter__(self) -> TSVWriter: else: if not self.fieldnames: raise ValueError("`example_psm` required when writing to new file.") - 
self._open_file = open(self.filename, "wt", newline="") + self._open_file = open(self.filename, "w", newline="") self._writer = csv.DictWriter( self._open_file, fieldnames=self.fieldnames, @@ -192,43 +199,44 @@ def __enter__(self) -> TSVWriter: self._writer.writeheader() return self - def __exit__(self, *args, **kwargs) -> None: - self._open_file.close() - self._open_file = None + def __exit__(self, *args: Any, **kwargs: Any) -> None: + """Exit context manager and clean up file resources.""" + if self._open_file is not None: + self._open_file.close() + self._open_file = None self._writer = None - def write_psm(self, psm: PSM): + def write_psm(self, psm: PSM) -> None: """ Write a single PSM to new or existing PSM file. Parameters ---------- - psm: PSM + psm PSM object to write. """ - entry = self._psm_to_entry(psm) - try: - self._writer.writerow(entry) - except AttributeError as e: + if self._writer is None: raise PSMUtilsIOException( f"`write_psm` method can only be called if `{self.__class__.__qualname__}`" "is opened in context (i.e., using the `with` statement)." - ) from e + ) + entry = self._psm_to_entry(psm) + self._writer.writerow(entry) - def write_file(self, psm_list: PSMList): + def write_file(self, psm_list: PSMList) -> None: """ Write an entire PSMList to a new PSM file. Parameters ---------- - psm_list: PSMList + psm_list PSMList object to write to file. 
""" if not self.fieldnames: raise ValueError("`example_psm` required when writing to new file.") - with open(self.filename, "wt", newline="") as f: + with open(self.filename, "w", newline="") as f: writer = csv.DictWriter( f, fieldnames=self.fieldnames, delimiter="\t", extrasaction="ignore" ) @@ -237,7 +245,8 @@ def write_file(self, psm_list: PSMList): writer.writerow(self._psm_to_entry(psm)) @staticmethod - def _psm_to_entry(psm: PSM) -> dict: + def _psm_to_entry(psm: PSM) -> dict[str, Any]: + """Convert PSM object to dictionary entry for TSV writing.""" entry = psm.__dict__.copy() # Convert Peptidoform to proforma sequence diff --git a/psm_utils/io/xtandem.py b/psm_utils/io/xtandem.py index 43c9966..1102e2a 100644 --- a/psm_utils/io/xtandem.py +++ b/psm_utils/io/xtandem.py @@ -1,10 +1,8 @@ """ Interface with X!Tandem XML PSM files. - Notes ----- - * In X!Tandem XML, N/C-terminal modifications are encoded as normal modifications and are therefore parsed accordingly. Any information on which modifications are N/C-terminal is therefore lost. @@ -35,6 +33,7 @@ .. code-block:: [+39,99545] + """ from __future__ import annotations @@ -42,11 +41,12 @@ import logging import re import xml.etree.ElementTree as ET +from collections.abc import Iterator from pathlib import Path -from typing import Union +from typing import Any import numpy as np -from pyteomics import tandem +from pyteomics import tandem # type: ignore[import] from psm_utils.exceptions import PSMUtilsException from psm_utils.io._base_classes import ReaderBase @@ -57,33 +57,38 @@ class XTandemReader(ReaderBase): + """Reader for X!Tandem XML PSM files.""" + def __init__( self, - filename: Union[str, Path], - *args, - decoy_prefix="DECOY_", - score_key="expect", - **kwargs, + filename: str | Path, + *args: Any, + decoy_prefix: str = "DECOY_", + score_key: str = "expect", + **kwargs: Any, ) -> None: """ Reader for X!Tandem XML PSM files. 
Parameters ---------- - filename: str, pathlib.Path + filename Path to PSM file. - decoy_prefix: str, optional + *args + Additional positional arguments passed to parent class. + decoy_prefix Protein name prefix used to denote decoy protein entries. Default: ``"DECOY_"``. - score_key: str, optional + score_key Key of score to use as PSM score. One of ``"expect"``, ``"hyperscore"``, ``"delta"``, or ``"nextscore"``. Default: ``"expect"``. The ``"expect"`` score (e-value) is converted to its negative natural logarithm to facilitate downstream analysis. + **kwargs + Additional keyword arguments passed to parent class. Examples -------- - :py:class:`XTandemReader` supports iteration: >>> from psm_utils.io.xtandem import XTandemReader @@ -101,30 +106,29 @@ def __init__( >>> psm_list = reader.read_file() """ - super().__init__(filename) + super().__init__(filename, *args, **kwargs) self.decoy_prefix = decoy_prefix self.score_key = score_key - def __iter__(self): + def __iter__(self) -> Iterator[PSM]: """Iterate over file and return PSMs one-by-one.""" - with tandem.read(str(self.filename)) as reader: run = self._parse_run(self.filename) for entry in reader: - for psm in self._parse_entry(entry, run): - yield psm + yield from self._parse_entry(entry, run) @staticmethod - def _parse_peptidoform(peptide_entry, charge): + def _parse_peptidoform(peptide_entry: dict[str, Any], charge: int) -> Peptidoform: + """Parse peptidoform from X!Tandem peptide entry.""" if "aa" in peptide_entry: # Parse modifications - seq_list = list(peptide_entry["seq"]) - unmodified_seq = seq_list.copy() + seq_list: list[str] = list(peptide_entry["seq"]) + unmodified_seq: list[str] = seq_list.copy() for mod_entry in peptide_entry["aa"]: # Locations are encoded relative to position in protein - mod_loc = mod_entry["at"] - peptide_entry["start"] - mass_shift = float(mod_entry["modified"]) + mod_loc: int = mod_entry["at"] - peptide_entry["start"] + mass_shift: float = float(mod_entry["modified"]) # Check 
if site matches amino acid if not mod_entry["type"] == unmodified_seq[mod_loc]: @@ -136,7 +140,7 @@ def _parse_peptidoform(peptide_entry, charge): # Add to sequence in ProForma format seq_list[mod_loc] += f"[{format_number_as_string(mass_shift)}]" - proforma_seq = "".join(seq_list) + proforma_seq: str = "".join(seq_list) else: # No modifications to parse @@ -146,17 +150,17 @@ def _parse_peptidoform(peptide_entry, charge): return Peptidoform(proforma_seq) - def _parse_entry(self, entry, run: str) -> list: - """Parse X!Tandem XML entry to a list of :py:class:`~psm_utils.psm.PSM`.""" - pepform_to_psms = dict() + def _parse_entry(self, entry: dict[str, Any], run: str) -> list[PSM]: + """Parse X!Tandem XML entry to a list of PSMs.""" + peptidoform_psm_dict: dict[Peptidoform, PSM] = {} for protein_entry in entry["protein"]: peptide_entry = protein_entry["peptide"] peptidoform = self._parse_peptidoform(peptide_entry, entry["z"]) - if peptidoform not in pepform_to_psms: + if peptidoform not in peptidoform_psm_dict: psm = PSM( - peptidoform=self._parse_peptidoform(peptide_entry, entry["z"]), + peptidoform=peptidoform, spectrum_id=entry["support"]["fragment ion mass spectrum"]["note"], is_decoy=protein_entry["label"].startswith(self.decoy_prefix), score=( @@ -170,7 +174,7 @@ def _parse_entry(self, entry, run: str) -> list: protein_list=[protein_entry["note"]], source="X!Tandem", provenance_data={ - "xtandem_filename": str(self.filename), + "xtandem_filename": self.filename.as_posix(), "xtandem_id": str(entry["id"]), }, metadata={ @@ -179,21 +183,26 @@ def _parse_entry(self, entry, run: str) -> list: "xtandem_nextscore": str(peptide_entry["nextscore"]), }, ) - pepform_to_psms[peptidoform] = psm + peptidoform_psm_dict[peptidoform] = psm else: - pepform_to_psms[peptidoform].protein_list.append(protein_entry["note"]) - - return list(pepform_to_psms.values()) + psm_protein_list = peptidoform_psm_dict[peptidoform].protein_list + if psm_protein_list is None: + 
peptidoform_psm_dict[peptidoform].protein_list = [protein_entry["note"]] + else: + psm_protein_list.append(protein_entry["note"]) - def _parse_run(self, filepath): - """Parse X!Tandem XML run to :py:class:`~psm_utils.psm.PSM`.""" + return list(peptidoform_psm_dict.values()) + def _parse_run(self, filepath: str | Path) -> str: + """Parse run name from X!Tandem XML file.""" tree = ET.parse(str(filepath)) root = tree.getroot() - full_label = root.attrib["label"] - run_match = re.search(r"\/(?P[^\s\/\\]+)\.(?Pmgf|mzML|mzml)", full_label) + full_label: str = root.attrib["label"] + run_match: re.Match[str] | None = re.search( + r"\/(?P[^\s\/\\]+)\.(?Pmgf|mzML|mzml)", full_label + ) if run_match: - run = run_match.group("run") + run: str = run_match.group("run") else: run = Path(filepath).stem logger.warning( @@ -205,8 +214,12 @@ def _parse_run(self, filepath): class XTandemException(PSMUtilsException): + """Base exception for X!Tandem related errors.""" + pass class XTandemModificationException(XTandemException): + """Exception raised for unexpected modifications in X!Tandem XML files.""" + pass diff --git a/psm_utils/peptidoform.py b/psm_utils/peptidoform.py index 7f0923a..bb7335f 100644 --- a/psm_utils/peptidoform.py +++ b/psm_utils/peptidoform.py @@ -1,21 +1,22 @@ +"""Peptidoform module for handling peptide sequences with modifications and charge states.""" + from __future__ import annotations from collections import defaultdict -from typing import Iterable, List, Tuple, TypedDict, Union, cast +from collections.abc import Iterable +from typing import Literal, TypedDict, cast import numpy as np -from pyteomics import mass, proforma +from pyteomics import mass, proforma # type: ignore[import] from psm_utils.exceptions import PSMUtilsException from psm_utils.utils import mass_to_mz class Peptidoform: - """ - Peptide sequence, modifications and charge state represented in ProForma notation. 
- """ + """Peptide sequence, modifications and charge state represented in ProForma notation.""" - def __init__(self, proforma_sequence: Union[str, proforma.ProForma]) -> None: + def __init__(self, proforma_sequence: str | proforma.ProForma) -> None: """ Peptide sequence, modifications and charge state represented in ProForma notation. @@ -41,7 +42,7 @@ def __init__(self, proforma_sequence: Union[str, proforma.ProForma]) -> None: 711.2567622919099 """ - self.parsed_sequence: List[Tuple[str, List[proforma.TagBase] | None]] + self.parsed_sequence: list[tuple[str, list[proforma.TagBase] | None]] self.properties: PeptidoformProperties # Parse ProForma @@ -64,15 +65,19 @@ def __init__(self, proforma_sequence: Union[str, proforma.ProForma]) -> None: raise NotImplementedError("Peptidoforms with isotopes are currently not supported.") def __repr__(self) -> str: + """Return a string representation of the Peptidoform object.""" return f"{self.__class__.__qualname__}('{self.proforma}')" def __str__(self) -> str: + """Return the ProForma representation of the Peptidoform.""" return self.proforma def __hash__(self) -> int: + """Return a hash of the Peptidoform based on its ProForma representation.""" return hash(self.proforma) def __eq__(self, __o: object) -> bool: + """Check equality of Peptidoform with another object.""" if isinstance(__o, str): return self.proforma == __o elif isinstance(__o, Peptidoform): # type: ignore[return] @@ -81,6 +86,7 @@ def __eq__(self, __o: object) -> bool: raise TypeError(f"Unsupported comparison type for Peptidoform: {type(__o)}") def __lt__(self, __o: object) -> bool: + """Check if this Peptidoform is less than another object.""" if isinstance(__o, str): return self.proforma < __o elif isinstance(__o, Peptidoform): @@ -88,13 +94,16 @@ def __lt__(self, __o: object) -> bool: else: raise TypeError(f"Unsupported comparison type for Peptidoform: {type(__o)}") - def __iter__(self) -> Iterable[Tuple[str, Union[None, List[proforma.TagBase]]]]: + def 
__iter__(self) -> Iterable[tuple[str, None | list[proforma.TagBase]]]: + """Return an iterator over the parsed sequence.""" return self.parsed_sequence.__iter__() def __len__(self) -> int: + """Return the length of the parsed sequence.""" return self.parsed_sequence.__len__() - def __getitem__(self, key: int) -> Tuple[str, Union[None, List[proforma.TagBase]]]: + def __getitem__(self, key: int) -> tuple[str, None | list[proforma.TagBase]]: + """Get the item at the specified index from the parsed sequence.""" return self.parsed_sequence.__getitem__(key) @property @@ -136,7 +145,7 @@ def modified_sequence(self) -> str: 'AC[U:4]DEK' """ - properties_without_charge = self.properties.copy() + properties_without_charge = dict(self.properties).copy() properties_without_charge.pop("charge_state", None) return proforma.to_proforma(self.parsed_sequence, **properties_without_charge) @@ -168,15 +177,14 @@ def is_modified(self) -> bool: modifications. """ - mod_properties = [ - "n_term", - "c_term", - "unlocalized_modifications", - "labile_modifications", - "fixed_modifications", - ] has_sequential = any(mods for _, mods in self.parsed_sequence) - has_other = any([self.properties[prop] for prop in mod_properties]) + has_other = ( + bool(self.properties["n_term"]) + or bool(self.properties["c_term"]) + or bool(self.properties["unlocalized_modifications"]) + or bool(self.properties["labile_modifications"]) + or bool(self.properties["fixed_modifications"]) + ) return has_sequential or has_other @property @@ -240,7 +248,7 @@ def sequential_composition(self) -> list[mass.Composition]: position_comp += tag.composition except (AttributeError, KeyError) as e: raise ModificationException( - "Cannot resolve composition for modification " f"{tag.value}." + f"Cannot resolve composition for modification {tag.value}." 
) from e comp_list.append(position_comp) @@ -348,7 +356,7 @@ def sequential_theoretical_mass(self) -> list[float]: position_mass += tag.mass except (AttributeError, KeyError) as e: raise ModificationException( - "Cannot resolve mass for modification " f"{tag.value}." + f"Cannot resolve mass for modification {tag.value}." ) from e mass_list.append(position_mass) @@ -368,7 +376,8 @@ def sequential_theoretical_mass(self) -> list[float]: @property def theoretical_mass(self) -> float: - """Monoisotopic mass of the full uncharged peptidoform. + """ + Monoisotopic mass of the full uncharged peptidoform. Includes all modifications, also labile and unlocalized. @@ -427,7 +436,7 @@ def rename_modifications(self, mapping: dict[str, str]) -> None: requires renaming. Modification labels that are not in the mapping will not be renamed. - See also + See Also -------- psm_utils.psm_list.PSMList.rename_modifications @@ -443,29 +452,41 @@ def rename_modifications(self, mapping: dict[str, str]) -> None: """ - def _rename_modification_list(mods): + def _rename_modification_list( + mods: list[proforma.TagBase] | None, + ) -> list[proforma.TagBase] | None: + if mods is None: + return None + new_mods = [] for mod in mods: - try: - if isinstance(mod, proforma.MassModification): - mod_value = format_number_as_string(mod.value) - else: - mod_value = mod.value - if mod_value in mapping: - new_mods.append(proforma.process_tag_tokens(mapping[mod_value])) - else: - new_mods.append(mod) - except AttributeError: - if isinstance(mod, proforma.ModificationRule): - if mod.modification_tag.value in mapping: - mod.modification_tag = proforma.process_tag_tokens( - mapping[mod.modification_tag.value] - ) - new_mods.append(mod) - else: - mod.value # re-raise AttributeError + # Get value of the tag, formatted as string + if isinstance(mod, proforma.MassModification): + mod_value = format_number_as_string(mod.value) + else: + mod_value = mod.value + + # Rename modification if it is in the mapping + if 
mod_value in mapping: + new_mods.append(proforma.process_tag_tokens(mapping[mod_value])) + else: + new_mods.append(mod) + return new_mods + def _rename_modification_rule_list( + rules: list[proforma.ModificationRule] | None, + ) -> None: + if rules is None: + return None + + for rule in rules: + # Rename modification tag if it is in the mapping + if rule.modification_tag.value in mapping: + rule.modification_tag = proforma.process_tag_tokens( + mapping[rule.modification_tag.value] + ) + # Sequential modifications for i, (aa, mods) in enumerate(self.parsed_sequence): if mods: @@ -473,15 +494,15 @@ def _rename_modification_list(mods): self.parsed_sequence[i] = (aa, new_mods) # Non-sequence modifications - for mod_type in [ + for mod_type in ( "n_term", "c_term", "unlocalized_modifications", - "labile_modifications", - "fixed_modifications", - ]: - if self.properties[mod_type]: - self.properties[mod_type] = _rename_modification_list(self.properties[mod_type]) + ): + self.properties[mod_type] = _rename_modification_list(self.properties[mod_type]) # type: ignore[assignment] + + # Modification rules + _rename_modification_rule_list(self.properties["fixed_modifications"]) def add_fixed_modifications( self, modification_rules: list[tuple[str, list[str]]] | dict[str, list[str]] @@ -493,7 +514,7 @@ def add_fixed_modifications( added in the "fixed modifications" notation, at the front of the ProForma sequence. - See also + See Also -------- psm_utils.peptidoform.Peptidoform.apply_fixed_modifications @@ -531,7 +552,7 @@ def apply_fixed_modifications(self): (once at the beginning of the sequence) as modifications throughout the sequence at each affected amino acid residue. 
- See also + See Also -------- psm_utils.peptidoform.Peptidoform.apply_fixed_modifications @@ -583,6 +604,13 @@ class PeptidoformProperties(TypedDict): isotopes: list[proforma.StableIsotope] +_ModificationsProperty = Literal[ + "n_term", "c_term", "unlocalized_modifications", "labile_modifications" +] + +_ModificationRulesProperty = Literal["fixed_modifications"] + + def format_number_as_string(num): """Format number as string for ProForma mass modifications.""" # Using this method over `:+g` string formatting to avoid rounding and scientific notation diff --git a/psm_utils/psm.py b/psm_utils/psm.py index 9888d72..4fd3520 100644 --- a/psm_utils/psm.py +++ b/psm_utils/psm.py @@ -1,8 +1,10 @@ +"""PSM module for handling peptide-spectrum matches.""" + from __future__ import annotations -from typing import Any, Dict, List, Optional, Union +from typing import Any -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, field_validator from psm_utils.peptidoform import Peptidoform @@ -10,29 +12,41 @@ class PSM(BaseModel): """Data class representing a peptide-spectrum match (PSM).""" - peptidoform: Union[Peptidoform, str] # type: ignore + peptidoform: Peptidoform spectrum_id: str - run: Optional[str] = None - collection: Optional[str] = None - spectrum: Optional[Any] = None - is_decoy: Optional[bool] = None - score: Optional[float] = None - qvalue: Optional[float] = None - pep: Optional[float] = None - precursor_mz: Optional[float] = None - retention_time: Optional[float] = None - ion_mobility: Optional[float] = None - protein_list: Optional[List[str]] = None - rank: Optional[int] = None - source: Optional[str] = None - provenance_data: Optional[Dict[str, str]] = dict() - metadata: Optional[Dict[str, str]] = dict() - rescoring_features: Optional[Dict[str, float]] = dict() + run: str | None = None + collection: str | None = None + spectrum: Any | None = None + is_decoy: bool | None = None + score: float | None = None + qvalue: float | None = 
None + pep: float | None = None + precursor_mz: float | None = None + retention_time: float | None = None + ion_mobility: float | None = None + protein_list: list[str] | None = None + rank: int | None = None + source: str | None = None + provenance_data: dict[str, str] | None = dict() + metadata: dict[str, str] | None = dict() + rescoring_features: dict[str, float] | None = dict() + model_config = ConfigDict(arbitrary_types_allowed=True, coerce_numbers_to_str=True) - def __init__(self, **data): + @field_validator("peptidoform", mode="before") + @classmethod + def validate_peptidoform(cls, v: Peptidoform | str) -> Peptidoform: + """Convert string to Peptidoform if needed.""" + if isinstance(v, str): + return Peptidoform(v) + elif isinstance(v, Peptidoform): + return v + else: + raise TypeError(f"Peptidoform or str expected for `peptidoform`, not `{type(v)}`.") + + def __init__(self, **data: Any) -> None: # noqa: D417 """ - Data class representing a peptide-spectrum match (PSM). + Initialize a peptide-spectrum match (PSM). Links a :class:`~psm_utils.peptidoform.Peptidoform` to an observed spectrum and holds the related information. Attribute types are coerced and enforced upon @@ -87,18 +101,13 @@ def __init__(self, **data): """ super().__init__(**data) - # Parse peptidoform - if isinstance(self.peptidoform, str): - self.peptidoform: Peptidoform = Peptidoform(self.peptidoform) - elif not isinstance(self.peptidoform, Peptidoform): - raise TypeError( - f"Peptidoform or str expected for `peptidoform`, not `{type(self.peptidoform)}`." 
- ) def __getitem__(self, item) -> Any: + """Get an attribute of the PSM.""" return getattr(self, item) def __setitem__(self, item, value: Any) -> None: + """Set an attribute of the PSM.""" setattr(self, item, value) @property diff --git a/psm_utils/psm_list.py b/psm_utils/psm_list.py index 8190611..9309125 100644 --- a/psm_utils/psm_list.py +++ b/psm_utils/psm_list.py @@ -1,12 +1,15 @@ +"""PSMList module for handling collections of PSMs.""" + from __future__ import annotations import re -from typing import Iterator, List, Sequence, cast +from collections.abc import Iterator, Sequence +from typing import cast, overload import numpy as np import pandas as pd from pydantic import BaseModel -from pyteomics import auxiliary, proforma +from pyteomics import auxiliary, proforma # type: ignore[import] from rich.pretty import pretty_repr from psm_utils.psm import NUMPY_DTYPES, PSM @@ -15,11 +18,11 @@ class PSMList(BaseModel): """Data class representing a list of PSMs.""" - psm_list: List[PSM] + psm_list: list[PSM] - def __init__(__pydantic_self__, **data) -> None: + def __init__(__pydantic_self__, **data) -> None: # type: ignore[override] # noqa: D417 """ - Data class representing a list of PSMs, with some useful functionality. + Represent a list of PSMs in a data class with added functionality. 
Parameters ---------- @@ -72,25 +75,41 @@ def __init__(__pydantic_self__, **data) -> None: super().__init__(**data) def __rich_repr__(self): + """Rich representation of the PSMList.""" yield "psm_list", self.psm_list def __repr__(self): + """Return a pretty representation of the PSMList.""" return pretty_repr(self, max_length=5) def __str__(self): + """Return a string representation of the PSMList.""" return self.__repr__() - def __add__(self, other): + def __add__(self, other: PSMList) -> PSMList: + """Concatenate two PSMLists.""" return PSMList(psm_list=self.psm_list + other.psm_list) def __iter__(self) -> Iterator[PSM]: # type: ignore[override] - return self.psm_list.__iter__() + """Iterate over the PSMList.""" + return iter(self.psm_list) def __len__(self) -> int: - return self.psm_list.__len__() + """Return the length of the PSMList.""" + return len(self.psm_list) + + @overload + def __getitem__(self, item: int | np.integer) -> PSM: ... + + @overload + def __getitem__(self, item: slice | Sequence[bool | int] | np.ndarray) -> PSMList: ... + + @overload + def __getitem__(self, item: str | Sequence[str]) -> np.ndarray: ... def __getitem__(self, item) -> PSM | PSMList | np.ndarray: - if isinstance(item, (int, np.integer)): + """Get PSM or PSMList by index, slice, or property name.""" + if isinstance(item, int | np.integer): # Return single PSM by index return self.psm_list[item] elif isinstance(item, slice): @@ -119,6 +138,13 @@ def __getitem__(self, item) -> PSM | PSMList | np.ndarray: raise TypeError(f"Unsupported indexing type: {type(item)}") def __setitem__(self, item, values: Sequence) -> None: + """ + Set PSM property values for all PSMs in :py:class:`PSMList`. + + If the length of `values` does not match the length of the PSMList, + a ValueError is raised. 
+ + """ if not len(values) == len(self): raise ValueError(f"Expected value with same length as PSMList: {len(self)}") for value, psm in zip(values, self): @@ -270,7 +296,7 @@ def rename_modifications(self, mapping: dict[str, str]) -> None: requires renaming. Modification labels that are not in the mapping will not be renamed. - See also + See Also -------- psm_utils.peptidoform.Peptidoform.rename_modifications @@ -288,7 +314,7 @@ def add_fixed_modifications( added in the "fixed modifications" notation, at the front of the ProForma sequence. - See also + See Also -------- psm_utils.peptidoform.Peptidoform.add_fixed_modifications @@ -319,7 +345,7 @@ def apply_fixed_modifications(self): Applies :py:meth:`psm_utils.peptidoform.Peptidoform.apply_fixed_modifications` on all PSM peptidoforms in the :py:class:`PSMList`. - See also + See Also -------- psm_utils.peptidoform.Peptidoform.apply_fixed_modifications @@ -337,18 +363,20 @@ def to_dataframe(self) -> pd.DataFrame: def _is_iterable_of_bools(obj): + """Check if the object is an iterable of booleans.""" try: - if all(isinstance(x, (bool, np.bool_)) for x in obj): - return True - else: + if any(not isinstance(x, bool | np.bool_) for x in obj): return False + else: + return True except (TypeError, ValueError): return False def _is_iterable_of_ints(obj): + """Check if the object is an iterable of integers.""" try: - if not all(isinstance(x, (int, np.integer)) for x in obj): + if any(not isinstance(x, int | np.integer) for x in obj): return False else: return True @@ -357,8 +385,9 @@ def _is_iterable_of_ints(obj): def _is_iterable_of_strings(obj): + """Check if the object is an iterable of strings.""" try: - if not all(isinstance(x, str) for x in obj): + if any(not isinstance(x, str) for x in obj): return False else: return True diff --git a/psm_utils/utils.py b/psm_utils/utils.py index 7df9b33..82a65a1 100644 --- a/psm_utils/utils.py +++ b/psm_utils/utils.py @@ -1,11 +1,11 @@ """Various utility functions.""" -from typing 
import Optional +from __future__ import annotations -from pyteomics.mass import nist_mass +from pyteomics.mass import nist_mass # type: ignore[import-untyped] -def mass_to_mz(mass: float, charge: int, adduct_mass: Optional[float] = None) -> float: +def mass_to_mz(mass: float, charge: int, adduct_mass: float | None = None) -> float: """ Convert mass to m/z. @@ -20,11 +20,13 @@ def mass_to_mz(mass: float, charge: int, adduct_mass: Optional[float] = None) -> """ if adduct_mass is None: - adduct_mass = nist_mass["H"][1][0] - return (mass + charge * adduct_mass) / charge + _adduct_mass = nist_mass["H"][1][0] + else: + _adduct_mass = float(adduct_mass) + return (mass + charge * _adduct_mass) / charge -def mz_to_mass(mz: float, charge: int, adduct_mass: Optional[float] = None) -> float: +def mz_to_mass(mz: float, charge: int, adduct_mass: float | None = None) -> float: """ Convert m/z to mass. @@ -39,5 +41,7 @@ def mz_to_mass(mz: float, charge: int, adduct_mass: Optional[float] = None) -> f """ if adduct_mass is None: - adduct_mass = nist_mass["H"][1][0] - return mz * charge - charge * adduct_mass + _adduct_mass = nist_mass["H"][1][0] + else: + _adduct_mass = float(adduct_mass) + return mz * charge - charge * _adduct_mass diff --git a/pyproject.toml b/pyproject.toml index 39e1125..8d67852 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ classifiers = [ "Development Status :: 4 - Beta", ] dynamic = ["version"] -requires-python = ">=3.8" +requires-python = ">=3.10" dependencies = [ "click", "lxml", @@ -29,7 +29,7 @@ dependencies = [ "pydantic >= 2", "pyteomics >= 4", "rich", - "sqlalchemy", + "sqlalchemy >= 2", ] [project.optional-dependencies] @@ -71,8 +71,15 @@ profile = "black" [tool.black] line-length = 99 -target-version = ['py38'] +target-version = ['py310'] [tool.ruff] line-length = 99 -target-version = "py38" +target-version = "py310" + +[tool.ruff.lint] +select = ["E4", "E7", "E9", "F", "PD", "D", "UP"] +ignore = ["D203", "D212"] + +[tool.mypy] 
+files = ["psm_utils/**/*.py"] diff --git a/tests/test_data/minimal_test.msf b/tests/test_data/minimal_test.msf new file mode 100644 index 0000000000000000000000000000000000000000..1a8bf89af821600a18a768ff1ae75d83e9e666ff GIT binary patch literal 32768 zcmeI&J#X7a7zc1tazxoq!MYiQydiV|!#2Xj}yU17scV9`cUAJCZnB?R(p{ zU|P@R15;?a^r0ln(k)GsBq>9mJbl8XP-ZUNpl^91pOno=MN9sGY8B->NmbAOxwy7) zc|M;vXcGwn5P$##AOHbh;NXU$6joQ|!?G|R_gRfu?r?{@g4vE2*~nL0^{q}_>ui0o zUDqO2+S-CPm3CeJb+7L}H)@*E?9}hnTOY?_#?7RK)R<>+dtcbDqjl;JJCS3oAKc6- zh4pp$P!IavG5H=7VdtY;N~HJEy3^|Z;YVkb!j&uXkvBo_4aV%0$wfLnBB$bi+h0bG zU6#gebeSXU-q3dTnr@eMhWo5(4p=}RAHS*Se-Qa}@R;#%VnoOEFh8dhmY3zDuO`Qo z&U!p%lsgk)IWGV2TxzT@%(mrnrqM;P9jDl`g#FC)r>Sq|C4lWPZV|BIx&0v4a0KH$ z#+hS{4jiR8+Q}+~jScx_HK3r0Z86Uqk4?}UZ&uk{glAU745(;W+8hZQJ`T`!T1I25 z^+>y0e-w8Kf>S2OZEiEC+aIQcOy2_Hjoc0v5^ceWCP6;vN7WQ;&*j5#;>C~jrQM|~ zAi3V~8m8?82TKkLs_)zh#7}3~9M<{{4M#=5x{t?Hj({ z@!wX=-n*ph8=p7(0s#m>00Izz00bZa0SG_<0uX?}|4ZOp zHkXm-mlXNcoL;%LySw-!_3vDU3MIcVON9@`ZyFU|kaL-N#V`E)*YExG1JyN$FZ>-w z>;Ko1`n&qG`kD%mAOHafKmY;|fB*y_009U<00Iy=C4oz{*3X__mgQIS>e+_rS*{rU z)z&KOnM<_j4{I0WwY2&VYZqd*`2GJW4Jn$100bZa0SG_<0uX=z1Rwwb2)si9tpDF3 bHkyI}1Rwwb2tWV=5P$##AOHafoRYv_CJu<5 literal 0 HcmV?d00001 diff --git a/tests/test_data/minimal_v79_test.msf b/tests/test_data/minimal_v79_test.msf new file mode 100644 index 0000000000000000000000000000000000000000..874f9acc1fc970ca61b93b6862b68d8aa59f763f GIT binary patch literal 663552 zcmeFa31DMaeebU;$&%$wNhU!VCc|h*!q}N{#>;G;%uIYox>~R#%aSZ(jB&1|J2N+t zC6AWLIDx=;CLsYzDQjt2(y+8V2!+Q|+OPzg04;^S(mqPt2hC$C{qu(wXv=Hi0q=Lt zy;suJVtc0WUTnV3MAkj$cb4Dhx14kCwdLegL@g`cQ-$J^T=tGyuCfS%W!&qvSS(@s z{}KA%_zKb+yKzC^g}V3c-i9r=W`qu^b-KUGWbSkS#Qh`p58U5%f7|_a_g5W{c4QoP z*hf2_V;!{pPn{5JnxN7ro6G~NMv7KE$_5hq_=C};z_sD zGdL(baG1|9p`22RNPsUsUaVd zOJ{tAtTI2hsGe>HqUlvxqHY(}yj)yq0A|!zyJ2!I`tcQ(mK272uB@Cd&vV#%**Z<9 zW8tZ3iE(nxHBEY-S1afAD@)a`^zz0LNO*vZc>7fjPjXPOsQIjNzH~N6;lChP$_0MC z&|nRDVYsp6>I=)gPS3!AaB&Cs0s}%aQz$AW^Hrb81I%^aewXfa8KqQG^QU8Eh*)K5 
zQ6b~C@J^$3vuARie0XY@EmSCY%!aD>qJ9?}mT2nKs5{o4$42u+bY{2pIXsgC?U)%| zX#^IA&DZV=_jEfwV`IWaw-MKMJtlswWps{7C7&gTW|=F-*^<~ABWMT> z#hHn8auYoc&%#(ca^~3@EEa}am-k*c+U4}@*deS=)Ont)jVgDWSm}j2oawwu38H>_ z8&5D^&AoJYxsp^s`IRNlP^Z(gd$;hwoHqH4oR`i~RwyN_?_4Go=4usss;m|;z?Z8* zG#OFODLIye)clGm*=Z`q3pu%{mMO`#lVIbgR+d?sZPQ1%kXP@K%W5ItUM4D+N@8Bl zt=z*Sy-iPIrmUXhJ*_#SmLT~`;+iv5)C1bkM|VzR^4ewHOtk{jwD##)qG1-gQ;)hF zp5X3wPF=N9iBdt;wp|yCPN!$@Ug2ftv>8(5tWvDzP5rX7!4r%WuL+Yo@Z4GN&~v&! z%$wFsW7X3v%v6jk@?CW)lzdZnaAT&eDkE~K%v2$xYK~KF7Ep`rsuD_>+?!k>r7kg- zsb=PC?86cDE)Deq8qt@iHI)TEKMOjaUUV}b7<9vT`N;JX)110OB> zr`bTIB{`RCm?u-cVGhjfbH1WNPPW0k$!F7?OEf0U)2E=nFx%nu?B6e}$~@=|10<9j zckNPBfx~S0rh3ElMn#>q>Nhxi042M&RI4Ok*Fp=67Cq@vAUYhL>HY1Z!04!|>cVhS zk@MnCo71yvm+*kggr|z7WN(dwOsPeH(dFeOM9uPRxl1@tzFX3gVYA<|Q5&487DQ`$ zDjzwZ=9D_SQ8@78xKhkez=TVbw^-P)ITZCBHjQ>FSCH9`-4Le)7S7*E8*e?lst(UO ztKT>3aiQ-D`P_}2V_jhBMf%)y6A#-R9%jd6QCSP(2VemoQ7U-jL~ePB&!Z5kl)nS95~4>EsNWu}uT%0#s>aNH3Wom!p@3 zJLn~vrk7&@db#Bwyxu10Z9=*jE2!H?xfB*=900@8p2!H?xfB*=9z%P}+^&Pf@g83%~v>z(9 z@37m(tm2ZIFNhg68!cqjQ))*086x|_cAIV3PR-cQi;MYup{!MOTfgscTWvdys#3>e zkDTZgY`cW&k85yPT-bq zmqlw((E>VILb{{O(ts@mhon!y|`>Z+hFf`1J!XWyNn8aB|sgT=t1wBM(y{ zll}Xj|L@O^9Xh~7U-_k{sE|ahb)uho>~9{XLMFQK^Pj$b_{d=rJ@&;%@1R1<8@lMl z=Ki?F{c-nq+<(Q2@C5=O00JNY0w4eaAOHd&00JNY0wD0TBJd1{K=%#=oB5VO_jOdf zO)swA9dOo{R&NAY@%#TzD~n<>AOHd&00JNY0w4eaAOHd&00JQJOCi9X|Ks!jeko|t zDF}c72!H?xfB*=900@8p2!H?xJZ%VQzyE*9;{I#*``nkFHq*jHKmY_l00ck)1V8`; zKmY_l00cl_%Ms|OPvH|>R;#n~I>G4@y6yjN|M&4-egAmq!*Bf{eG0&cZTvSL`Q*^Q zh?ed>PD64VlXTjiv^_cQdF7u4Uhk&Q06546A16WQ9{Rz4txGOwwS3I-vD<(6jeofD z!{2`2@R1{2@D38h&;M^ZZ-PP~00JNY0w4eaAOHd&00JNY0wB;#06+iV%mgJM00JNY z0w4eaAOHd&00JNY0wA!}2;lSowwgtu4+ww&2!H?xfB*=900@8p2!H?xG!wx2znKY2 zKmY_l00ck)1V8`;KmY_l00cl_s}aD@|8F&mLLU$S0T2KI5C8!X009sH0T2KI5NIZV z=l{)2PyzxV00JNY0w4eaAOHd&00JNY0$Ys$p8sz(i$Wg|009sH0T2KI5C8!X009sH z0T5^=fam|sOi%&>AOHd&00JNY0w4eaAOHd&00LW$0G|JEHH$(Y5C8!X009sH0T2KI z5C8!X009taCV=Pv%}h`N0w4eaAOHd&00JNY0w4eaAOHeejR4O7Tg{@-2LwO>1V8`; 
zKmY_l00ck)1V8`;nhEswOL35U*EI37kX}U-RZp3b$NM`n_GSC1 zHE$ghUO|HR`ehOzz>jzi9~Lgmm*vHr5-%3YikdI!--E?MW!bnezH5co_!5$slDsML zWJL1LwQM-w-`nLfs>1$x@0^-1&wFE&DQ|2#64_T*%R6lr>FpZ0{VIniIVf1vd{#MM zI-66=%7R=e7x?u;LOG=rm3&4?E-RUGQC=8sEV=sPNw?E8I4C@Dn5)=W-*|nMS+U0I zc`sM0p}4XX&dz(ov6K{)65e3ONuQWx-|A2q@*%l&##hKH^K*;p>1LuDiOLdnyQt>n;z|Q?qrTb= zlWWnBuduYFFni6FmGk9!ZY{lRou<>V@YJ-#IJxGUCcV$Am2-Plma1Lp<&7hd@Bn$i zZG8^U%9Fg-D5LKsifvl z$H)k=%F?1jhHK&4M(bv`<}CSSsbRKIp~y2EsosP7U2Iq)sS~5_SbH8=%@fgiP4qZC z3uEngQMA^zENMMc?6fZLy>L&r(=#?ETyz^rqDAZ0ZF@{STg&KNlS)2IP|Y$|jJGAh zHAc{o)-h&o$~^DF(JrTF#|~k2qRx3_9a* z?YdZWIz4;$3NJHfzK|kkm7*T^l!LX)&IV5~GPG7p?!ePuy+hCG{xEM^@{C1GvoKRJ zuE=-QWkK>y-NB8SwyKQCr7}~6jH)?KwOK$dwyR1gWpZzFg_OF)T&9{_tBpfyHmk5s z7gbu@{8B)ij-hX9~r;h{@ei+5pO@l~Uui)zs@;V|CSowB}dfK<3?1ok8_L z)Lgz>zjf2h0A_2RC5u87Ood7bkK#~(UQ*}r9f(E20c|0l7NmI^a&!`IwW zu$_;@mF2RURZ1zPxI`zQT(po?PpKI#`_V3Ut?E6i6E`|N!Ju&II&Sebh1aZnj@cL1 zlrnW-RV)U4T3 zStV0gso85SshcnCy4v~lzL9a!z1!)To)+#GOpdjdR)){4C3912H(RU8FWlQ&4%Wuq z=Hdlf<7laA%qYyAOup#V_rsUrjO`|(yRI>^?woU8ho=y}Lg%cek(OhYZd|=@m(vpt z3zzQRfVtN;^s_I=#7|G-^7Z8Zu63uLnH&B&s%fWPZ)>BRR;%Ai z;TLT7mwRJ5SW(%Lw07z|NVnzIdU)nnOUpTVB}NDP`D(NN^;#th6}msN&U2m=y9HQY zM~Lk-_)TT=F`BPAypDG=-=a26cWT*oxpptJ{-&FlVfz3&H<4QWfL^^N$h%IfC3R7y zho&nntNCNsbz}mC;u75iszDnW8qk)?T%o*{O7Ti@xlpRdJwm+o^S|Y)hVwiwK>!3m z00ck)1V8`;KmY_l00ck)1hy0beE$EIGAEP)0T2KI5C8!X009sH0T2KI5CDOe1n~KP zEwP{w1V8`;KmY_l00ck)1V8`;KmY``6ahT{-%{p;G9Ul~AOHd&00JNY0w4eaAOHd& z(2@Y2|F^_~LJ$A}5C8!X009sH0T2KI5C8!X*ir=W{C`WC6Uu-92!H?xfB*=900@8p z2!H?xfIv$Ec>doK3kpF11V8`;KmY_l00ck)1V8`;KwwJ|!1MntWlksq0w4eaAOHd& z00JNY0w4eaAOHd_3E=sEODre^0T2KI5C8!X009sH0T2KI5CDNKMF7wLx0E@d3cTUZh=e@DXls7gViFo%hNj9wSjGWIaxv;-k#KmNi%SxtPtSq$> zVD(YCRHCNYLWNXn93Z5qr_a!^wQ6JRcp@AX6SLk4X_n8r&9vDx8|S&t#U^H&8C?(b zb`4A(aCkDu1&f-`D(6dQb81;xkSpZ^zg~zd%VjmI5aClwQORc%O*MI8xLwVW3#n13 z=lF49^)_y?c5>~?51XylPRy{IHotbD8>ZdXA}z>PJJvZ#BM#5(=sKK|nL<%1Eo7BU zVWo*>^s=o;4-9c;;@UV?CN&)c{k>%+7o$2)IVX&$9c%J9@V 
z-aIYmD&*cZN7e(0IjD)a?wsas^&r%|xi(wv+L)ehoUtK?C%R{2#xXos!*^iSx8Ld6 zvq!ijH5s1#B^-IQol60+e-Pv#7=kO#4H_Y1b_bP;;)vmox&)}eN zG1!uCBia3HGu`AgZ@b0enH*S;WHxm=6wPE;;u&M4W9i24foQ?ys0O+rLDr{vb?G^xnN z%o!AXrq8OdHBmIX6S%IlW`;&deC9j@*J`Rt-Z>e0Tx-{JA?T-QIVQNHiexjjzL&Yi+Tx0`&@@W#EZ{SY45?13RQ zOYTgjT~sMV3`ZEMI)dgsPXj;IqQ{uv|0_>bp5ySWB-iuQlu}$$^Kvd)$f~E*3=h_( z4Z#{h1FQ2lJ3YyyaA~*6zt?DDxc(X<{jH;7jTXP~fN3?>FK0*5W+Xk~%--biECsL3 zjMXgEYKYPvcK)}yf6wCnmisgG4PPJt0w4eaAOHd&00JNY0w4eaAOHgY4+-30>lFl{ zv(x43ywM`KthR13B_`5hB$5;pww+eJR_NSq5d^E%X%!>M@I)dNPbRF^?Lx2M(i-o! zxLj6eBrU}Q>2M+)WIz9JbAQL;{;~Uq^bKDi00JNY0w4eaAOHd&00JNY0w4eao0@>r zPB##&Hk2sZWrU!dprf>k&oIjv!O@E=L|#q@;r2CH7}x{*EJ7moOyVv>FzE1h6JB(Os; zYIkXkPg*01U?e>iO(d+^^Z##H-2cn{J@+>@IVMaW1V8`;KmY_l00ck)1V8`;KmY`O zbqToabnk%OGq4%&{MJq?V>b#;YR~__YH|OY`~P!)^;Z`XGzS3?009sH0T2KI5C8!X z009sHflW)G%SJ!*Z@1byKKhy;cUk#+YsX_}&$7#RdOGOvPrvccPX9Ljf*=2_T^)4%ck%PT_Wb|L z7W(wRZ@RzytBMMGg8&GC00@8p2!H?xfB*=900@A!^mu9!0jfB*=900@8p2!H?xfB*=900{hQ z66ouo13v$ae|Gw}RWJGR-`dM+`S1R-^S}1||49pd`rqHUpZwM21+75<1V8`;KmY_l z00ck)1V8`;Kwwi4xW?w7zwIYDtu}k#?SJ{**S?fyeT`Kwvr^e7KKkDurn0LAhu~sz zRP=?>>+hfZFeV5L{Nf_Wb`77Wa4Df93wfrc@7;1_2NN0T2KI5C8!X009sH0T2Lz zEl%KCdhl;!5A^9b{pp$hCx88Mrtr1&=wC1OQ|S{Q`OH(Slpg-;r6;NM^N&4o2bF5i z|MB<#2nD`C00ck)1V8`;KmY_l00ck)1VCVO66mI1^`{5_?B;;2`rdD6pZ{;8hyTKn zll=L=<*Lo89%c;!AOHd&00JNY0w4eaAOHd&00JPeF#_!QKhFOfV+AP?009sH0T2KI z5C8!X009sH0T9@%1laHYXky}R=R&euDBv*T~=M{EPup}u$b<$F7NKGA)5x4-MouKgtPF6!~FjrH16 z%Jn1R$>^So#{y2z@#De+-DP<(r^Jhej8ZD8`O~pNR*}w^m3&DpOiA99 zcrqe++e!8JcDd%NorV4L-f%1>1*L>Ho(M<9#H@Ehn)Qm)smXARI*dxOls7g>|4&CE z`&e6Z$Fa)NqEejq&Z+tGyjd_O^b8IP7bofz zZ&bQ=Ew)m)R{eBVmInZ%L4fmlCD+W0a9rGVo71y>yYRsF8V=3!rSTT3lMAhH-8JGd zDwmiSW(yU{8Fe9OnAyec%%sF~r&|d!H(-8Bb*FL#xiO5xd7gO6D{boIihS3Mnk}Dc zQSV}XCsOT3jVPzeytdW23H9`u_O;2_yfIt96cDE)Deus&L*{%EP;*L43thPFsM8Y+ z3KuGRqE#}LVyRFJ7xD~5Qei18xvXT$MLAwvqw;pMJ=c`g&Ae7DdAWbp4_{fTQ-HD; z*>d2*J!4MK^t7-#(=_{9=cW2yOiHwqIniF1c1?7ZHnJjUs?|EGQ%he0Hm3aQ_9ITu 
zZMO**!)qyDZ;0BMrQ;h?vCdGnFgGW?Y96R&#TNB#0#nnonxRd}S*z27u^Q3L?tH|A zLZQeOl9al{vU|M*bBxMirze>dR;M_dgtA;HQdTFgu47>Giw%g}W@fg57MJZ6TBMm= z{jz*w;*i6$FeX^kd{#MMI-66=%7R=e7x?u;>lrT$w=O?Q`?E36*qCrhi>}sE&7~KQ zjY$qqbWgik_|6myOLDnzx+pJ|7KTli&%H3?cY5~h5muMD;Y^jLi?GQUrYdhgPqwXd z9`~M9QO+o-m1X8RCa>eQ^_`KxqMDbBl=ADh74;j$eis|gHK&iqx^T_ajX>7}O2QVT~E-PgojLm%~)q5D~MfJ3awl;YD%t}ek$hj+-1Y1;DJKdJe zdpx#ENTS0tJKBysrR=hz=98I1Q7J8Cl}uq}VYo$MVD;3v(=$3MTq^1Lwy9lG=({CJAMvMd=a7Ani@wBQ8lQ>iKQ_zLHgqRe;FMy{>y zHmae)4O;JJ{a!sdT56|-iZNk7og8?ijNQIg`q0I7dzmfeUrn9h>EqJrW*eCr`qr=u zETV-3`HOB5JpXTbuL6Z200JNY0w4eaAOHd&00JNY0wA!Z2;ls`rOXLsKmY_l00ck) z1V8`;KmY_l00cmwB>|lOTVg>W2!H?xfB*=900@8p2!H?xfB*<=DFQhEZz*#^84v&g z5C8!X009sH0T2KI5C8!XXh{I)|CU%#2m&Ag0w4eaAOHd&00JNY0w4eaTZ#bA|69tO zPzD4*00ck)1V8`;KmY_l00ck)1X>cn`M)I=6oLQ4R`$-G^!xspE00@8p z2!H?xfB*=900@8p2!O!TnZTIfwC)+T?Qz}owz5(xFJ$Gid}~p@dt_*2bZ}^x{&|On zj`Dx*lj9~4_WWOP|JXwR@C5=O00JNY0w4eaAOHd&00JNY0wA#E2z1!2biV+<|G(u7 z3xz-c1V8`;KmY_l00ck)1V8`;K%j{Le*eD-2?{^}1V8`;KmY_l00ck)1V8`;Kw!%e z!1MntXHqBx0w4eaAOHd&00JNY0w4eaAOHeQ1aSUuLV^Mi009sH0T2KI5C8!X009sH z0T9@71laliD$64l_p^K7)SK#=?Xh;frfbS|L+6z9&5jctKe6w#{ki2|NMMfsvk2E; z7J!a#OzjmcYCfx+FP+V)Wo1FGlneZNAzmzGlu}8}pNN@-!Zq2PrUOUFCdXp?D& zCv$vb-Nfahyrh(sVpD&$nit$ZJMHuwKQ6pnD$9#G#XN3Ju3h<@FCmF3$(s^SMkMcC zJF$6hzpJ;))leU+EG;UFVA~plPPa(Iuh~j0GcpPH9~>I+ z`X(b0Hf(ItCnnjq`lf-Rs^qh|m4=r6BL@$)ZMm<-^uqq;sd?Gl!uc`{pk(8f;&P$H zW@l1h9~aUq{YqI@b0w1o!$Ttn+Rucy4yoC!lBZUSY8@J_XEA$D&S#Wt>sp^&R!$d+ zE2a?{vv$z*=@v?ASuL<3TPb5$BQ57DmuU{(#IKf?bMi{ETqGYiO>J!$ggc%HN5#af zcS4%=_BXni?n3*h+?b+f_JQ86fw`2!QwVP?SORK}BBk0%Q^+*dKmTGR>GXud!UNk{ z1Wu#0IW8J&XV(g%M&Zj3rG_3F!eS!<#o0s)*-B1Htt>080)xecHjpOQF1B5+4Mtk6 z!>UmX=gSr-rv?&%1dHCUnt8gMEvxDvXr44i=MYSH|6x~ z-!ELEkY}m1QJ|?{#$@-V>VAfxHY_7Q?+wRNQcy}%Q=XPAi-oeH=EIZ{TGrP(X(^&v zq?!QM>7qV*&3TySVzL6wzexc$U%n3L6V|Rx6VyeF`A`F9pbH~$r|0ltVb#Xz)><%g zq-l{O%d{*^n<%#w;iTi4x}Itv>oqdg88*W?O+KOI6q&C9i|T1p=A=A6a3M75^z7X$ ztOgq<&5T*9&4pi1x0xufY~c*`wrn*^Z6R$6_tEv%Prp*i6xC&YH7G0R%k!kl>PSrU 
zZY|%{=FX^ae7l-p+nSTDYWUp!$|*Ik?ONte<_e4RY>M@f(9ai}`W$5F z%p^(6nl^OJvdr{^!=w7vH&|WlYHaIm*Hk%m;rw%*9-mKGy}QATO`U{3_t&|{daxVStC!eK3f^5rc$G2s>Z6$T9YtOSTAdwmTt`pb3v!)#v6sz7dINB z#;O!)hrWel8k>!JRmADkWSH46D#hqvXFA-~fvU^SYjDym*)E@z`KF2|C5nDL|G%s| z8W08n5C8!X009sH0T2KI5C8!X0D(+b0X+Y|EL;!< z0T2KI5C8!X009sH0T2KI5CDNqNuamqH3EA?;Q@?ncm;)U9h~N z_nMw3d;X^9Z`M_e`sidrJs-F2@QjTKFCQz*i#bJHQu75dqh_Opta?h#$Yr&VFSRZ` z?Mp~vO7f<}lM%^#|KuIcUr0xDtw)>p_V;$VT33es^WJbQB?YB~H=YPb#l)<4LYno8 z)2YdDjHVftVkvKIlK!8LM7;Z0NA(KEL=gG=qsjQYOjGn3ER{d6|6j9`JYW{SvSg0&-td)x+ zSzIhC=TvU0b(@)#h!;vKn__BZndz_Q%cO)!B3u$PW%V4ZT~t|Zb+R0QcupzGrg^i1f5*JjbMwu@Ln}NqW(vi-PUQ-Bhx4Zj_3yK$ zFqo^a(tXw_YT;&w)^RhVsb48&it2KcKaCvRXb;J+vS^8F1 zS%8>b!;noWOUpS;v_61nUS|2}l0u&&#D3i-dMkSNo)0}X5G`5?IUoBCbth9D4qY)YQ!u*`m zbNsll8f{_Nu)(vzq_y(JwTxL4yFxQgsCnk9X2UQ#)t=YYZ*}*q)8q3A7sG~rWkt=K z1BNewOvQFpcQk3)PO_$G8;Njhh0G2ft1K<@^kvqZ1!?_;gKbf01!_BQ8cSbqOyO$u z?qdB7kPl2h-{Dbxf`w=+=SydEYFSy3D`kphx?X7Ce`#TOjY|3W^BkV3y=!z^mqN6z z>I+_YNt|(d_U;uP)b@&ILJbaCns2gDeP3i1rYl2LS8Q<5{Hq?U5g*zzWp2fn29sS{ zNyX(NMRs+BS~6O9DPak<$*YG)jx;ab)y+|$s4Dqvt})nYU)g@$;(0JC-&rWO$@==f zl{V;WOK1zglAg$FinbiQI%U@Ca#l+|U9%z9+hS019-t$~BbnoZWn$6cSr}{Qimi_T z3nQ(|)d~3uIvZ}?*_q&i!!viV-OL&^&J>DDQ=}w$>8VT8FLZhi9uyvImB=-jR=p`} z1&-FNmecBjr(GEECiM$+eO#GONZh7ezRBNP9bBp>1)jI&uGr~YZ*KdChei%G$5{Q| zDNrab$>n6ZNT()Rl?+$!KeSFeuA-0jtfvdb=6wOnOCdE&J2>jI)rKu9t9jl<%X(gO zS&_stp>aTI$cSXOecrj6N!a3mpZ{O??gj)v00ck)1V8`;KmY_l00ck)1VCWZ6Tr{^ zZ+a6#0T2KI5C8!X009sH0T2KI5C8!XSeF2v|F4S(0w4eaAOHd&00JNY0w4eaAOHd& zu;~fl{J-f<2n9d@1V8`;KmY_l00ck)1V8`;Kww=0IRCGU2m&Ag0w4eaAOHd&00JNY z0w4eaAh78Pu=Bs*eyN52;R^&n00ck)1V8`;KmY_l00ck)1VG@IMBqMqk6`h5F1W9D zb-Jeep64qRi~J>ZM)8)|-+u5GPI=38ktr5RB`=i}75WQV^w*fYcdO+yyo#zxuuOkp zFXt`MUlb}AD@(TqMn=b!Q}Wo*;lZ&3qX!2M98!h{$K+E-28Tz6MzaSG93DM#SZ1I9 zC%FIBLjUju0w4eaAOHd&00JNY0w4eaAOHd&u!RV8+XauH-DoV>v*)|-i~m`_4a?Tf6#uf-DmHz{kiS+ zw&&Y+S-)@nob_RA);cQuhwzy2Vxiyi9m{(*fcN-#k7bv0k7bwOatfVsIk%*i3oE(t zrO(dhR-bxisDE^b{=#GbF#T23$!e`_eCZ3{S21efrm5u=nT~Yp`E$5K)(z)lOgN{g01-=HXEP 
z@Q4m<(wpob88E6J`reOyKbybhIY)Xd+nn1h+c+ZXcswh<|FM7TJ3w#wmH#E5f6dWb z2A*?Rmop~SMsDy>kHtfi_i!&rR&u8bC$N6g zBcsf+yDYo-BvVCoiFZFPIjohN1Kpv1?iQm1bv0e1Z|?iVQ#X%|=q-7S)YZ3ZX&aaH zriX`_L+rBb&;4O!z^Jf|AM1VV#c$sH`!^jL(i^&1U>Wt}?HbnF z?bq9JEjQk7_*SN)Th9ZN=hZB~9Xh`Mr^-t{@}~PIt0mU){XhS}+{}sh+&DC-H{nB7 z1)7^wTkPMbJ2_WMv)=eVg9S6XQKnIN_RwCv0mo-lHaD=2?=xE5vauGnYK#2?dJC>b z`!T8w#`}9LH&c+`%zcn&qLrNHOV9Ux@Xwz6nJF5kv)vu zPJ+#n4E>o~y77E8CXXNd$sKR`^M}Sm{e#2QRIXN7sfLIjy60JkpQE?rV^;;*wyZWB zy_tpbpk*6xn2d_!3s3aEe%rtQU8sMI&6S*=<<^k_UiBxgyC~jvj^3m<;(@xsMkjCV zvFsr{d$^Hh+rIO`(+~g0j)~npmV=a(4)V#99SVaPxwt4V(IT^? znalU{*@f@DOZKsRGucwYZ+-V`yPx{|XZpQtLVGNG>TND(lrkmV`A^@m?NnbP#1&b=pWLbU!JXbLve7<{-N173jd~jQD`!@+MOw78k2hPWzEU%f=lBl2HIIRM zfhPObD(#Pt=*^EkQ*X`yzmgAZaNRxx%LbjlpcrfA{JgFASYJPm>8{6j|`^S~)&WY_> zSf1w_mt?$cvwviUabA1K!_v_neO-@*<32Gu{=6sd_}0#w-=oJW`5D>OsQO&y2cq`{ zSC2n~wYyDk7xsy%LI3!@Z-3LP2i`YHbI}$W+RN3=#SnV;Ekn~Ia&O`~<`Y3ZuW6M> zN7&wL{6#mNK0f!JXKQ;WUPPWg(yBM3!N9-n+C6x8&+XT;xe|$Tolm}0QO1Z~F zmVD*scf6Hin9qx)yCjQb%D!B{`go|9HaY!2>K3=L4o1x#D32fiS^t9{mOiH?0Uyto zY^PJ}La{!6!t+~q#ozzKuEaG>dIj}q-uKNfz2&nvKcU&GsuyJgof2imBnMG#@SV>O zyzs3%T;o?a(GHH^m$iTY_5DAj-uQNPq*XuT_uVNRea+iDI&Z&f&O7vt z66;1@qTDLI^b;57zxTrsZ3oHajbhqgUF2ryIMA&(tB$@#vs$Yzy;W89jkF56dMta% zVtebxs;woF)lW`e*FnfAb?^nHI^Q~(2VVX2H{MjXhdTA1>c+2=x{RL2o%MdGXQ154 zd*Am3=lygF;L$bA}G>tv3sQd9J4L|1l z3i4&6!v0C!gRO_{m(|c(Z8&OUbH35Coh;$%6t!ho5}rT&j*tIQozEH-b~*?D&Vxhr z`tZx{q1U&)>=?bi`GJ?v>zf`Jr`O+nfc?n+8}FZ{*VoGf3?Z==^=yJ(}=SFT3s^%bkz>GgpN8G60{!hU+abT_?R9Jjjv%F^L3(0j{oyT9!& zxF2-?f%|RUrEa-9=Kg~F6CEQR+?G9=vZ>^aCh~oeWAYRSzqt`vh&l<@3@Y+`mGt4;)>gLIBdN)^!D}q z-1=qfV~$UB-qHC)=NCHP(fOh7f9ZL#V~1nV@dn33j<-9@J=b=9x$9e9@9p|j*J{@r zyPnsz)cc9vzvzBL_q)39cYL7xzjZ&}^WL7f+yB)55?jRn7W=KXzqb96eZqdd?WMN2 z*nejGiS16?NXI=rujtre|8_?=-G4aadc^g*?mrNI$z>FMu#pl_x7f4kn_@h4W% z^~ZgO`flocM)y$n?cLt4f9>w-3HJW1@9N%rd%tOYh46R1@9q6?*I?IeU18VXxxU}k z(Y3wj!;YgJ|JL(V&p&tmd-qeW73ZVOIP}$P-_dCmGnuKBeA@Cwx^h*ozOc=HvtW(L 
zi%Rb3@Da8*wp#woZQs>tof1=Q0T~^ze7?(mU8i*<6c1Yd)Mek+X=U&J9nIJBsU7yiwHCvgG*gS@ALmlB zlS#`bX`U{tacpEt9vT=X6-EXupWrIAtor!%_Tf&ec68Ffur}23$F8#P?zB?JM_B=J zc#LbN&6_XRNrlP%(RTYNu~W_yQ+lO)A2ec|)%+n>l+_FyHNyijrNSjFAHB}Lx2c`w z4{orJX{94Zn~{O6)`#UItt#V6zRX7ZaJ8~Jn%bM{;fJdv z|4V%vy_kl7caMF$VH3;m@p!3D(ek@3+R!-f(!!9I4wd};omv>`SMRvmzFV-$gXNWF zWspufR?FM@2!1(F_tq?r&;@y{cJVt^I( z&+==#?6-GXC4Y)wSC2RB*s{paHOWXB(79{KD}-1mSv!+fsK{k5D)Zi@ zWSn(QJH!%QyM)?xv6EIac72xK@QUTD$aRJA5=DSE(*hN^>I?E$)4C^62o#Llhf91E z@|3|7bQ32j=6cD$1s^kj|BQT>iL$nc-D#q%m8mAmtZa0^cBV;ibl~(eX^o=^kS#-W zGlI@DcNr!<#YYaQ%=Glrob^}vEGNdp+_+Pf;YL|5>Z(?avUn{mI6|_L$*B1i+7{5N zc3?oJLRVwq9ejRONp8XguCTf6g?uVT**smmyVLO3|gOcA316wk(Es>tsd;5>)DH zDxIQ5L8z75;=E@)GFEbvyyxm=jP^%@pzYI1(`~*K*kHx-b4^;*R~LM`A|#>@>mE~G44;s^QGH(F4ETDqx`-0C}@mFHI*OHlvd6k=9!8=s5!*-*S2$v-9fH@?fL*8 zvHBcjl&ev_w2g4DH8zU2VeZn# z-G%KIu9qe?!2N(W&Fs!9Un2VX5Y;>7);)ZP>h*KD{&Z{mW=@3O+{BZm>Av)hdQshN z=4WrGjk3_{p7Cxin`l#z`PtOV+v~H^igs}q)}5T+ynU87^}6+8C$~fG-sTP5V{6y; z9o#Urf@k*7X-3cw7TftM(e8fZ^&FaZ57EQl^99TQ3_c6(J&U#@1J`kL5?<}j+_m+W zigjB}UcX0nO|7@!)%uK$n_IT4^cfokZVt{_>U? 
ziz;5-aJo7;V10;+-_R;MQ~6ulo@A(U8%?9r$~}YD5ZkAyi1svls#~(>=jX!Vc`H4O z=jZ<-)mwgx&i@a(i#Y$|{EzcL&j0$|3Y`DhEuPw~1f2ib9RZyG>G`Pz=l`ON^MCz~ zZJhu4-3FZhb2$Ig%?6zRasKB&Xod4X`>~DgUOH{l*RS*EL}6b%!m4W_I zFP)b&^njOs1R<|jUqr>PZ*%_Fp7?0@lC)%FJjtX+EzbWhYkB@>cO)!Vu@`)S00@8p z2!H?xfB*=900@8p2!H?xY*hm6^Z#)E->TMyULXJhAOHd&00JNY0w4eaAOHd&pb_Y` zE?9bmmsqTKxj*Q-rt_=LXy=O@cKb}n+pHh8_X{r(p4*e}UGA1Fa(7oZzge(Ba$WDS zbTU!vg77TQ&Yi-=U1fPOr_isFlT3jEj+sUkZrRk(76++U)>=JQ&<9 zSk!!0IbS-PQ_ISNTqzg$^+JGpkppT@DJ=}w-rs#;KIimElCXN5(~*ktdfVr%O?hRY zw`+jBpyw`0gEApIJPTv(0I?r%i$4sv@@R0 zePLLCQ(BeII6YHS!bSR>MvD5@My!^WVv*{5lliNqB(ZEZYPG6u1Kr*&1=~4(Rk)Ra zi{%yiK!bw!Nte>z4Ft`{uPLcXAu z3T66b_YCFw5;=diP$3SDONMxkV)3-nPKKqEX@E#2O%L=&SEc6sx7BrHvr0 ziT`r^-IpUK)r356P7G|Ny&`Y-GUr}6+rN+l56DZx+>ju@fjJX zXZLR5{=3aatT}z@&NhZ2-{C%7?fFVPxZ1dBxmnkhC-X`~DbsF6-_hvH6#Hb@v- zrl_=(Vjl)(UW@dlt=9Gz6)M^Wt+K>E)T5GhTuSeRH8sbtw%ha5=ey7+1hAQ|u^n!brOwPdYi?G9O!HvegtphP z?e0Z^$5w44+@xk{t-AAOcx#F37^@=9)6(>6nm`*tQ?{e*=bEeUnh~g;T58s&zS!P@ z<-D1*4v(=RUh9<5G#}Hsp}BU2PFjaq+In!b0c~}DrE~4DwSsiH3okp{G-a%jgY>bK z8`&WzDNxwny2V)}trp7_IoH1bhOA}Ydb4Ip^`2JeJI(M+AVnW`O@lWc05ySFWoc0< zPM*@4v;9F;u$om3p=!2iEyf0G6=KzXLTWavy00ck)1V8`;KmY_l00ck)1U4}Noc}km@i27|009sH0T2KI z5C8!X009sH0T9?A0sQ>`20=j#1V8`;KmY_l00ck)1V8`;KmY_bF#$aP-^9kl)Ik6Q zKmY_l00ck)1V8`;KmY_lV1oqk{C|U>AO->;00JNY0w4eaAOHd&00JNY0-Klsp8s!R z<6-I`00JNY0w4eaAOHd&00JNY0wAzK0(kzvK~N9_0T2KI5C8!X009sH0T2KI5CDNq zOaRaSH?i?Bbr1jn5C8!X009sH0T2KI5C8!X*dPHs|KA`eh=BkIfB*=900@8p2!H?x zfB*=9z$PYu=l`47c$hi}fB*=900@8p2!H?xfB*=900?Z50G|JE5ER5f00ck)1V8`; zKmY_l00ck)1VCUD6JXE(JNn*gasReE<9!XbZDS{NQZJaS;vduC<1P(GuS z)r_3;E*A@BMa>`n%FDkuZ?vtXdjIELq3^vf@tL}|BO?b69%$J%^p%(Y&*!qXD>Uxe zvp@Xm@|g0I+wQJ!J9OmGA+MN~m&@uoWw5A}mJ9ik;w>rpQlYpqc$b=&=`tyo#L=%j z^sX1P&L&+QAALt=;?wVpKQ#2G@2zzP)z&9bSM!|MG|5#5lJM25x+PS_eaFA7>rM)MNx_e{Xt3ery^21kw_#XQ(|~3 z5%P!9L4P`!NJOX75!OgscPsJmNU^*BMN(pfy6p(^a zBrzqX#c(h(k(i1^q?kV(42oi$dZSK)B<+_-JQzs@(y3rLPBnfpD1}23O)D7p$3?pI 
z2a}PPeraLb<8#HoxG?|d_r8-Nt@e^uzHpGV2*$-kWG0=q~cOQnux@c6OptO52VA?=d(WjUGd5C`{kF}AN}6f&pIr> zwn8eVgMnBi5>BU+5pk9-B7tC949o=m#5Iu$hJwKvGDtieo=ruPkz{z*ml9`Efmt$l za8?wVl_f$Oo+V7FNSgYanVCsP5;Sfi988E)iEt_z7X6WMGMShSP9$lxU^AhBI_|olbK0{up}nqVKF)5k4y#Qi5W2!4F@7gkvfc|$hV>a z^15_dilzP1#8iYvOwNdbcwjafh(yV#=`^)Yr%BC7gfPd*p#x-&bUYFY_-UpQF-p!w zmJcM8Vk#L;hiSf~QZf}yCu5;-IvNkp(0D|O95b3ES5Hilt`RYj2oo8Rd_F>uXJWR!U@`A$$wksU?yCz?PoAf{);SuqkIi9pbw45p(gkz7TT zg7F!0dNN#mN(_)erC`(_Pp3m-I50yqjKospr8F+7mkh)c@vuZ*7Zj%g=~y}-g_4Ps zI2D{pC4ww{eeh=;yNLb5=l146nQ9l z!&I0efs{<9{r*UlLe(#&rl!IXX*QXhib&!_Qle0#0VxQkBr@zoB*pw}YQj$jN=}87 z6sn0-JW5`a5Ccgm9HBvjQ7JC@Q;CV$WH20_@=Ibk5lJN?5gH`ok0jI7b8w2BES8>% zQ(Ps9vY(=Z%ua!l@RMWG7!%ZQG9mfNb;2os%Abf3?m##ep{Nt7Z=av^jZ?>Aa!%%z ziBu@<52XDu>V$HMKS{YKOmP-SCL&4y)YJsUB>5`&MA(;*!cmGoDhQ{DQII*c7)a2Z z;)yel{owCIy>E2hKl|v{KbLV>UU4HuVuI3RA`z#=NU7036^q2l2tkPgO`3`)!{l{- zKPgJ7LJW}a1So+715%oTAVv;O1SSYqB0(liQJFuIh{n_8ZlV+oOQAIR(p21^PLe-U zB233Aiik)u9;XCL4T9vg6Y&`~LWJ;9b`rxZhiu}n@DBA1Iq zW@$`+A~2Ck$NWi3spJQKDxR7k&tppqc@Z0xG8dV~F9pOfC9A0jtt`YgNfam)XtFHl z1!(D+5>siinU7qWz)etZ5*2YbqiOr2VszFoPEc$oXi6ekZz_?L5^)M#O5L;^k?)e` zq9i2($w+#3DozQUoFOhne8E{VW+Xvj7NP8&NGIdLR3IFuMa&-!65AS<@ zAeUk3EbRAFCJj;uQqrgOhB66x@I=_3p!7LKqQp!Jroxnk)1*c)l!#Kq@ywEzQj$0o zNl>^G6Q4hto}xvA%tgssicgVrT71z5U-^?Szv786Jmh`!>+ihBVR_9kxp#^d`V_6C zak3Vn3KA!BH3A(JXKBUv(U{~QWOILzG!SW}psr{GL6ZoPcf^SVEw~g8Nm_WNM4CLF zGUZex87C+3heFgX;glxW5dJiUX_W0a$Z_aDNs7|GA`+6qK`|bSQ!=0gK^pnVCx`*f zLkbZ^(mFmvvx`#_O;b{!yeY<0Y%f9yJwZDK%2Ukm;DViF~>r6RvK_)KxNg?u$AVm`yjuI3tE=(g93Dk8gD9+G=6DL5_ zUxHRGzn}IB6cw})5GlaOBSN%&VKhTzB9XEjZHiKqObIT{m25&GL1~v3(g-O?fh$rp z#Cex2Oi62kOJPS{P?qH@2La)+O%q`u7@=(kS!$MrMw0hWIh5r%T9}Cs8^uS#?Vpt> zjj*}Xi(jNv%9@iuBvTP_g7OHVAt$F<)3_|Y2#BA;odt~Oqp@N!%9yl}QeTvfX#FFH zml9E06zdel^qvwo6-g;FQzR%1n~R+Tfm4=0R%t* z1V8`;KmY_l00ck)1V8`;)*;Z_dCJo3uv*-|+q>L*ThCbcQ(b@H`eNt1JM+$=&RZN- z$B)0w4eao0PzX%6X?}_ipRO(XzalQ<9l8%90$;pDNVeSuW5Ibkk2)dQ;-b zh~%BC)y#YQd%L{!HJ4V3CHnQUdGDN>FVB19^aJx^V%9q$&3a>#DQ|2#64}?=<(ivN 
z^H~x)Cl@nk7ZNOSzhYr;B#k%qp zmX;JYpSiMfp1v79^>z&$SaEoQyR8`TJ??5 z4^<2hucLQ6JkdSvco`jW5DUYmOUp|sG&)&$j8#}4LL27QMun_Ws_TPYzIgT7We)z5 z%?zGZni{^S4R|KV-L*CLRt~E%jgzzG%f=Y(O=U}NK*aH{0=?A$3_+{Gcqwcms-8QC|L6 zMm@vQ2n-im15&)i*#Lm^th4+$FV(xB%gn18Pob!E0f7t=g3f zFIjeaBuTg^a0{#@yH?$KvsKm-)$LOiVRkX=s{zy5n+@1Pu->_OGMuaLny10JR(-J? z%7SgQz!s48(bj{!`_h3tTWy60wbiydVyX7-TstJK02r%Ay)tV>Dl>M&n&RyrIe2Km zt7nwPrNtcQ6kRNOO>sq>Yxmi^Y=3Th zz3ut7UDoegKWBZ|nzfD!{~b4Q2*!K zb?dR?`jlL((?GYAMt|Lb{Mf^odWCb+r@nw(;0I2ag>y6uOeuM{et} z>~!8}*~t|O$i=);BADs}*1J#K^m7`Teauw<@WFvv)hmu2y81xJzWqmgEZd#iE!%m^ zka9jIX2#XUmyezJ)<0`kM%k(UfBW>WzhLjtG1g$e-XNkbGpn7x(EA@B|IEXo{$cw3 zFAi+do9rJMFsdK=-j97ho4@5bM|v#VoZBqhI3ntJJS)Ecv4873KyUe#|0SP)&Cy#1 zo^x22GbYtWZtzf##Y2<#a4$$!a;FO8$)6r8|0MPBWdG=ZH>nk+?A_;Wf&bAzcCg2? z+qu)STeFZ-KC_Z5pHU0rfuAkCBSe2%nr7_vd6UsvO|bh9UwL2uO?$=;=uP>I>IBwr zdSsMYc9&%rpJb{?pL@av9+w=}%FTi9P(OEz(Sf>}uF*I5ed4K`$42y)JVxs3+qJZf zOM278!^|OeS$6WKeq+pIoy1{28|vr&Ffw3N*v60bzV+faZ~pz84h`uIT`aJS`tf!R zYwh;y?YNd3Z#R4^)6uQxfywhKeNY^IfXev(pDHi;$eZq;td>~E_y7C@b2BI2bK}sU z-h>ZT6=-f!ZLxo!?&Mr4&3fbe3>M7j|A)Qz0B_>h`o?#5S5~oP6M6|?W13(eIdLyKFCqPI-3aO;u^xkgDO^5$)%4%1($t8K; z?|HuG`@`iD&CHpzQ_h?@Gv~}XdLAEzA!Cz81vY$oVdo06HcGE?+>zB#A~nXiMGe-C z!+#@EK^y6mhGCHpV>HM(Q5)RNP+FSnyX)C0Pd;BXGLtSp{8JWPT(tLtkaqvz@gwNS z4a8tmCIgpn80iURRa(aHi?4iPi&iw!lY**MkpdYd81ZrI24xNxHQBc#0Eev^shBp5 zE@h9@gH`mGdbLHnT^IKF<*lNTS+uPne#%=@-K^+o`%0|0foVfUCALtHuu^W4QyPKc z8NnvGu&&-Sxv{OO!Bbe*+MW`#?~AfWUPcGO70PHgJCs|Wf1~&2Lsq-A ztxkv7%eYs+yukvC-Xt^35o5_6MCosYG>i=}Tr)L-d}y}<+O#Mnbd1rID1hC6R2238 zu5a!@frAl!G{}+Sn0PGZ#YuvV=LKromNrrCM|N&~O}2k`DQd4q0QuP^v*bN?bx6^5>Yiy_E9c<=2e&Ro@N zXrLxri+aUA zZ>1+sTfgnnbKJL=qb+=+fh=cgiw<!HP4v4&Z@D7*91 zl??970Oa-}7lfRV|B3bXpa{~U5d_*N9UOV;J^4>@Cg5ZIl1e%Tih$fcxzA-QD|USE znBKipzY0Y&Px|PkEB6lD&F3o8FK`1fO5loqHV}~tU+i-)+&aLn9n)#F3$>G$s^48W z@_S%o;%aJFc(jvNkO}8)v)FRFQ34(zC349iKo>AwJvrZ=7(aN{-N&M9bQ9oR%Zn5% zIp2oYPf5yp+&Vd4AYe3XK>Qa3k^1>@lo`3f(x`;t18Zzp3x{WHA1jsa+A{jHsaz0a z+!6nD08zJo_1K;BV)9)A4dX@~V2&J3hLg_o03j$*G*zppaJJ#m@PeD-lGn2# 
zUxnE;SkhxfwaC~1qMA|5A!`o~eGnQ=nma&(VgdUsG%Yq>T>tGv7q`fAX zP3IiCcxa1SWD}T7-#0=14NRKVOoxI4)#a|g{W({i)^!+b;JtXp`{*(b2ZPbnul|Ii*a*dUQP`E_(!(%3x9HgD5%&t#lU z{NDEbO|N});;wbrvsq`v&P{t`2HMi^dF!G^GCOM}<$L^=NVfUfnmv>AYO2~b{q}<% zY?v$)De^kzep5a8G}7o>;gsCqwQdZ9Y=fKAmZ}MKh0-0T^W{K?Vtz!H4*6;Y5=_cJ zMv_+}kU+AFMv~^%@>H^|e)P@=?=Uws&8=v8HqU#wts|0e`Qe)KU$GcSrZ00!nbt%p zll7;vsSz?H&LR+JR<_l*!p}IQ^0Yq#_1|9ekq{LzT89DqgUPLAVP1gFQ5L(&lWj${ zm?LjH8u$War?qwT(Ab!GAbaCyPOL;GwsLVH!kxORFsx}u!tN_b zU}_bWV?0taTiyQF>gwA1wz>%^X+DsoHkW*G|K9nKh=7Y{F{a!JOy1jVpL=+gr3yV2W6zAA)k62UEVovr9=hi07qI`L`78&Wpch{W<((j{w_G*j z@<};ma|Axtl}=E9AK&c5wr`wUTf9B6*t;c@UCE(+*oV7pABDW3XLrlP^5n8vhi%_K z!dE_%;>+l2{J`WE`}Q6C^wMf3FVF$RrqN{sDQLk7r`+@QlatD42!x#n@Sh=^R7r0Y z-MAWO*KPU?Ym1(&y7!eyD%#dntF%S?p8jUv@4Y8Z@{8)x?K#rwQ>Hs556)E{_H}SE zdY>iO2>Mfc%Jyf<^I{`xgzl=Hfi@O6t}hvzJ!P7}#hTI??C;>puAtpY61TJwTv~}T zPnq{>53yjQnxkkXw*KQj`105Z6Us&9=uZ7rWutDHs2kmhBdS|eO2wNLX&77T`~*Z- zDh9G3yel*DRS5KS*U>4BZe3<5MJw10cD!)w_`(u_iM7HEb7YtdL{kMKHWdE@L{p|v z88BN2laRdNI3H@G`heDaOrPT({cdGvVUis&q9A*INp)bEImJ|za|@9j5Y??slU#>pK^WHeENFZPXZ3g&j1d6syU`Q>ba&@wh$&1SJwd(Ng{s8#^ z-C%4A$0a6K(2~tmw>czZVgdcCe&<@)T2)`WlrOCgBu_3UAXHZcFM?5o(qMdqk zBNgS-Ci`f=eWKq5-aPHpw!~evs~-lHGI=PK?jKYwQO*tr73d6uIw616{NHX%otsMq znjBqg%r9>ntLhQhs3uSgYH0N?s@)v!gRx-fQt&Sw5H^zu6$yTEu^Uw--~aRTZ}dsY z^U~cuNyrU_fu<@9uK163yh^sO+d2x(q=QWFKRW_gPn=ozzPw>xLz^#6Qp5U2n59Tzyf_E+r}*_YWf?Kaz9TazuzMy$K7 z=UR)bePiB_xiO|X#$~zNvd*HZ|5kUX4eChc8|7}LMj0jlD&H)ZkwfGzQb}Zd@W}tn z4?_} z;_@K$i5^`1Ql`$(^Lk5ZMI2Q&zLZ+l+jXi)M&a!M`0C78U%2^s>7w~Ts2j6Ee%U%k zqroZevOp^ujuxL1d};Wu2Syr=O<+Y)?KOj+au-$cN*5Lfp_^p4d!5mvy}`%toZ9=O zrwp{{L=;X6vpap&p4YOZ3rd1eTYA7;y6QaI81z1LPoM2owDX7SIfcrD_}MmDI)6^^ zI676yT}@Srn++krPyWGi%W3mJ(UA(oQEfB<|9sIym+bveINAtN0ijZcZ9n8#jTF&Mpo@9qPeyjT&g9o2@UUmY)6U zK7CA3J-X=_wpKx3XJrPVAsql*16VugZ6)O;dEvlMBNl!k(L?3qn_#h*grv4$)PfmbS)#2RmAOp?-5t&xZP;6~7G-xxG-)%FqPq zjLAW$OZ!2>q{bsSi&xcCqmxyM*QSKW?89clxgc#j+fNewI)r=V8J{XUq|+w{p(|ww 
zSP%5#BS%mYgnm>%3G#W*Ft~|!KDkpqbk^gP0B}EIo>8PDN$q@f?yCF^$=P*rKQduCs<^YiDhv#@E2wjnY@Dq8I`3#v~ITqte!2caiT!6Z{FCvk#@ zuDPk9j#3z;m-fkKjb~)CQ``_aJ(D|t#@z7jl{-D8CN~KEX%d>7WNvO<0K>Gx^7?ZL zcWgr=*@@EJNLsFFD;)pYp7Zh3(x!1iXi*2?=7iHcw(DAGG_^ICJLLPl!yqU^xGm5FM!es`%W|nPK zeX1>rR05e6i!6F#E0KZ)K`29$uu0MDL)lhP*V@p8AYa(zwYA=g%3Ix!fx1~InXZj( z@KHedmn@3;&jr$ku|cLC9U!!$45mR#`SYTU-IH*1r2=t5B=g*wrX7hVPY6OCIuHZQ zZMiBTXjWBYbY~p;@RQwpE;dR33I0goOdE{3)GtoT2r|7V)LE?I!cJqVmiyb>SxZ-Z z#J@N-lI@EldgUGlKuGyv1M@5G}Pos>-vtv zRK3NUFt5G;>WU!rnr<9*`2AJ+T3jP@)!g*JQmr{uyJ__e!y^lv&Hz>$CKKz`-w}Az zfwz9#{AcTwpu>E6GC^t0OWbuiyR(6hbG5_b)0&qtxuIoB5V}y~C|14}tctf!Thsm9 zmgiE|02vsxu2%p?y>TYUKKE@%U-e{jMG)#xb7vzUw6kl{#+=1K>yqL>vtsoZhj=iSy+1fdl*cPfIHwsZB6C-yHINvkueGbx1l z3`PTwU<&`nsX^#V^-Y%8%9yG?qH)s}`COyMuNot4&uF(W=Bbd3sYc%fR-}G0<&0i+ zxj`sV2ZOhABQ(C)V1J=izn}PLuiBqF+JzIjkKohXp7 zZQ*YrQM3tqQzoB4`5$HHlwb1k*XovbD_`};0x?TeYuRy%w7Qzg#WyeARhzz^n)wcc znJ?X(%4%DhV6uBsAA1Tpj_+uK;&{Qy%u19jsa!|JpEUirZn8!b_fpft7hE*-GTf(O zB=u`4ECV0|BZ%0x>A;(kSA6>F@~Sy)W~~N@prcl!e#k37-_)-b{Q|Q9A2Rxd&-lBl z;Qn&sp!=tjDs3$l%9F*_5Y>3oZ$tW{T=c~LoC$(O%}oqvtopF9gI{|ub^U%`h= ze`W-sFw)1HDJeUSOw9*F0^j{~4Mqk%VB!Fuw*)_SnNGl;Z9CwYbUc}r9E2XJ5B73C z`#9y)H)x|R7o7R{AYTzX716Y{?fQFSy)rpr45Y=p9U-+9uX zB|+$Z5^+C7EvsZ(ZRK?DwDiV07`ZP|^7>u%5pZzpc`Y=s<`;Xt+6>>~6TFu~ z{9yYp6=+~9$V&=#B<#NYdgAs7kS1W>05~bkEgCBBq4mBRUa-5rPL)v`(*4(&PHJGLu&er$* zmWNkJFZzPe6EXH1#W|_28#1k5mR`KC&m19!kBhKhy*Os*J7fA^E$#OOp&jasg>q@! 
zLiltnjESd3u;|4C%L}DJ=!8;mIS}1F`mr>05BCV2l>+)0n1Q%*KzhD32%S)8eB;D$ z0pF}BddF^FuH!QUZ65f^Wv`5qo+}JOC6ojv%O{jgH)UEGZf)j9aVX>5l`%sTQRq;o zGBu<0N=Xp9pb@xBpa%)rmfuoSRUcSdOYhn1wCM-if1I|urxGW8b_W<2KHpSyLlinj$ts-b6b&NqTa85SpT1 z5Lx)~k%DN;Esn>}4t@@LWoH}C=5++xavPO(N~Jw{K`4gYIIz0ey3^l92*$2i$Il*@ z_`A;7jrSOOx&0?FsS~BWc|que7#hY5{wbRLAhbga4WT^c@1apNO>fSmAT&h-aJ3-r ztg5T0RaPz60^Tjt&+g|EywVsFuU?){-*1eQo|qJbwusf|H&=T3k=FMGnT}`}R06ag zB~?oT(Eg8S>%E)p&UY3}dKwgh+2DERa#@Q%`0HCk_J1rrUKoUeXfO^t{02~v2v%oc z&(bgU?|KO&0{X|~M7Ar?TSB=B?y1ji82HKM((Z9VXoz~@j+hfjYEea@cKp@csGCUu z{5(Qc6TEEOf2cGeJ(L=RI;aQO7kwUq`zzF2yy3Rq51b6b!eZessqj@+5Wes8_+Oi( z2Xli^0S$vZ#7&7r@7T#&>)Z2+oztIJAYh{D{Infa-shzQ&3*rUN8iQUtOnYbXdhj)ip6%Nt z7Et75nBZrH!@AdBd}f7o>+IlUN{K0WFw>DKjdN2H#wMO?zvIlGw||BfVO+5y1#w3+ zOcOK}21k-9vxAca>Q3C(1pJRsPYX_@J881mnQ?%{X_vC?G}dN}Vrrz#II43lg5Goq z$eUYS5X`2VR)*NLFiQGOTJ?Jc`3rA{f)=cZuWE71W@8zF`jEY-kZvgmPM|AipwLh7 znPZlH{s5ie!e2hTyW!n?SWbdJ(7C(`ZylO5Roi;CWt8eG+aa0+WX;OxnR71Kl?DQoUbU*!v$8^OH+8I3_T~%4udRF2^erfBXV8&m3LD!6) z_I-<`F;uK;M*r#ykt+YmVJDO;7X{Nhejzf1sa&Cl5j^dKttXt>Z;naz7Tt%gs(iBg zsk^ggOII`m)0|Qvw$#E1y>tWzmljIi)0$3dIR1tYSy%KC`|D4c>aNzp-Rm!rE}tJv zrF}B%lbNoMQm6sG<>pD(4|2|nj9O&M`TPsAFu~vU%F`RaYmhE038r+UBGVjdn_e2& zcIIi9P%M8j)!L?4*+qtQ>HOf>j;-xHKH8?~BR=Z+>9qt2D`esi1T~rht5~< zmf&9Jjn2!QXE--FS2^c83!ItG5zb!DnAksJzm9!3_J!E?*gIpdjXf_m9NQ8bh@BZ* z7&|`J9os)P*71kqYsb5e7aZ+)fw0YSn&V_gqoc|(-QjbjJBB%WJFNC!?4R4;wC}S& zY`@LE)qb{pgZ+5>61(4?XV0(?xA%d)!mqY3Y;W0~wLN0H-F6k;E(Bq}P;D!*X|_?e z1X~ZA#rm`LbL-32r>u`!cUrHvUS>Vj+H9R~&9jcS_Kf*0=IxkWF*n7W9n%^!GiH3u zpcvKindNy{J6vH2TUJ{ZTKtwG%OuMPOMij^wZy4^sNbmXtFNffzyreN>Snb`U7+Tv zuErhEyH2oEbaDHkd0m1?C#nW!WyeH2;#R(?y~EAN!I$>+;qxk+9q7t0gm zKJ>6>{4%qn%7^==m5@k^>!~K|l_CXpWa@PznKm)r^`3$X@fL@S>Z5wlUT-Nq&^O|G zwW=PPb%n(7VeXJm;_Ye=470aD`q8QO!>{okh_U2$B+xOQxP$dMx3#6MsjW^OdW@Qc zbwcm^mej7o-2v$ptM(B&()+ApZo|^r#y0tpUTCzOTUz01tZfXy_Xlni>(s#xwHM7T z&zHC2r(Mo1uJp+Vc`p6=C#K#eFR5+zmiayK%eoA=ge`$)wT$Ct`Q*C3>KFnb`Xbj` z4P85Q#;IB36;%_pI~1rn{nRvkmQ|QgSrcel)!-RFJ}o7a+;y0evC6msYG%heX=!8A 
z(mX|~ql;RsOtp7WlA4m0lA_KWsAhM>hmx@=$wjH<%J@O*q^?U-QLN^7i8gKASox~1 zOVTssOS&#e&5$4B?2ZqIzjpbmDo<`o>bOiL&fLc0`qlKVe|ezZQ<^evY^pNIjL=ur z1_JP;j7>>TA$z+l8Jm(up5kP{KILhw3Di?HyS#}JDxVLgT%{Mz0hQPpp?1tCHqN>^ zl8XF7PsMbv{3xeIo>^3pS3zQ9LB)jEj=^u6_-*AhT2xtHM6LqOW1NdBr6)Mi6?v-_ z7i;7a7^1Y*Ef;o^0o4c62Z3_)6lp782qJZrOYKMG+&pll3Pqm5iIb(|O8zx(USU4D zj4jgKs_NRhx`yT|+DUmIE3aAys#RyO{(4(l8q%{eO7-V;@@tHL<3);R^6a{RGQ&vD ztg0h#8_CMHCi12!+0>{kFePi6l=()|A81nM8A%-DmD#3bO+cwMk~l#rvy3D(D#}bF ziH9uYfRThQoxEXAwvxBZ$$Djuk;Jnn#cw3N&GqDUQ?hBfGS`%>t0MYnz|{msL+&j~ zk;G^yJB=hpL+)WpVl?hDC2N}GV~iw5L+)lI>1fy;fwl8(mBMiO%__cA3h8h4tKbydnujBLHr%k$*@Y#Db}>qs9m zdPG5LMWK8H6AqB@fCQUmHId_^&)ev5dW<9;&KHa%hEv&ZN;Wmh!%RsG=WruQhjXZr z#Bfq^2T}D6i?PGJ_DF8v57_!vrP7xll4ljDTx)A$EU7lZE7;yS;)#zCh2*LydJ-Q z5u_{C#@83iP_3MYBjK&Xi^PCuY99V5JtLKPw$g)bV%~h8rxgEi-Gh83|FXy=m&U5S zi0sYJ^()dZe4*pF^fRX}Ly;%F#gQ@Vq?)$IR%$?@WV2nEKfzcUzyrxh$pnxQW}kuc z>ocYF7!X@jRta*jj7&gN(NDdaBD39<7b_0zu6F*L6CGDBNY8Pi6RGEqQ4@)5vTBvK zv7V%RtEtsTU13$7yz5vs%O*jL2 z?`UYoWwrb%8#8+QFFnJ?KNT`0f43?+^3PKIPx0@^zZSnMenv)mn;lmiHz96R9Cj$zPp(f~2V76N9&lad+TvR0TIuq;@?06N;jTU| zoAXz^B6!>RjPriy_0F@LtDP&I3t?H{b&hrRb=qV99s7Rli#Yf1jJ+r}6k8uVCpIrO zIktDK?D)p9-|?{H2FF>BHpf!OR7Z-Vul2^L@G9XCH4LU^{4g z-*&+EgzawI6}HW`HMV88DYl8W(Y8J|$@;zZRh;SXv|eOgZ(U)XW6igwSO;3|F+btO z!pkvxV(yK(CgzNo)iLv8w3y_W-j?4j?^&L*+-14gveD9PskY=>CRj#V23opXtm^OT zx9U6Ue)SRcI(4JET%D(ut7$mV_fcb2S@~7@TzNy;qwG|+DrezT-+*)dOr=;!RT34a z{ImR#{33iGJ}BQPUoM|3Z<5!`jdHa-Q!bJ-^(APgaHXVDs*d2%_9LSJF3(mV1yMd>vV2SRc< z=NYR9t2s9LuvV(l8SEgO*H%)yyr#u)+#%n^h5|PYi~LMgKh7he)t@MizyoxqkUy*c0rs4or7TU5h0>%C?Ix ziymRiBf3b%4Ii!?J7kXNgg;#<;K;3=@JEVn>!rF$6smN$BE8V*tLbiOf2Xe^lSGO zs0CO-=I^S-Y)Kg(Rg1*X)i$%+EfXgqDPrilC>qPy4_+(a321ZclIptF<`zh%Cvm!G zj~Jv&nG}*&OZ_H*xzth~g%diJs{`_-OsL7LW!HS_AfThoWTa}a!hrZnhwNocJQC34 zMuCZw`AvdknGswQ9zWnZs}0`_;9hMl+b;5uU`>+AY_TRp2ARcXd1-BRO?&-6izqjyx_1gkHrguWPFgUat3TTb&$LVAydCAkQL{T$;U(%sK11U38O#QP$WfWx z6i#|03WZ^fCLI7Y7Q8^g3zQlr+C*nA=d9QSXtKAEcAh@OcmUh?jf`X2zHg-a^F@`PS6EOor*!5VuYX#8WkI1bP>^XgyJaW1)5OiS 
zA>S}lDfD%M_#R!4^z5y46ZrLvcrYR`6!xx9x`uNlp1788m@uP}=x&7@`nDF>OxHEw zn3=o=$eN^Y*jGE}e+^oXf|tHx)Ng{6zGOMoZOxUXmE))ltzu_i=Q)avaU7hUTX4OO zU3n1eFVg&yV5*U%7e~{LB)ye<*_3Q*BrlngxRsn{B+0HSZ*}cb zh47mO_CS8mk8yj?Vxqj_ic0i#M%-UZ<}Lc!|VG zv81?S5w+G{luq_=QZ(f}*#Rbv?Ct=QMs{_8NhNo8fJr6ybXSw00jt5~I^E=?&k z|KHXB-;F!T|6}@pwm_qG|1}LwEl2hL+&lSE{XbXvAJzXK)&FzLfTQ~V2}kw+NA>?y zk4nuDsDyk}|IbDPb_Y}?eN_K{RR0g-hNJrbqxyep!%D3Kj_Uvaf203@2CC+x`v3oX z_5aLbI-=X>CND?z|3~%zNA>^oHOEo?|55!veVn3e{EzDYkLv%A>i-c6PxohYRR1sh zEgaSV2bLby{~y)=AJzXK)&JY@PMqrhssF!m(&NfGWS9s5=6xv{fi6C7_ju5m1L9B2Q^zQ(TEV_|Q<%~o$4WZiE)%{nFK_n2E^N-SSl zF0f>)KdIZ*mFf`XdDy^@!Hf2X<<0Um*+O=ajU)*l```StC?fU%CF}kXsHY_1!699DOHVyAL{%C1@iAAtx0<9a3c(e0mtQbh1Zo$ZH{|vgvwV>+ zMEpensi{#>RW6*-_qa<_NJ@4H&ZBh)&azE67SyIkIdG=_f#48bSQ2V}>!v4$Z@7|- z$qB(@^kDdv;ZCVa@ho*I3pEn(dZv!ZhvRAZzJWU__>`gUu1w|m%qu$?T^xb~=_Gh6 z;Xc_+PolNudWdD}n@jgiNm~L;QE*T4uy3BU5d29GKrrh8!Y?#?p04E|+; zH4E*!txv&a2B(Yex=c;Br4b^8_cH&5r$Xhu0? zL~#giq7&h*ggc2V&d<|hDYq=iz3ihO(&60=ql1B>&Y>uoac9D2z-8Aw(;n_lhF65( zL)rsBBs?TQ$CvKfl7wrXx?%eu;T;kFoOIupogs%YhmvI2v=IDEGn9PrJ3NvHxEZub|T zYKNC2KI*Iykx{2KY6gD#SsH+v4Dp5FU^*EdcWE>@;c*vMckDe30x3kts$(v9&7~q* zUBb)|JWG2(3m!Ju9UppYt9|kp7sWP$B6zSm>dnfah>qZ@wz}})6({}r7D=2Df|qHA zm^H5>7qa4wA-N$1lG0 z%6;HNluVw!=o&ef#|$&t^7O!-A3h>?D-FRl^#*G`lSZpr@9aPRQF+1m`@pY zr&ypYSJo!-7@(wYN)UdjStaKB@h>Ww>%a{)tHXxji0Y&U;dJ_l8V5)HdgIcA@HDN5 zBZkkg(wS4W8KJosm7RW)*moH>f(Iy8vhf~-vaTV`O+mPt_8|_hkG0S6W{BrG8Po}1 zvVNVVkQRww$$^BI_@V`RfJ;Xd6D{o~7;fb1siPHy#z9GX56zC3b z`D9`mF<^RG48rex5F)*^f^as?T$-}EqwjjPWl;{;!RGWu$6ozv%RZ%E#3qoQiu`(pX;kjZ^MeB5izLWVEPi46&yOvF274ikKAA@ z?JnHLvzoK(YU@`{@cZ+$hHZ{jx87pJ^TnE^?apwUKyIW_A(F{4>A@E2qtGMuV8jRi zGTP!pEx%^BUo9pClwMA!#!ri?RA=#n3o}T!%wRLEmL^;qR={`U!rVz4uQ=m;v7KYB z&~MVl`&ntpZ@0ej*<@my9c-dr0<(mdKx1Mv7-e9y>>HQ#ed*b+!5gMv(Nr{Gm_RyR z!1sBX`S>HwoZ#_vA59YbCCrve?5eY?#zo+h-ee(FiECc4fug3d(o7ConTt}oBH+_}sG9xir9*M8WjHFO zsOf@W07lpOB0`lU#FO~rfuSU}B3Mt^qDLCTr?KPH18vO{b1zxGq6rL|tpVx~vz*?c zbq0Jgs~vIqNPIW5C3S42w))yJ(^Fr1C?a06_GOyer>(y3 
z_g?oje@Wu1D!z1rAvqIX;i9D z$Zg^<=kahtKTHbNQihr-!TTEBU@PDgNn7!+9)sHNSuQp*u?*;T0Sp4-)89X>J3WtB zCIxFmeb$e^U4L?LIUR>YLGSc6FgNUP&}RJsccmro74e26(4M()w*2|pSN{BwsPlr$ z{)#5Cu_svb`&8v$*pVhN7@UoFq)Aj6yvIMh&*P60MGFR~z%aOzOJb~X$+k10?bs5| zAmB+=;9<^I<(xx@Mi+f{!yqE(1eXdF=0(F@vi=nc0|WQ}5C4B#;qgz3yECpXZiwqe z*Jf9m^K0jo&RlrryF7L&qW^sWpL@ynf7v(Ly|zznXWAxO|6{$wT5FAqIe_=`D`Wau z4p=U!?`5@Wk55Xa{2gJ!wUllc2>D4vd85nj7EtET8 zYfL=iAEmL=a~eogARFiI44gj|Yp;cDoDqV9=rO>`#V-I>Hw2p6T7Y;7ByFHwGVlJ0 z-|xrf!Oqhi(RPFmoJoUel2fLIrqB~hrZ}-Myw&u5jKDfQ`F71f1~&2WD?8`F~`!t!G!{o z3z+|cLupUZk$6LJEX^9i_(b%Vj<6xqLvSDM!G_BZW9<2o>G@T=wxMnLGheirIz%s1 z=&sojRB6}FC?$xeLr$6*f;;I9@MGcennie_FR*aI(#VfZn&;~@yZ0#?*jyhe##3Nb ziD)n#VZYs@1{)-DqBjKJ(#FtFH$9fs8n=08y!z7$aC$~~a26k3eEkE|!#g=4I|Luo z#-&k_aGave*uD7Ufx{3M0b2wM(hWJoDBOGZwP{zxll6Hac#s|jH{yKq#f3|N3FOns zTu;-Xm&^iA+IZ z8y@>E!9^D)W<3Q*G4`q%d5DKK?nAPAatJ=8hryS!KDP8Z^91H@QY-uEj~(s9pJf#j ze67NtCNC?`d$09Be-4p*cUnjv7TDhlW?5XhPd1N=e{MEAMW%-l)AQB`{+_Ad2` zja`eK+wj`};PH1!OL+(mp~v7>k#n+6PeBjs@>_oiDxQpLR_^Yb)Tf2u8JY#BV#~bP zS5czP^)9;kw9s{6T(mj-)XobEEDw`y^<-6A2yUPCkX8u9cVxBFLvZEHYOxXg%UX&y zSO3`T5L`AJ!Z%$Nv^(N0H!MxG6$$4*I^^Sb^-C|_Vs_jG4RrJjb+Hu`dD?j;Q2q$j+xEBS`YuO9l?|YDGGee^(-DQYFUu8|x z5_WINo^`K--vq5*nf1lBSKowgvt5Bl%79pdO{^a-13=37Gdok@rlJGTUPSSXn@P&2g+|eMD>)+GiVqGmBi>rI^*L zeT|%-MEEo?S;*HNpnqADa`qkzDP0^&rrzT7g|~QJ3itT&KRyK7hT5UmT@f=c8Z+%V zy`cnVteP&Xt)gDoSCEq8&~fw{AV*vSM4T9D!4EdqZy$0RwmG&_@=)F;#3sm(zhWX& zep<+FW=7Ouj5eeF^JnjS1@|*-H=#lRlr*g1=`sDUdUOrROAn1SGo!z(rD!wsYUQC3 zbQj1G+X%m$$W0kLE;U8lRP^Z8X`f>MN#kN9#g_{)B+>iu^T z>Na2&X$y zUV1ctn2L}VaR-Q?PhCT89`VqauV#^ng`q@Bq;(7Zw0_T9 z7m*2tp}|y?m?Q$dQo)wKB5z-;*uQ;!#W_@x;j#o8gp&e(vs0@Mr?xM)gx60=nn1F0 zLxap0auB?^GX`xzyKiLk!$4&$A$FmF{zX8Zrt;PhGQK1t;_@uy3st2=0Q&}tOAD6o>sx757?RHQ|Es0=>*H6&C&ax8-~I`%ms}UPrn~w(-*;ZqzFT`9GQ)cyL_v>NKPjE$hq)h*F$z~U0fctOYqvesTApdX&MU)p9!Qf4{WLZl+c+SBCzLjb zD*F+Y=E)n$!;`{Lq4dI9Nbqk**HT(* zJ<~jq`%*$tIu+)gLmPkasclV*8wJvYGM<|yBT!HIzsS8)LeQx&eL0g?OKaiayt=mD 
zm#M8y+}*wCz55Jtol|aOoRy5qEwn=ZCTutuOLeRIcn|^+PG+c;iYb>GM{fpfCh61U_ z>|jJ&;~2Lt{+x@+4sQs07Z$ahHLRklZdGjyJdxlazG~AO<9qe@p<_&Jq!MX)RQVY+ z81Zm&$K((+Fo_5|#;ql|uayRHA+27+rq=Wvy?@2Tvq(VZQXdVswwT;j7=rd?1jJV! z;?Dp98`h5B@lg7po{ySwAiD$1PHH1lOm3YJfn!*pn<<-< zVU(S5)@jGTyMo+2J_Mai5==z&%Y=xhJ}u|ZK>fGZd_=dayjoNCAs$lpro|!XT#~Ib z#CBB*ecO_y4e%fcH)n-;+9l`zyFsdX8?yoGz-f-gd;bl9delw^S#_;jlIwJ&?j$vCPhLy%iXrOMVwbpZm>; z`3Ei3ta9|k^z5`AqR=Za`3fIDK3`x5p?lE%Nq|lNM;!%-yLRH-| zD3BwYIqewbTII|Z4Dt;?0y#aq<4*6=iJ>am0JmrWT`Np)za%(zV|^{|z1gP1hCIV< zl!mW^4*CUh$&}Dy%23_9{LgoG$Q1Q777Z--%r=i$LzHC+LNS2y&N+1P&=zuWMre`H z?lX1)*X!Ei#Q9$ye0-u<(r5-fAEluot*W*-i8tbj^w2_4k1dG5u4h!65n9j%@m0Y` zCMUj@a}Eq1Hym2{PQ*v|0zs=L@bjlGfJGa*Pz%i$;nMYP&=xi7J>grM5nL5w7Rp(V z8byzG<%H(x5!b1%mX{v^3N3=6C^VN&Op=t$8Ua^$@GSuk(_Bx|uE*cj@0<|=Ga5o^ z*F$ojUmTiamdu!j#_)>Qo^o*HsrTeR1(_3T)U3rZ!KC&?Q01#0CFhL`&8COWKGI;; ztcc*$E=8{J>DDd8E*aV9c;0gjAm?U;W{HYyjw2vlS2XJ7gl1Cu*00{-KhQHducoR! z5+<3NFK~r1+ky-L9wM2XlO38tznmm)tSW2T;Kr3Qr_dFjTIWsf$u7#v6N<6IOl4O# z-K!iR|XLzX?q??w}vlbOY{C`#b%wuYrb|jM1eW7V|6k-0u z7ez&JRed%5q-hJ=Pk+4jiEME-gOBhPj&|!HsK>y%QGO-`ENd*UfwCf-~ zY8!Hg&>Ld1nV$d0OYr}{!RI zjwc+aI5daFzRP~8z1%*~_JQpT+ZgMQ)?2Ls>!6tTVy=x@5tE9zcvo4b;1$3F>WS(U zz}e3|$!e29pH-k9=cM7@D(8^n)KrBXOK)WUExY8yjk$&)+qB2f6_| z!ePK-m_Pno*-;XPl5H5IV6Gl5LEHgqd0n;2GhJ&Q(oUQW>mbCj z@uAlx`ki8iI~sEMlpnu)J)itm9A=6&{qce?;)pX)N7uUQnbZluDNKU1k0HWfxY3uL4De2?;XGT~ZqXs5; zfygSltZ_fpXN^`}xnbzu5+TO%qxCF=y{}qE50%=9OY?sDW&0DD8zxB6GdjIfF+2x9tz~MNA4k{v3*o2L z)iEWzOtQjInX$TjszmV-)tw%ORxAUT0o<-bzniB*y6&T1^V)i5?x{^?SxhTSr38MO z=m6J)npcv8)5B1YF{G?PhXMYZNDIQyj}5~#cUmh`wbotzia!g&7LQ-U@LPkZRRWkN zzu5=NB=SpP7+Nrfi0y`^rF2Ay6a!O67&@>4P=5?y^DMlj5+cNRYUII9MM_*W z(Ckz>CL;_j7^^47N0k4!df8#9!4mO3afze{U<_!}R9Fa%-WfSDRG5maCwzTwO|xvv z4nyU|YO}AJL5`qyUKqNqet3DpX*!ohY}7*MJe>Df3BJn@)AZ?yI81}X(3?~p?m}-R zVgk$7yfE}w%&wCSnX!DPYAw4)5BhncGkOxuD-0k{vZ(Up=ln3#R}76lhhh~T2^z(~ zGd>IjRv!=*x67<(D=qeEiyz-I{p@}&Ar-L$9A%TDa5sxaOn#ahhMp=5RrxO0QEMa= zc!xPd9S&NYL4Ndyp}Ipa(@!IAuq|?#r(y-;W7HVbueI 
zIKRn9JOORNDe2PvXKWWV2i{u7a<}|)HFzrdtS}7iRA)V2hPH7@v>uPvP*x9@ZY>+eLv!twy#Yru!ZE$x;?dTn*%loG^4w+^pNMoMwDW>(=j{ z>vJxf0?85^3d@R^bwgLyt>4&3U0=J&`{Tn zyBblLz)E=CwB&=2XJ3W+<^l#P8s-Fkge20|&E!TS@1}?QP?^mw)HRi*-dt_5()iOg zWEHBT-};>hFNvY|!PCjx>EYg@8e{XWL?IxKc=^uapdW- z;clV|Ys8UM$qC2PrKdlGuGMy$F1E3r>3Z~#5rk34X5OxYY(M>rr zJc?GWBgzcNQZgSSRtF@oGHt1T8O#riU_ktE#0!W1QBld;%978$b2*C;S z0z8z4R-HF{%z=flg5yS3ln7y@q8Aqa<~un5Jo0#c*g|V(2`dX-|EX>H_59PaXYGMB zk1ZKG7&=*W{bIKo>(-%EZOfl0(-4j+REAJz;;dwNuO#7HW11H~m=|;wYMUK!< zJ?0<>q&?&|3;CG5t&CQVRpJ$k{2OA>f1oT;rYjpApE?dWo^w3zxYu!=<6OsP$680N zqtcNhyT~s%&o7kAWlc_zhsZC>yXAZ2>*b5(Q`HrSi&vy(smG}UR42~=-zXm;F5WZB zL&~knm9`gb?Y28@*V@jvonmW6q&~kb&z4~uj#zy*>#w*Uc*6QQd!6!RS-_q`OeE9UN)>tZg7*&MUh;)$uX42qc>Gc_h7CLt!? z@~7oX%iES0ERR}lw`@fmzKNFeEn6%bENd)tEfeEEh<`Tz4n#SukDnHw7e6}wn7AL~ zK8<@O?(VqD;!cUHM|8v7xUq4=;`+tOt}k70xpup5v&6f$y3TQhERw5P{kQtEx?8S zVt*yh;9vBj~evHfH1j$iP?qQyQFQ4JGp2j$+LbFHt6vPp+v!lUrZXQ*+ zunwMrX`GM8StJjUtXKA!l1nbaCYvP4_2ddhqx7*7DMfH4 zjH(FY-d0z@f-M>8d=^*DxV9$K7_p(j_$!^MH<#Tm| zhA{d>WhlAn{XOvumSfg>la(xdAE1~aU@|pCLSV}C^w05#N^M4xhF$*3NMgRp7e*2@ zN!v0lvT9vxOBF7jvGfrda2Q*1$m4%ivV{fbiDcjhIgL-60kfHf z7>dAP$|Ec_OJRkuQ$(^jVxaf*m!6tmbc6oBef4J`$!U>{k|3eZ+1 zMsx^?c3#A8UkuO4K^l@TpYc;igAX+mDaI9(nhHw zFT}K(vB?YAIy6A!`3U1AAVe6;4ARa*J29Y0-eaBM(#fNp;L^z>MjKdNMf#V~29_he zYqWu9yu$(!M!(+ZyuqirXLR1+|L==e%zDe7*syC6c{b~QuqlhAF+Ob0$zZW*X^uiNu^-rF`kQ32t%3aE=X}pj#cbhxV%Ds zkbNe~#)m9ty%u@Ynv<+I*VUf^63v{en|+YQe2+yu^{07xS-wa2qmt?_m#Y|$#Pb(V zEN%t2W!Qjz;={*{&{Mfru~fHodl%UV5PA#mjSel{U8WM=0)50`{wVCP)4^y)~i$#|X|8_6}@(i z^dT^UAq%{)J%ofMJ+(jS)y#2I<42L*CO>p)R)F z8~L)SrgUcnd8(W2h}4e=g=n^D#vNT1GUVvM-AQVpO{Q%$)f8XW5!`iH1nA_BNb5`p zq+1U!rQ)OaJD`Vuk=!O7JkalGkx+VQC4R%L|aTE}^gprg?dAYYIJju{T0 zW0Yfn!(so${*isZ{bBo!_KW{l&i_p8tc9yKJsPSyo5hV4St>p1<783nuk=i7t!m+0 z$p4$?e_P2|GelK_}g<$a9XK|A*Zj|d?*IBN~&TpMp!~fq$u{&eiVsm3{j%OWb zI{c0v_P1dBKg-_Tw%>Mvt=!hj`m*(W>wN3rn0I2f#Vn5*Y5Bx*i)EE%jQXScgnGU@ zUrkazRc=)3l`-;vqBiU)P&*CgK&Z=# z9C+sT=zD5ld>9X)2jZ~?udCngwP*f%_O{jMji&nuOfm3bqvZD~Nm=b*$;Ihmyn7x9 
zQ)p9Fc-Yd?9M7Hk@K+m|)l->%t*52MzW44+r#>$irG)X?xd$vrdT^sBSY>wQAFGV! zbB|T>1Czqd^i#>^PvKgzeTbC2cl3yP^p%fr@k@K^{c;Re}F$2IoXM3LXbgO&K(~<9s zfnr4p9F=}?mz*~)jHk=&**xpY{NTheo-Ri{ zBE;-rglF&R`|U$qN!dFwjQ7YLzuVlXl}dMQ8U5K*Fw65v{c>t)nj+gleNq+ zo+lg5j2Qs*ZCZpL899m;vjR?ya?Zps-XVARFir$om%jQKURKKe^vRR6!gzUXOcDct z9u}`1-}q!$HN3Gh>mvHv!YCTX%roUlr zcOagK^SNXIl!A4mCnJN~Jjq$(!+5J}tZW8=_1~`j$^5E{Nd!}flz8Rt2b%l-A&<`v z<1Mbiju-$4r@g>>!Tz`ZcrwC{FpqeohNiMY1Wx!B zQmGF~X0A$^5`7iynam2E%$=@0ZhROIZ;fVSe%ZLRN1l%{vcoDhO8EORIU^^`-rVYM z5*YvnzCA)c;=|0aOvk_g1OcT_4CC3Ynf~x`sAnJhX8Iq)XWjziBxlr;3ge-znf_26>W=mZDNyFZSOsevD-!gOni4b?uGO{}-jJLB!=dt`Kv9z^EID3T9@KOWA7Xi>&{K(ue-p?9cU;uRUm~#4V-?`-o zFu@Hmg&n4Wgu!g&Old?;*x#ucbPmg6z8U+>wh^3e`1@M}0|QV#8g~09sq*lgFdocy z=m_Q5mAg;)ZTPjx!g88g5t}-~0jLZ59C=tq7!P0v!WRag3)T_JP;~znrg}76xV(^u zP7346t3iLP9=MSf;c<-qD2Iy-AS)JJE-5pNSFR@NsjFgZrc;~;%$q0>ls{x*7|&b{ zCc^;ef{CnG#w6Hk#d@RFS*ak~#H?^BtsbEp4$#&GlM!`MBtD&x87}G6XoxD>1pSqe z_%y4@3hdOMLaaPEJv^1x)P+3;0FsT4T@=aH33br8@Dy527eN>R1lx|gs*x{t)q!Ke z#k8Wn-!TBpUu3^CGQ@s&?7VPM7cUWgRc))_!ZBs+*pxKwlx?e;4!!se3<;QxJU27O zJToVYe(@49y2cG~+PmX>dBD7I;a_1-Nl($j=l8ki_HB4Vo54tjJ{mQ>dhLjv)wp;l zi)YfmBwRoZ;dP%+#*55!tv;O7*S8B#arhe%2rtI#PHaa}pqufI+WM5-uOyr=&@q8C z3g#c6D-GvSbUB^gi>7If-&c)4^!xMRcx*(esXcEV+hgGdD)LShus=TWOXl6z%6&`2 zK7o(DRf>Z7C-|m@bE(mNrv7{odaI`8V2Ajtt@+{l^2!%KM+2FxOuaaXclLOG76te- zb;gt{_sI-<=?lK3$jiSa)Lpt(y)OTSuRlK#(hJr|ZW9FGyjoH6z-;(T93}Uz2y68H z`Xp%t8+PNCF(u{yWA8n{t17m>@pJaxr>CHZfQW#I7%@T`5JJd4DTDwi5IQ1+5Fipr z3`D_3J0&XEd%G4?EZDmuvG;55*WL^EuE_UWYi94+=Op-g@BNOq>oDvrahDvO$(=` zq&}B=M(T9<0DL>;@{~m>gF~O;9bj4Lkl^;Y{uJLU zzDs=beM7vTd9U{_^B#&=fY*2$JO>!>!XMyb;}HE*eUm;E3dWuiOT&_6?hMTbCOH7jfD}Io2#9c0cdYIAb9a( zgC2TEot9LKxw!kHOMeYCX&Z7PIaJ@eDRF(A)64?x$+7RAI)Gz5sZ1{%yPb2Nc5-e6 zrsHbE63xQSW1q!ot5dmhhdik$0%LJ?AQ1pKkj!|X-PDLmxfZstj-hh*YA=kyNIZFva2&BOP1bS5F-TM_uTAly2n@wl zyTJg(iO_y3$YlOUHy}Lc5nKV&Xd>!_YHi$e-M#2#ofhXaBrff%kdUWXO ziVgTaYTNHD_$_b{$jBY!d1QdLJ{*DlcM=YI2pS#yVBJ1HQ=kVPM%fcOV#$F5NZL`4 zuT7F8pmt>zMc8WJKI4MCYY1eJUU0 
z&4O9*q1y4`2<*R;8UaG@u-Q{G5s1NO5Xqg6Gb1qm?hAJ|){_+sa){D0zx#gm{X1Nw zx6F<~X)JgpV0IfH6018XywDFwpGNEGhs=0ml?ZVE(Pv zA_LIX&3bL*l^~x1WPL7{uZ^n;BCzmQ&nH|TNBAP^DsQbM>Vqnjm&=u-BCzCEmvIIF z4HaEZPr>WSi6XUMHJ!??BG=F3H)ux}MPSRF)On(mBKPs$d*A#JlPDH(!tgPGE|&69 zxe?fQE53;`9$#~q^_)eYSGhrsUa7U^M_|aURt*EtsM|D5B#cqVa}#0YB1yd~@**(p zR$CGS&?O=L$Fu)@V_|o(=Briqr^1iT@|*|^y47l901TIDb(VOv%9I^!&5f`vw{??Y z0GdAQC6`G<<;q9gBl9CL-%jozIJkXvl2>l%L2?JFNM4>=3L-GxRs_KSME3R$XxEhu z=0dsuHJ3zS!`&ChTkEw!Q2e&9ZgjUz@Q4_e(n)~5EU-WFv~ByUklkQv%L*be=@xAu z`*{Yy1+Gh?RE3f#myV9WklS%*CG<3CBaEJLPw`xOfG6G}$dapbX#n0O5n!aLFw&i> zTkrP_z>huaE?T|g6FdnrYq$9-Vdvr#155BEtt}~xz*;-0KIq;&WS>n(H7)%_Il1Cx z!RWbo;N=$~>WX$mUIez;io(SA!5=eroIdu$j(705Mhin_@0@?f4aZN@8jB*`sGjwr z$N)4q*3m<70=1|VfINmQ&WWTG2J7&G-AX*IS-w4~uAM0zd}@nEMPQ??cmM+s8_fFQ zHTP|K2Z|$;IPltK+2n)l;PbVGqa(1+R&&Jw7>y3!X-i+Zco<$~1P^sor&2$8NjPYu z)=(6IQMT$cSEuD_@57P><3D`jwgIeNxIP?#HMZIa7=X5#LZ?SNxvCvF2xLr8ao?Vw z8-ZQ6VngV>U>eqGgjtEol?Rc!f(Y!d)ka_gBpG6E}W^>)nwybdg3nmB%uSw5+Co*!YmYU@>p0U!?O z;#Ef#$usMm{0LiBC)5Rx>9AS0ed^)5TqODSVWT3jq*j#10JN)5F1qEQf1S6l*f1z? 
zK>sRLRw6FEvqwhsPW`1vGF!wTMqA1qsl*a7%qoazRMip^Fm75|qf#dq3?Gi@X12^y z4m20;#jc9AnfVbn)wX&J`W1(O&h8ggk&okpYBRDUE~?u@vQq}&^MQ3n%g#`F@`yG) zynZ!RvK)Xg0MV7brW};P?4Z^Lu@=^}g7qgjyUYL}pKbQWZckJHYP-Wf7EL4^5_x}@9igMgS5ibVrC|MWof-kI)nRi|Iqq~i;%{)FpsO_x};DGwA+*LV7VG06v};ORGvtPklD^q}0Pw z`=@-Aa%)O!N3SBQ`-{I(5y)nH!^M#BxrCrgb8DxV zb*|LY-XHb7?Ouxge&VG`mikV5P6j_Kb%ns#4#y31v^SSUAkpOys^^`q;Qz0EaA5?3;4%zP z8Go=k(>BOk%9c~olO{PGHV@Y_r(SU>lqC3ui>{JXx_`}I*YV)8FnO!?dSwJ68+54TMbL6fmP-EpIq;mq)8Va;gslB!+0x@zKP7GGVTN;Z8&JABs zeD1hD10J@Xc5y|vTK-p+%Ge>2_UiZutwa`!z{W zv$g%IkDB|s_R9DOq|uCwDJlu<-$Ygwfvj3Z%#uFR#BfpD>Rj7hx6S(4Ibqbc&TtyL zwbx#*ia?aj2)T8g0smcul@SQI)vQ-%I%kxcIQ@jz9(`rc$Lv|B_H_ZZhCaGF@{}%TC?)vhzMliJ;C5u;Y0=$ST9GyVM3T=+(j*LJiUf`Q9eXk3f z4=9)-xG{4R!AH!jJ$r3kIQTeh0a%7yOdElC<`z7M4g&&sS2%`=pw`-5Ga?YAs|a<< z&P2pKM(Egm_c*iVr0dU3{si_te4-2ZK2P_Uc-~x}cIV^>$6fn8P% zK7Gtui(O(lQU~!Lc0~iUNxV`wmm6Q~TXM*i+8dK15WzFjz<-IV0JavdD^>L-kiK$1TINQLV{ zw(w;IWOH`$@5Wchkz+Q@3ul2QX#bIM|0GnoIP>3F9)U=o5wVk^e-{xV|5=#EMj+Z} zOw2I;T}%p5aRicn6ttg6m7S1fez#W zwHwAnAnEUeM=jw?5)cy^MePM|T{IY?sT_<>5()t@9nojC6m3gk1mgbQK+u~9rns~+ zTvKBcnj-jIhw(I(9P#T$MWPCt+}O zs|nen1ypRk2wxG7u=L+19BLwo1#ImOd2FY}BW_Ln2>~MXJ1!4LSP>ur2aUQdkV(c- z%{UlDiexU!k3cm*%^dF4LO5b8B-)ovRU}msFU^fW@UI>s7yxsSC`VGc@=pGbF%d}g z)oVHfkiLYiQsubpfxD{m&xBglB|{^S;VT6Q2EZxB4hmyuSdT^*kBmS}uU;P*fHX&J z%{f{mbXHI$QgiO2{0JoSNdrXUsnOP>uWYT=P%4r=y>Lhb()Ye@>psoMLE6LM6iR%F z?){b?;|0YLNY|AN6E{Qgu479o_yUQBO8;*}**1(6Rh(ZKfuLPo2N{6o#8#K)`wsWk zs?WtzVfwt$5lGe53mMm^v9%wlh^{eIf|t?fsnmL#IyWx@3A%bsv-2XOJAD}!UDJBUI6k)KL9}NnvFnIb;Z&zPe z0P^dn7e+=B28$6f0L`fVT7<1fhHk=~sbcwFbQ<0Nb6s0p>G!1{ot}~Que4LsrlbW@ zx23kFj!gL@r6Xm2iWa&pG&__U>z?HEp7#v(u>|}Uboow>Ore*|jzSu?EC2G@SvkXpnI|m&_L!%ue#OwRkeM7} zN;mzW3QMbN%HoFBrOlbL#wzxzs}C6!g}AH-j;90} z2+Jb{yaIE1l+>Ju=p$>Qkd&!7SC+?LVSHy+M{oM7;>Vt>;BT=bDIZbZfi2jx$jG2d z?=>d6FKI>%kiMZ|ho2>UcVwAI*)ck0*vo{%m)Uc*-g{CMqO=S+85Xf$OIGE~&s0%S zqmzC~eP{%QYUaMtcu5U4{DOPtMBr&~km@}}hqq>k*?K~)Nzh~I*wU(EvvKvej}+y< 
zAg7bg^fa-2TNsO95@_9X#1e8UE+Ql;uPUumXIbCSFqqsA21~f@m&@U?^%xJ$TuIG*p1UZZn_yh zm1)&k*CZNEv5pd9eXrr#HBB!rin2VP41fO% zy9yFPqu6mr0Rctr2VbAiAov;q;bY+iAEi$Ao8 z$(eJo^ogi>=p10BtbMNgr$r$c%)^6^h^>%tIc8P~Z|+s@Qy1Sq^jOA8jFs0%(nZ>a z)wYfvzDKU^D~m#2I0!qP2!&ADu&izYx!ubgH_2>RyJhW`&+(osI1_Ky*^nMOL83zA zZReCIB#1-62t=GJoL}UK)Hi^FJEny{&pUH2o*ol)GU)0?M~Eb7y<^&VHW$DDH9hmDgBP!aF<|DMiAvUc@cP$M61e=B>|ZzZXOyck{_Dy}t;n0xxouMb%>PA)(ia|-Mt%I3fP?6uGQ zL;HGo6jDoNQTO*Nj*3EJ*$*46m}A6ZUf9&u3}wXef7Ctu=S`pzdIA`WuR4q1%>LSc zYF~|tLT1S_8)j!Y%PIPixp3!-Ukf@emBtR(NX3(xEM4R|;**ybKC68>DhiQhA2{$A zk3f}mP2}3roVv!l;*@d($HCCc6Kqa~UAPXxe@dUzz8D{c%u>x|RrTE3${BNOD=JIP zmYdc@hg4!NdEtrU3Xd7+v^D|oHj4Omr1trQ2xOT9!5YM#X2DdJOfg$JEVf z_rG0WW5|cA3ZBW^tbH~u0;y&`NM6Ls;CE;l+!BMCoxu#itkYgSk7SM;`qCQ z^zM7TKWnh|$%F_5o}HUnhCVm1J>uyz_JYD|r)Hw(cBdJ#{j$0be_p74Iwx`*t<2eS zWtOe1tDjQ02u@vb?l^7cf}B?#!?7rjCxK20v_7e+OvQ8%1|4IMLc41dtIn>OU-`m! z+DEe^kcHd!Aoj~q>xwjv06=}{b&v&o65*>u1kXCLB{JJI4|ktVarqnqT*Xt(!-MZb zUXZE1J3g|UT2bceYxQTi7?~@FZynOSk2Nyb=7~;t!x7>K9vlg~dFdmsjErgTjgPcS zWY#eM7sw_=j+Dr(@%gurVTi1z6-QcVQ4DkqV)B7h5dmoB@wv>3Ho<3dT58T>%`9tY z@q+etIMOVU@Hji6%)dsWa0IoBT~E5VdbAg`yS2-Vnc5Dc!uPW85#LSz{ZpSwy%Rp~ zBdIG>=cbPJzu>7Si?+o9u zzQcW!e4}7V@ValHZ%^-^-j9uD_mAF3yf=9-@UHVNK}>)O?FUG3=F!+1$ z)AXG51JZj0j!gFk4h+ms`#DgX_HLjwkeBv++JkAgrfo_)JJ1vQ7RRTp@V%F|IPI{s zinQTr2c_+k7E1jo^*wEG&2WE8T9#=bE zBoK+>riN8b?xW;LArcpbTeV5n3`S?<} zkzc0i9*|=|qrt;i2Ha{a^Vm6}NW%>{!i(cP;Xa0)%}R=GLEO%|)Fe0*u0`~8NC429Rm!)6FMMSe5Lgj#dSd#?$suNi1V+Uq`a3P9x8jAYeNJpZh9y z$mPfo{)cN8uUgSuznBaM=Qn~T+!J|$BfGvCF$Y$lMC4Ck0^)p%p)PaB3z`rtZz_m@ zsOpIRM%dpuS<$e}$QQ%OYk*)0W+Xc} zC2GfV)TB%PNUgq`#z+5IO2Esw9M-`U{-oZ_;jC)j) znsK*E;?YmPOC>>D`kg9Ct-3=csa3bDB(^pEHj(6A5v@APkwo~1HkAZl&{wD=jn#6M zL@)JLm832lsgl%%7L}wfG^-?y-tq%JIRBoWEw2qr_E0qhq`T?FP4 zS$fX?Mwwr$&fK+nq1MQZs*|dlwm??hwH~Ingg2xv6xy_UUWtx}25ogh4&E`eH3>Pm zyKBd@kKwMvX}T|C#*|#>?#}~K)wrUSB7?Yo3L^H9o0G?~tM{sknv!s3Pn=)^8r(3s zf8a14GJv%>Bdp*pBO$AekDyh2JhNZ@L=;f>Am-pnWv&P>HG3K~L?QqOt17DqhlxMr 
zWw>4z%bapt{}Syf4ZBa{vlRjXx=-iTix)Au@Ll(cqT*WDHi6vIMbf6T(`%_#q)pq` z7y&-n>4*#Yp6PSl=GVn&wVXzlhSW@H!K zhCWL6rNPx+PrqPg+LZ?ymEg9}ik-ip8B7%coy$w*S)3oBYa8Gm-o03yt`foAM>79S zsO+v{N8hz=^*FA{15a+oJdV(jlu03A+-@sVpN&Wa0XumZ$0e`|=P(C%Muv2sAr@E- z4zO}s+-Gth8Orq;_fh=*l!G7DR)!DYPzv}lh-uRjPHQ7o5=>Fcb0k|^v@Ayw$DnML z1cT9X9m&NF+R(j?A#RN^Peg#O#`-X#AL(upZj7_RO0;}ciH`BZRT4}}%W))e@EPGq z;@~rkJ3hGq2e(x?b2Z>RQp!{}sf`a>?rNsVN#Q9(-Z>errqJ!g0$St;!KoW;9l0-$irch0&LYM-em@eLnR%HS2gi!Jc!pXU@1pjK>CRzpEIlf z?)(4l`#)_6yYK(I@Bh2+|ISx|-S>aq==g53`~EL)^tO}p>^yYK&f>A8&_$#>uX zci;bUd*6NkPk4;9ulc+0|Jv^R|NrXyKk5JHxNax=|F-n}v>j<3X%IW5UZ1)sb^nwv zQ?{m5hJFhDBUB#zEO>G-Gw@_!L!cty_22EE?c46#OU7cWHX!1QEt3`X!5^Fas=t z-dM6-K~lgW^KgBXKmVi#)*6-yES4<+v|*IeW9NE8xB0@_D2xTO&|aZ3jn}HoF{(u{ zTrm$E_{sBmd(5JjRMB~-f8Zogce88pn;Z1&Cr4pKm;((wVWiDimnOh=?INcuKetpp zVOW@jhKcS%MgTPcr?}so>GryN~5qh90Zh79c{(p z=9Q}!Hn%Q?%Hibj8+GqKvRRHD>k4LPlqEovk}Ja^{mk+x3=sQa%7wb59W>Tl{bQdE zcfA`&gTw^jUukneYKIQE-`oG$GU|8zwBb=0BPvx-dQdhOX&VkMtNavdd`!X^8?s-m z6=i<-GwV^QpIRA(4dP&^z6}O-;?qiF_N(ru7>S-#BFtU=aGS7-QQGp*RsT z{%WwWWs`nNbrd#<8NeqjBobwY=8Ep)u6y}ku&K>vTaqlckl-Q8$w&s>`*J8nZ__u7 zh{8f~09-tY)m2`HhsYK5+GH*a$M<;kyw|JAV#AqTdJK6@(@!ps!Y*+D+9GN~8pM`n z)ho^HZ#o)3`5lg)lKskHd>mjo9=7n+PYOq2r8od;;}TN?yvKt+fXV#zkr@lh4q#nO zEEUqmCL9VNY`j7`lKP2dQJ5?a#Hy3F4s`StDj#OXm)CqfIh*_^qB@XZzIdwmcr0NK z4j%b-g^JF9o!r_K%6hnwJz1M^`X zU0n8XL%$--HWMJtHZ9Qvkg|6+!AG+mEr^ntqwTN=g9{H%EsHwx{&;iTX=j`y$5-jk zbK&qk;2JUWU44CV6lRYDu^z>kIE2z>{f^h>{1zvrKl(c8xCXxx0RALrWJ~pR#Zj0& zcG50~a%ygdnf}S`-`@Yj#W15qSBTrfy|K_Pm*r1<_0;b@Zr9hAM%nh!GNtBzgcsL! 
zbpQ6xFEWEfCm?r0Cu}5R8w89YsJ$k!+ONK*Itm*|wTy)5EVHJpJhNtU*j%^gNhkh+ zAR`CmqEpzM&?~7AJ|ZI-v_}P6wzs}|Y!ud!y`dB-JmM5LLlnQtT=o8z9gP#8!svij ziU}95adeU$nvkuL1^EFx3j?i<;U^{U5&YD!pxX6sDDbVSkhj`>A`5?Kfy{p2aM4X!Vd^iE8?%vw<4bxS!RQ z^%dizFuNQAx{-c75~GKkv)!{YTX()7_XXBbMgW>*ATN1`oS+|NMq!898`dvkgoPf2 zx$Z&N-RF0o52+3FP|{A8a|OdD3+Ca=;4Npg-eg8$i0MGV+7srwdj=dey^Y2X9l{hb z3exc=1?@=^=DvkGzTyQ(w1 z4Mrr#>&MQH=FxUs}xAC=w_}j0|nMq+yJy!X93W zpIy{hj95|cY|@XJ8qFs6A^EOM*5_rn8r1XQ8JK%8p~D_7C4)aKCoERZ;(?*nFFk%Z zpml&h_WnBaE`9!#XqLpwWAfkOb)p?6(egz8-$J`II+V`A<(YawmL|INeK zU;X{FEj>D0&vkhk?o@jn8IBa|^A3p~N(-j9Tri~YWR~olId}V2byzBxI?gH8;9l+B zcl5(@qM4+ZK2YkVvx1a4!96Q$)7E=uKzZyA3i&p;aqqMawNyUhQZze!Q0NhWd1a4ep2%u|G|ceZi)%( zqRPNGhkiNj<>&Niv!e%f#?DK?qJ}wKuB>4@M05TF zQ?BjyE^KbVdxU#Qy2@220X_8iggH+y&}$2$18E5q$R$8fX4{&}$MkAkj~?)uRM;vJ z4%$^~1jzb>Hhcn&`$=^FAL%;Dm40pdob(X9|F@-$O8q7Ep467qUby`?rSu8C5;{F( z2EPto8f*w=1U?E}9+(^G<9`hr@iqQb-wVETe6_xG@6+BByhA3Nqn%=UttUel&UAi<6sZlmVM5_AKugpl!4|$ z4W0NU?IwJ}sTCk2ijLcU`eOa#ycjGd*&PY9;PBF=&C43wmYP}Kr#`RRiW55RLRPM3 zX8DA%`dhg%m_{l$CKg23^;ET@7;GX3f|(2yEL6q6?XdhcGP7s*Esp589fu0njru#& zVlasu2w#Qbw!|{!F{yIN8zuz{ex8=r9DtWD+WQ*&0uOJsg9X zWN*-fIL!zcv-OrlTW?0jW2q3Ct#^pa2HjQ!DO-@j&9=a%$5twp)6`Ha1VQ~!P+m@k6K>=kI|l4LRDXJ04EB>< zpvo~%8UIx%_~Q=j7K}>l6^shUK^qm|st?`knD@r%PnE`CO*sH|@uF3vT~D^8HDR;< zy#YIVH2x?}8fi61I&i{#d_FlW)*s7@!LG6=?q!0zu|a{kpsuZYMB7~w1N91l>d5TC zT(Ibx-#=+OS${Dv29rvz$5a2e>lH;|Jvjj16U}DJZy1QwEi)_iZCk%N?Pp0RG%5Y7 zoyER$4_Hxqs{VXV6c&>`fT0JksbXr@{7Zk_=ZPENmqyC8@8Dv<1y>0d3NPeDVGhZ) zxV?X~Rz;M}9&_Y7P|4z^=2qM-8y7Y&H|zT!`SV%*Z?m=!J_MrGHSh$H05+)aULV}| zqW)|}6ef;+akdm5XltidR_Em9=9|~uyXW1(l^GyMd5tDBM=&t-0{2<`u2rDhI6{&B zz_=(38v7y$jlu=j9C_xYH@-T-Y{@~J#8o1nQPJHGBnfI_R08srqkkH5#HV^kO%(Qx znV=)#(@u)8TcCJdM`jo1oFZ~fi(r|&(-3f0tpVfLuJ4nR4;VX|Qs zM^cC(FsqfPel~JV1k=pocKFZOc(;E4geVLl8J!@t{{)@FW=3HQIRJb`5MXxZa;f%c z&T-wkao$xJJnkvaXKseGT!awQw}zvzf*g#fT!Wdjl(2gEsLDmND=OxgdA}UI=dW%* zj#nUt%0&rpDOG7j;!tGYT8WPA#9cjC)`8!}di-`zWpu zdOvHKY?CakzGYJHfBNR?_k^P`Z0t0Ee*=YPI&i?Qu@jD}MsCK0AMZLpe}H^ds%n(Y 
z5QUmKaoSyzzKHhI?>3{$y6DQ^z@botqf5JJ#@GgK#`qtvD@m`Y5Th}+K{i7aYUcRQ z;iTh~lk~gHXj2!>_!~H!D3)}AqNI+SQT%J(VXyD^g=j`eoot3IEb&nzTJ<}_(IdL( z$=^VsnKq96Xk!;7IkC~~<)8c9_@THA(baaMH6arlHM8fvC!DtP+ynGGMn)HRL6Uz_ zS5=)DT|_)AL-Mek%mod2uUBV+`R-XmPktL7Y}AnohLSe{u2 zdn+Fv-OIe*FXF<9Es4woOo_wUoM^CuDU&h^`_re6>X3RdCU6m5f(=m5EILk zObiIEwk?0C_`6@@^a9U)rRP362Rk7i{T}g=pVDtFj@C;gR;&I4BvsM*U5wQNj1yk* z>z10!uh3q5aMRSpvEug7m^$FDy5;Q8-n&`9r7Bw2#aKB2|6PPLqVwoNT$p$v#{;NYQL?{;x*byn!HdB&>3*Y%sGM&~;3+SZYmg)8Ni@W#<{OZ%Lx zoYm;PxMb1xX6Ap=$yD2UFXV~_P(SXxz6Yc76g(dg=etK|mFw9b$n^Go&xlU#H+of1p+Yeq(A)0_Jq&I6QHKYW}%&->iV zB~5%@vU6qC%@d-t=;mYj*_c*QHf^SSPd($G)t>&(XJI|_u+jrd6^=aA3HX>ufPDS{ zL`}S1zp^YklTJ(p^29W4tbA8JX-WQ#y<+d;+|8FqaqgxoF;Kx>qYc)aHGBG)>-0_4 z(HTUZLnUv3+>gzm$wU(drVGqM{flj{_A%t(^8B#Os54;TR(PklL%(8tbUO7jU*7P- zl~axpR)*Q;>iW@7e11V8kYO3)X3O^rzVYC?o&dOr0<}!iud0nsqvnl}%?qzs(VUx~ zr(SlC-=ytp6eGC2^n-^pp%gqKrdcQ8YdAEOezlPQZ+4|`N}r$JH|@2wbJHfKewBJl z>d~p*l+#mOp(UYi!MB2Ig8Kws4V;aee=q;@{*!R?_xK+4o!~3<{_efmyU^Ru^OEO8 zPm!@5_Vd&9KlMBH*?M$US%jpU<4>Ct zKUrRzC3fU)<87z8&eG9XunV6anUKvf{T;u<}M#vrLt zK28|`(stX&shuHxoEm$Li$U6wfsIl~1%)HS#j9G{AQfyd4=byy{N?89){_u%21Ghz zvvdVBo?W{c-D_eHt>i$!DAWN281K6_?2JSgZ-lQ)u5YnZus{ku2aI2XRM zJ4xvwyi4evgpa@b_6zO*G<>BoNMe-ecFN*LI71_UfemIu$3KUT`28FCw$6M7R9c$= z@)%!SjnLE>L@tBi@?9uHIPD|Ng=WVE2k1SR#<80+4;6I7FHd~jt8GZEGJvB>8Nrem zge|=xC=xGF0?2Im$F{Zoswc^{x+`2}!{!32VN}OhngWD;yXy@2_Ymsl3=3m<41yFD zh*e-L#ZxXsZM%9%P6QI~$%Z~I2H6Q?a{(am}5pnx;+ zJpIoxF$hl7L#AxY|A;{$2*)5BVaIxoCV;}tWozHM#C*IQqsu#tP>~v;NLx3T#l&mx z4l@P`h;q&+8u52z3oz%yOwl?^ z0P|9q(M^SS2b!COGubzLW{#$7bLJ-C>YV-SciXXLBTq9!;hSwLsF26Jk{ z@}^yz`;h?h@OQ`F^5-M^Po=T_B^oBP1ekvVO-^ip^KBCAx!Rol@Vt-u4!SPk5myvE z(cP@tbdCONQLH~r=@9uqZqZe(S-&n~Ho4xg$btuwVu%sLE3HL&6i-0YUeW(BWBnus z2VpV9Ut`b}iu%}oU2L`$%Z?&u0yhbnm081wWt(ee-Z1j&Pex(0#egPkw%{$+X4|>e zVb9;$bH@An53^!@>7Es~#PVgO|9A+gUnfl&qp3?b)m!DU#23V+|-}9!({lGe?ZSEwU7SI+C5_Xq_}3o6W(m5 zDW{Y~=q1^b6|wk88KL|89VlMrKObm*Ct4-fxCy!oI>JQP-w_*~RU*~;0ZE-)7SjF8q# z?$N($jrAfOrHW2ENpO?H_2Em7kOpnCFR-X 
zk>7l9j;|1+4k@?gBkS-CS@`{@V~S^erGJqh>){kUQ)!lY+}b`jH|&?u1hOK_9E|+~)6Zuusm+Yvq+08vtv( zg*#Qc@2ovN_i6fP<6_W0`g`LUYzb`J@Y_e>x_% zC$V44-8Z=!YaT+6FU{uktzUa@UAY*MNr?lXQ3-_)liT$%`bWjFJ?J=<jCFz*1V1Mg?UZVT z9~8yX2}wVB##Ud-$|(BUMgmR-B#DI*!}~L1X~dHXBu|nTgWASLOY1U=v$BVeFpt~w zk*Dq(ayG~ki%&dT%DX=X++xBB6?#Fjwc$_wy&!Jj*LevwEV0w4YA?ZfikNyt-vuW?Ay_oi3+UB%#(&A|=(;EHL{R{mm{&D_;{e9AArA_eX zr;Sd_^nK-f$G0PGzcf$kkEx%hzLWaT)UByEre2wPA^ZrgO>Iq`pIVz*nwp!Mk-A@M zTBlykff`u0tUdpG$~Q&xJvPMMQ3&ik(S`IKyU^}97?p!aO= z(cW0fUf#u?ohcqzC43Tk&70<(=^Y<>!du`yBy^9rZ|FMDH=Y;Z)$bwC1)e)RQO`A= z6`naBGjx&fDBl&KjlSo6(a;H@X5Rz8TSD_flYD3S7WroQDtx0t#i5+gfKa!PJNRAj z1OMgzn}aX;&-6!vj|A@sUL8Cycw+FF;F92B{pv3zP*84eTB8_?+&u!jN8o??2(Wt{dWmmqEp2RGi7SOM zi9In*UDPq29=r@XgjsEP3DY?+-!~Tm@w$Yn?v`w9#$sV^Gme<2Kov|ld z-y@UnF2)}0Aq~lCm1PF}s`mD4NEEkb8|e(lA+qpxX?Rr<;hqeSO2UPhVW=c*cMV-7 zp&MstDhZ2!!>y9&k>OHF9B1^MDoM_|{!~effWE_-Y}5ZxNjzQZzl$VAy&|_~zo{ho zIr~*5$-wv*l_Za1KdU5KrT?Uow@_(=QGkwh21P)Rr)(mz*847>iBN>T?uRY~gLCn`xD6h2A$ zyVN7$ACQyOqYo7@o{M$iu@r+S8u>n#iXqFj;BsN zj~$3X-&VVx{Vvh3>)0Yw+?s1^OI<(mf|O~zZRwdkjZ8Ot*mb@)>UtJ0XaT6+A@ywb zifU);`azBl08Ssw-opeyDZgjwhj2@sSwkFHO7YC3AMC(F8Sq-z&uSH}^19wH0iagz z@B9qjAeZaz4bIO{mV0A&V~~x7A1ZZyDEqLIjS+dnoFA*K(cPywKUP`eKGjjDIBVGO z>@4?SH8-FqO>iPH&G6eLSdU09Zgq=Q%}Fc?O|TwLg6K8DwnP$cyab=(Bv!Q+P#?k? 
za0~XqrJz*Jr%FM`nqMWce%yk&Ffi7h7Ic(0)wzFWQkdQdOVWkpe9e6e51>Qts0m`W z#D|(7W?q0ms+BW7PGVhX#VSb+FH=d7x;D<4Y|~0r5~Qw;btK_vtb~)y=V`#IRgykb zrIIuTlT@;Fah-dbN@CS$wJJ#;n(Rz2)8Ob9{v9}0AoZwHCF#2r?0Sw%t5>or05mt< zLEgeRcfzV=ZLQ=*)3s(F_@{PrrnJ1lRa2BLR7hzcCaNU0Z@fy<98CbJxq+v#g&iG3 z7^|1EzcXafHq0{XrJzp+TG~K@TRoc{_yIzsb9m_@*$98k0BD7BFxowZUA4(<*9A=D zE3D8L+IqP73uqKWLyNGtKmHkIcdM4JuC5)4)&ffE?rOs0rJZcGpxJZ3u93S;hUT;; zoM{4DB##an-Q9#s*H$v;!cT1#b1wRI44O-oIa-i*CWw_%;bMB6$4yv0S}f=zFr)O$Hy%lt({tj&v_&*OZj1TL!nt?50rqQPkW%cCy)Jq1(%@ zvlGiSw?}yWO+e>1x*HkFqa%?_ZpPue@Bh2+|NqOcUAvpNdj$ShjKJ>uKW*U@;~X3O z?)yI-X!xK{PZ+!J|JLzt_x*qO{eR5v`#-|4?7si+zW-~x@Bd?V-~XlV!vDhiKk5G$ zxwg2{A5PzpUX`8>DgU`?b!i7cc7H+YEcgI;8`1sdrR*DeD|B&aZm3`I9mL@~JUAfm ze&FIjUEo0fhyF|b^ZWySAN#KHHTZ^lw|lSiHhVKY+dVgXT0BFI?~PlGHltAAp+BIn z)x)~3J*l0hP11V0-*o@OUFRO``V61{dw=;cSQslK0K6PRg0Q&N91ClM*)MmOia@0M zqKdrto(Eql#`J<1Y>SnbDh9ywusP|$dEFPLO_r)DPzZrqi7I>;dLVm$V@w+ngN?D$ zqF*2N6 zVNA)7v1Rcdr)EQTJWQRw{fu;iCzRsu~9)tC7 zU)anF!)a6kakRFWCDK-d^kOi(DnD}Jl^@jpVpJ8xVDqbHNB}Hu?I_o-KJtkca)E_< zLfNZY75aZfJn;;pvM>f~-@ee*bbJ<%*5>}*wq8>*>j_IYjtC2SMqHnZy+P7)HzpRw zVCtJxA9UG~qeU*dW%56TiXFaAN>lyC_qjO4(UXk{`7xOLsx`qM0A+SmXfQ(PIn!=^ zuJ|?=`N$S+lrg?A27BM6Z-e|g%=IPxyO|xRi)M?STi@p56NULeMP3ZXzI}atoP7s% zb`0|!IP0y84ic@dz?@pfPS#3Tzr47K0?&xB1vRrV`8x9P5L-4JRRBIi*ircK(i%i1+zse(%gD3PiOOfqjXFR*1Smw zAcN^JJyl=)G>3+q*9U19+6cHjwCA#OH`W*>`7zk?Cbb?S^^RJ5z6FBe;^SN-t%`~Z zW3b^>^NohF1l9~&l+KOkw!^Yb6$}3iMz|;jJKnyqkP|~M0IDjFw!Jy=L5w=v=h5*oz+eMZV@y#D#=1!uXfYO~{T}E~58|Yr$m`V6eg@cc)>rlY zPBIE}VzAEb3$sbb2+)!$+R?D=>&re=z2?lFtafRP4#!}Q+ZW~!j#{*kGIzc-^7+@U z#H6FwqVGI%a_kA@P!Fr98U>?bu*OwuhC7EvRkZWxITxe{=dwmBf4)Dq0^#G*bZtH$ zv{ElOMvaQW@;0e5xOT_T&mXQi@I<#@%8ovB*=Lsyo;;<_$R8Dhm2FaG;^_VTvwnK? 
z!dZ5FYHPx|EQQPgjy6UX#$ZRAL~>XaroCVsBqs@mi{*M55stxLHfiV}BJ3FD-*|$y z0KO{{CLPE`?+GOTmkY@1+ZbLHgQcvRb}T2cGfaADmhaZdn-fi8fKLHD#;#F~ypb_j z%l3sjs+d3l0NNb#qqL5&Xik;s@dm85^-n!TL0AAG`U z2C(WdzyNf78Kxh7e0skbPP*$~t4jO4_)uMU(->M5gUM^s#L+S3;Qe2W9QEB*j{Z9u zzyL#5(mBOAv@kZ6Fj%`Ye*(n4ZRg(p@skewTplOH?8%9z;y>SQ`w4D+jLgE=luq@D zK5cf*%$Aw8HTYaCx%D9hv0AD>*g5z_n+FAY-hR-MnKr*e3q=2^aPS9buLa9Ch7`so zJHL*yOnO=ANRQp}-LRZZ@+?JliG$e1-BN_%>%pUAlc>Hm^$dXX^0uAxr}RA~W?FOS zsE*PO7ZL8PzD7nkRzuY-w#C(nHu6)h^!TpYns9V=;2@BZf1Gd-c#m;VVXV4S7l?d% zz6b;Hm(CYWAR1C#pyFQpvR6xEa6zn!>RSWIZ2(_158PwyL)qW-97p_`-1gc7K!t<8 zV9$2Ofg@s-RNJ!r<=RAlqpf2sPhYkeIxa|skKI8x8iR^r6RD=P)p1Rtto$B7Ui9n@ zJ014Mj&C!7FGjv{i~|Z|6FT)Av}GP};I!-BE`rqppE9sxs zDp8k;9&d%y+KY_7qhqC1*IF?QK-_d{X|LEj>u}Fw{dyjBX)n&DV?QQydSl;$SP50P zsFkbJa_H)^rwZj|uTNgAm}*K#Wv|aT{E|KaW1sw3xKoFTm^=Hy z=ek@Z9N-(hN&bIDlSs(!NQ1HSKoT4J=A4NZUUxJ@tpwH&Y)^y*>5f)RW+6U_ok4>S$kS>cMzz*qQRN z?~s(oQ|?H)GUbevV^bESRHh718JLnD`Xlst=+)38q1!@NgwFNt@4qRuKD0PAGc?|R zMrfq}vQS27AO8w}Lnsvd&A;A1&0ik;!k^*a$3HUoui(?c+k%^d?ZKmi3xhMDzh4Ob z{k?)7e<<*i?{dU3_`?_Rz3uDp-5mHN@LJ%Bz&*ZaeP{aG0@wK#`lds-e`DZy==RSF zR0YC;+`v8o&Hs`A1^>3c_h_*jYWE2Izcm7^8LF%s^D?v(`x!-kO=Ts;x6oc>4YcI) ztPJB2)~|9F45_v>{)}EpAIi-z_D(`%Ii@xaPJ+$JFop~;N}P@A_&49LK50O5ab>Au~%iwAU=MsdDN4WTbtcq7rUR=X- zl%3(eRD2CNuFtuT6=kKCR$n)cR$Nh9wrq-aGOJR@Py_BgpkX$KH4b%<7V=@^ zZe+5CAvRh*wdk82N$A~Qt&+G2>Q|{G&IENr~gAGaYWNEQAwQ2^ovyz#{~T%PNLtU3l}<)=)wgmi3^E-zDiP$&QnS1 z!nrC*Z9YdOsm*7rB(?c0m83?Vsgl&lGgOiqc{&e0#u*yF!a&)*XHpiXT4yFAppqTp zYj^Ohib4e1w!A&L-K=pc^M!94`Go^vedBhE68&IDLK}ie>H%4D2uD>HP#GMB8o$}Y z^ALee*Pv407xBntwyp`=21>KKZ5sELlO#@0S4rH`w2(?-lC)GLxlxF{vAjv!%b9G@ zx+`Er=FlEgNgQ9b6i0G#owiK`7f%eTq`fT1br~GZK;t`e+$8N(Ug4DuGRwU;FDZ&j zpxwzU8_ubk5zty&%L~kQb(^Ofi5WS%&ML09Gb@^YSZTtViIW&V_wU>UPGX3wS89(daAdf5swCEqTi7!(I2Nh< zPv$3_6yaI4M-)8PsrwH{5<~3T$H>EuL8~cDTn&TRBD8$nWz3jG5yG#KyC&CKCZ36n zEzPT%^3l+Mro8-VABdLQjsdj~MPte{g=4kd_J%F2(;5T+GE_8rn;88`o#^);Pgmq=e<_!)ZoGS%$u#a0opvrsen1R^gOR^?o=Sw zrgn}PEw&2h8tw}da}db=TQOL|a(pD+fi>6DNqJew`$+W2hRe!uedx@By?Abh){&Hz 
zlcDe7*l>h}Id@HaHwiR5L+_T9m6f5VJNiaD6~+W#!pFZ6v?$uc1w*96U)N(!G+g9* zGzpCr6F|FWh%w!He6syBVD;GSSZtAc@1*lz60F`c)0o{QF3Xc5v0hX{lhE0Hk`8V* zHVazsmDFep!u55R4Y31LXg79=8s!#h*KfPRASvgvEYiVg*wg_)LyYK&G-?01sZ@)0?zW?vO|L?y4@4o-Hn`wACtBtZEM=GX=dt<)VouUO&yi;TgrWq!jB65 z8G0bJI#dw+A$V)>$Y6fp=fJIjBLlrJPrr=4()VmE8cv~qTn+d@B1=psh)C0 z&QH69)LD!(^W*S&uQXv80MA1mw&j6T7=~^+;p8G|dvL~xID4|U)SnoD)YEL;aMu2T zW;oU40;w5(dUhPX>-+i=hL&`KY&};ww45i^Tu&Plhf{ioZZlyeT_9Uk5Gry~&6RbV z?Q*Fucxpi$?&y{J3^xl>AJbMAlL$!L$S$;0#ZD=V!|A-5Sxl->^PH?r8T6gjP3jCm zfrqt&Rk4@c_4hP3nsKrEVx=E(4W<3I2n^92h6(#)lD--p+QA644=+(i)8- zSL0l%v362nd}yccV~{#5!e-6p#I|#>B*^X4w}`={n%DYKaX5=tYIqoTp%Z6nUv4~X zMH?H{NKHt)tmwCl$5vf+yRmL`9A4s;Vw(Vf=G%nGFE#JqT$iqslay7uAh zC#P>V)`sKo5U*508Gy8y$GUs$y~jPU>XBRs^XhB@gQVxaa#`I+#+t%7T*IsN$v8m4 z9lkfdOS|%Rg#01mw3RvSVt@NTTSoo9+E|?*X9w|?P9Osi1>2mLxPxjx6x)|XOo46%ic4`3uS=4>J8HZDN#UU6Asay8i^X+(GMPI3m4w_782to9G!c>R}5NyyltCpLqH>_lsT>P4$F>URcRbP;rqhFvse@i01n?_>vKyoz%VAE zVJNBBy)r)zr|)WAG8$Skwpy-DO{z$;xuc8X@adj3fV50(Wkm7~g(-GWBCQAo5af79#`16+9@5oXWB^)drR{0q zP<;p=XC+m_ZTK>%m|M>n7(m)4HjjzJ ziMgU;1|TxGt;Z7Q&n~wt$d;AH;ip_tF#`~N+GYiE0I?XX?FB)|S$0oQ(zak}I1V4> zN%KHdY^#VT&qFQaU=XR~-Bc8ZoARX5AY!${S|#BStsnpy+-k|FcsIgeDQ82|LF@|S zcFZ_s_^C2jsR(RumsPl#OpPP*=J?pm^^*8LCJcdNz)U!z;O(1_{t+ z+BQXryg-#pyQ0No;;EewU>b^S+YHGHB~M}mRBjn(EGmen5CTivngM8ZZ4G+|FI9zd z=oc2oLsZ+k2!M9QadF(bi&pRW1Zv{U1*{{6Dn2o=#&hTc)J41#KOcWt%vWaMx)&Q{~dS zZFV@W5rTwCAPTf?G?FJlj39wnCK|Jf;_gnvKrFzHF_(mcTfqP_%-qcUxQj4Ys~AHh zSir1xFqJq8K_S@Egf2d+DE>?yB3(iQ6Swa%uJh^s-=1!!{gie~T65aq)K5~cOKnUY z4FCMsq%@@*9Qq)1QD{bJ@8FBUGhqF{f8edag@KuYzW%rT=lkpYnYjO7=Ud{-^Zw$! 
z!+V@}jMwdX+|%xv?CE9v%ecgtXAIWA(QnWf>HW30wM(@IZGig&ya3d@`?=o2$GiFq z$Ju(_8d~-uie0*6gwZqZDTcixtt@+u(1AmN1Yj58zrc8@D9%>w7KJeYEzzkRx14_0 zz{hPIbn+#93g8in?mEnPF&t-W_QY0TjZQ80j=$(%rNF>kg1EHYDnu&?WGrIV&FE&l zP!MO6c8g6h0G)d)Evs8O!DP*%3WHPTUAy2RFS1dZr-G=IsfChZWFab7?sN#AX;T7fK%e zbWxlQ+!F_HaYKjY6ToUbzOM)XETW@CItO^FG~PmM%R2G$C-CUeVVgY5@7vucMxmgj z-Sd;B@#aow=*VH4pvv=>q9Y3pLrFW-CyL^1`<~DrVgt_5tOmg=s&~$uyZx#py<5%|1ON`a)=I)kq!Cd0asfSB z5NCULYyJel;?|;_*X{A>^X~;fzV;qdtqMW7=RNj;@kl|OjolM!(++mfo|g?hty=`O z*$#|wh}vA(2Yv!lZZ#gxkF&YE#TOYpy~a$83_YUem0Ro=jhqZs6oc94p%unMBjRl8 zZatkb06n*ic0Lp0vZ@NC$=!oFaW-+c4Bi-k)}#F@iPkzY#k1rCg>g1+x3)3{z#=v6 z$0tc=v=_^-aKGnp)Aoc3!a_3bYX$KsIYFE+Zxh=_#o40WIzRGM8Ddx&V90l5wW8Xb>03K7ojHpeV={*LF_&($A!Z@3#$>^L2}XFaS+y z>H3Br2OkdWaV9gGHdPecwE2N4PZ~ED#S?7T833E2X}_2ye7k?G0+8=zHxBZ6Bm!m9;l`G%I2*8A1j7I{`HEodEQ?^S9}{Pzb?ZsG@6^uE*Q_3GUdIBb`prp@|t4rf-%ASCRP8GHniG=44CmM#5ek86cinBd?!kEy}!ZwXgc&dBRV>fZR}{qAh~3(%7=WhK+R()T&{3NU<)(glQM}wq5CT08`4-2w4wAGz z({BYJx&LKFakgl;hK~V2{2ewgktBs~C=3iBIm)H^@dUee#z6F9%Y|%?sEXvP=|2kN zY}cMJdc=*aw|8gn`@@BblO`Dhv(A$f4J*nSNi?w zYasvso#p?9sXwRQo4PvHggySYl%rC5gl>YReIMA-HwF(0d=a=RFeTvjKjV-1i+sQM zF7{3G>E0W>jouv3PoB-5vG54+u(8z0(cjWf(@S(eJpMIn2fLqjp9qQnulU&C{hQf7 z%o+G_DH+-(*w~u$*PjrsI^i=J5Z4OEDKs+rS9et*;<*nl`#gK+wGT~kbq9;@&aq)> z-<+ohzJBe9(!Rl2U#}6*jfB*Ws42>QFMNB=@1HF7bequ*scKj4vkbM( zZobkVUz|3@iApjjG)VQwhEIQF>0!TJ|AuFef_8{mdthRE@Wf1-QeJMhwbrLSw8wm` z9*jRC*D`y;Q$aN*R{*rl_5O6$SWoK6b_iFy!`n)Cu3cU}$y`=a6mR^JBAjE0$bJg# zz%Pn%&c)^Q4t$U&WqkYK6jugb5i_`|WuYTXmdvpK#XFvQy%F?HCX$cQN!D%9R^ z){WJk(D-%;S{WIWWir^mhpe_864#-4o)olN+E`!LI-e|Do0qbUmU-B^Z8aa1*UMHh zB@n@L`#Ql2R^p~0y7s*97oK&J<5b(`I2 z-2d@)2}_xp#AVTM!~JJ>c>Lw94^%W7vandBq(GI(Erii{yM>XupJoi}0z`<}@ zN4>=4o7)bVY(8i&pNB-n_5;Jk{wMx5mlx05bLJ(lff_jsI+2x4QM;;AR{QfmU+?MB zC$vL!I|NVNL&TsXN>@u`3qn*afHi58+0xqU^8H`#iT?7XfWl+r>RX%))y6U7J?>HM zkmL4&n;S8OrHu$_Shoy;@3-uI_iAucw2VFE2uhHJa=Y1?(+=rvPt1Q$Zpv7$+IgRn z`LfoZ#qE&W_QR>XAJ-~vY*~fIgKnDk|G*e~$0caGV~x@&)B!U1?Zz#|4;wosw?lNB 
ziB+1}mQmkQgP@6HU9Oz<3lv#r;2vScL(4=Fd(Uk=IIqbl1DZyHf~(5XdU&G__=DI zdHTe4@LLx${+Q7YNpC);NsyJ|eh5fyyTLGXwf!?E9r-~5N3Y%mY|bx}zKHI3m+^gR zI|RT3Fdd?q)SVRwJBj(2cj;x1U%hF+g#OYt$X&KzuB2zdCgbPgc1VG{+7xm#%x^?`?cHwjCnj0hm?MmX@}bRZSez%Um}9zH?42c@MO~M;KeBNPR-|biyuPk!hcGw`W1PkFm7TdjMhsit zR$tEpGT%LG=*e#*<~Pei`G%33(G{FQ?gm4~SJmwh3}=8F2sVc}osPhs=DKUPT=ebe zufYXa&`mMyt7uoK2Dv7KUN1xl+l$*-Ol)1`0me~5&Bg0iUHkFlKVy7&#j5cEz1uMH zG1m=0rknBQnD%ZduHG2t-hwhYmZ;fq`A>Il58r7Gv05@YgaXk<&4#NG-rIeJ@%gCs zbV87+5Fq}jIn(H#`s>!mY?r`t$)Pfr`u+jWr+(wJdF>Dv7lPml`RgUE4a-+G)z&pN zE}~d*!T=eO-OMA+_uhSEa}TRM|EImH4T`He!vXfP3-X~EH8E5Yz(7_)VYz%mlCvz3 zO=N+vQNX4NEZG7n?&1O}NgE|mIclO4m6Yk&Hj{~Q>Wog4sUyZ#ciKN4?M&Nzn8ex| ztVxVy!YI@pG--9^jG<@Gs}JNd*6HRIq%2woPDpjF%fZ>watBo1Fe$X z&zL@G<{*>{VV~wBwG*@kB|@{84gR)i68iwjMdE{AA1?KVi8;_Yu$P-I)^gCxOX24< zk7d}(VtKW&W0F|Au#tmaF2}qom-FgAzfXo;V`g=;vsbeh&&u#id5M@=@gyB9_FPQ* znd!WTgLSS%2(2Ww+H1&aHuif1_xX!dwM12moNWB2m-p{`>D!2;G%ZSrgOw8(yJAYM zbDVa|p%M4fpPN43%)v~nxQIH}McDXl$9}Z58{0foC`UpwpG&au+kcJ@z?r7MG;xsE zp%pLQ6z_I#aoAnVzdNh*t|dqC_~CM7(nXd7PzaNn*d#Wxm_Ay?&0PW5-Kwc9xFf#JD;=c9PqSAU2k}%l^wVbGH9aEvp zptKF@fDV;Cn*VPJ9h8X9AJ=fAt#w3%1du5OjZ{L~PaZ%YJra;P#rw4r4~x5qxV|7B zjUAE3=m8y;?=1@ITO!Z0iJjtX9g83VlzKs9A6RG%!k#`!$bDdE)=Yd~sED`)fp-`l zLZW=+Z1h_o0qG`A*G_yd<`x9TXS8Gv8w5FoI4B!3zqe*$N6hsF`e3w46a#<%k<5n7jEplGlNs$9Y3c8#KajpW?Nr*MX=OLOkG=l|sQw>L zEll}!N+2c2@}A`>ONDvH{H)oLJezztxhHv<@&~0uQB1!x4VhLZok*HUs!jYbad%=v zVgl6v|L=b`0xLO@Q(5VmaNd>_{fDe6zWF9~fc#m%NtyY34~%VH{!mv+Lh*J#XpaV5KUth_sLY3osvg!ld zd@AAsRS7hq;oaNjkj}aM4adqC71qVUSdnfjr{|EqKHcj$m@O+xGA+%+stq^zJNAj2 zPEVbM!IbTj!4;w-dW2BJ_L;S>-mI)y$H8`yF0wOP?3#4l!@+tjH@ko&rI-^fvb`F0 zTw^289mu^o|0^V)l+21CYhXZdcYs(ZD~dd4fA%!024%IKgAU6=(9o(zDa1w`qiv4! 
zfHtH~+^Q^VRjVP>gbio+n3PqUIA}4|yee*LQEIEU+1Q?Uk38_heIiezM_onk6qolZ-G*$a&Ts$+O%gonga#Puqo;Fm-975j^eodzZqNU4!)q=)ylz> zg%Wlx7xcFec>Ksw+5HDRc{7hs$!vlW9kK>ay)9bwVA{H4o6D8v2RTTy4H&8o^gLat zb%#eVnbBcvaHOXOv!cohJPPgUkot@KmOWB`^rY(@{Aj66%h_lYY1Wim=W}}FhFw2> zE=?(K=AhiHh-%ubY-w~2<2A6Ofexg=0p>mTue)zNu~*)UxQ3PmyQAeUJdck3XOxoF z9Bf}a5YUc2mNtyc+m6es}*%!os6Ch9@06xVaGaiO~3xDM-$s57N0 zLQ$B=;ag{u_r9Tgb1erSmxnYjkF02Danx_=?-?5L`q-3W;|p7nNTJ$ggAno z1WG8S%^Wmc0qUv(3E4G!M`bIYCNHbI^7L$mL_LtAuq~i=0`xunvn~tYDch!zXCIAlycX=O4lEK{}Pfb`B0EW?Ifk)6sF2XSM>BQJ!0ya-6))o#TkQ7GQMpDMU(#<4=6~)IVqYP_j{3R7%M`c`|&Db*si{drPkDIQq4+Vgm<< zSd2JbOwY#wEZP6in1&=bxa@6gYyA^%ZaZ`gqd_4LS~c`7j%1U?mVaqXXm;h#MOm^7=-4d;-oJI=IMjsrLgINjk-+@LzKXzu2|@i`g^E^1C@m$8yAat(tea z;%R=i^A}yyiO@{rD>2ykqLV-PN8z?ll)O$3BC;6Iu9zTb550ih-Wx#J8t|1?SFyms z4*%z`e1xS*Yg1z|s3O95cZ_@z3mkrEFtBr1lG~qha#G1@;ovVx9}Pl0(Rt`gVx77|7q5$F zUT3d9O60>P`ud`hfS%+)CIXX<|Z+1p2yhSj`UL|UuFtJ>UKoLY$7 zoN#YZ;OpZu_)sm1pYMhaCkJ;g^i3&t2d@%V<;hr&QhR8?FV>G(H{Ii>tw$gEyL_au zL=e&8#~%Z@UGkQa&3L6qKXW9JDWtU4DHT@cOg+hW&CVdwU`~jK5qUHN(SG_(WeMXI zQU!UMNTK>zs0%%FEtr|R12345|PROT3_HUMAN Protein 3"), + ] + setup["mock_session"].execute.return_value = mock_results + + proteins = reader._get_protein_entries(is_decoy=False) + + assert isinstance(proteins, dict) + assert len(proteins[1]) == 2 + assert proteins[1][0] == "sp|P12345|PROT1_HUMAN Protein 1" + assert proteins[2][0] == "tr|R12345|PROT3_HUMAN Protein 3" # ">" should be removed + + def test_get_main_score_structure(self, mock_reader_setup): + """Test _get_main_score method structure.""" + setup = mock_reader_setup + reader = MSFReader(setup["test_file"]) + + setup["mock_session"].execute.reset_mock() + mock_results = [ + (1, 95.5, "XCorr"), + (2, 88.2, "Mascot Score"), + ] + setup["mock_session"].execute.return_value = mock_results + + scores = reader._get_main_score(is_decoy=False) + + assert isinstance(scores, dict) + assert scores[1] == (95.5, "XCorr") + assert scores[2] == (88.2, "Mascot Score") + + def 
test_get_secondary_scores_structure(self, mock_reader_setup): + """Test _get_secondary_scores method structure.""" + setup = mock_reader_setup + reader = MSFReader(setup["test_file"]) + + setup["mock_session"].execute.reset_mock() + mock_results = [ + (1, 0.95, "Confidence"), + (1, 15.2, "Delta Score"), + (2, 0.88, "Confidence"), + ] + setup["mock_session"].execute.return_value = mock_results + + scores = reader._get_secondary_scores(is_decoy=False) + + assert isinstance(scores, dict) + assert scores[1]["Confidence"] == 0.95 + assert scores[1]["Delta Score"] == 15.2 + assert scores[2]["Confidence"] == 0.88 + + @pytest.mark.parametrize( + "sequence,charge", + [ + ("PEPTIDE", 2), + ("METHYLATION", 3), + ("ACDEFGHIKLMNPQRSTVWY", 4), + ], + ) + def test_compile_peptidoform_basic(self, mock_reader_setup, sequence, charge): + """Test _compile_peptidoform with various basic sequences.""" + setup = mock_reader_setup + reader = MSFReader(setup["test_file"]) + + peptidoform = reader._compile_peptidoform( + sequence=sequence, charge=charge, modifications=[], terminal_modifications=[] + ) + + assert isinstance(peptidoform, Peptidoform) + assert sequence in str(peptidoform) + + def test_compile_peptidoform_with_modifications(self, mock_reader_setup): + """Test _compile_peptidoform with amino acid and terminal modifications.""" + setup = mock_reader_setup + reader = MSFReader(setup["test_file"]) + + peptidoform = reader._compile_peptidoform( + sequence="PEPTIDE", + charge=2, + modifications=[(0, 4), (3, 35)], # Acetyl at pos 0, Oxidation at pos 3 + terminal_modifications=[(1, 1), (2, 17)], # N-term and C-term mods + ) + + assert isinstance(peptidoform, Peptidoform) + # Verify modifications are included in the peptidoform + peptidoform_str = str(peptidoform) + # The sequence will be modified with UNIMOD annotations + assert any(aa in peptidoform_str for aa in "PEPTIDE") + assert "UNIMOD" in peptidoform_str # Should contain modification annotations + + def 
test_compatible_versions_constant(self): + """Test COMPATIBLE_VERSIONS constant is properly defined.""" + assert isinstance(COMPATIBLE_VERSIONS, list) + assert len(COMPATIBLE_VERSIONS) > 0 + assert all(isinstance(v, int) for v in COMPATIBLE_VERSIONS) + assert 79 in COMPATIBLE_VERSIONS + assert 53 in COMPATIBLE_VERSIONS + assert 8 in COMPATIBLE_VERSIONS + + +class TestMSFReaderIntegration: + """Integration tests using the real minimal MSF file.""" + + @pytest.fixture + def minimal_msf_path(self): + """Path to the minimal MSF test file.""" + path = Path(__file__).parent.parent / "test_data" / "minimal_v79_test.msf" + if not path.exists(): + pytest.skip("Minimal MSF test file not found") + return path + + @pytest.fixture + def reader(self, minimal_msf_path): + """MSFReader instance with minimal test file.""" + return MSFReader(minimal_msf_path) + + def test_initialization_with_real_file(self, minimal_msf_path): + """Test successful initialization with real MSF file.""" + reader = MSFReader(minimal_msf_path) + assert reader is not None + assert reader.filename == minimal_msf_path + + def test_len_with_real_data(self, reader): + """Test __len__ method with real MSF data.""" + psm_count = len(reader) + assert psm_count > 0 + assert isinstance(psm_count, int) + + def test_iteration_yields_correct_count(self, reader): + """Test that iteration yields the same number of PSMs as len().""" + expected_count = len(reader) + actual_psms = list(reader) + assert len(actual_psms) == expected_count + + def test_psm_structure_and_types(self, reader): + """Test that PSMs have correct structure and data types.""" + psms = list(reader) + assert len(psms) > 0 + + first_psm = psms[0] + + # Test required attributes exist + assert hasattr(first_psm, "peptidoform") + assert hasattr(first_psm, "spectrum_id") + assert hasattr(first_psm, "run") + assert hasattr(first_psm, "is_decoy") + assert hasattr(first_psm, "score") + assert hasattr(first_psm, "precursor_mz") + assert hasattr(first_psm, 
"retention_time") + assert hasattr(first_psm, "protein_list") + assert hasattr(first_psm, "rank") + assert hasattr(first_psm, "source") + assert hasattr(first_psm, "metadata") + assert hasattr(first_psm, "rescoring_features") + + # Test data types + assert isinstance(first_psm.peptidoform, Peptidoform) + assert isinstance(first_psm.is_decoy, bool) + assert isinstance(first_psm.score, int | float) + assert isinstance(first_psm.protein_list, list) + assert isinstance(first_psm.rank, int) + assert first_psm.source == "proteome_discoverer" + assert isinstance(first_psm.metadata, dict) + assert isinstance(first_psm.rescoring_features, dict) + + def test_target_and_decoy_psms(self, reader): + """Test that both target and decoy PSMs are present (if available).""" + psms = list(reader) + + target_psms = [psm for psm in psms if not psm.is_decoy] + decoy_psms = [psm for psm in psms if psm.is_decoy] + + # At least one type should be present + assert len(target_psms) > 0 or len(decoy_psms) > 0 + + # If both are present, verify they have the expected structure + if target_psms and decoy_psms: + assert len(target_psms) > 0 + assert len(decoy_psms) > 0 + + def test_psm_metadata_content(self, reader): + """Test that PSM metadata contains expected keys.""" + psms = list(reader) + first_psm = psms[0] + + # Test required metadata keys + required_metadata_keys = [ + "ms1_intensity", + "ms1_percent_isolation_interference", + "ms1_ion_inject_time", + "main_score_name", + ] + + for key in required_metadata_keys: + assert key in first_psm.metadata + + def test_rescoring_features_content(self, reader): + """Test that rescoring features contain expected data.""" + psms = list(reader) + first_psm = psms[0] + + # Test required rescoring feature keys + required_rescoring_keys = ["missed_cleavages", "total_ions_count", "matched_ions_count"] + + for key in required_rescoring_keys: + assert key in first_psm.rescoring_features + # Values can be int or float depending on database storage + assert 
isinstance(first_psm.rescoring_features[key], int | float) + + def test_peptidoform_sequences_valid(self, reader): + """Test that peptidoform sequences contain valid amino acids.""" + psms = list(reader) + + valid_amino_acids = set("ACDEFGHIKLMNPQRSTVWY") + + for psm in psms[:5]: # Test first 5 PSMs + peptidoform_str = str(psm.peptidoform) + # Extract base sequence (before any charge or modification info) + sequence = peptidoform_str.split("/")[0] + # Remove any modification annotations + clean_sequence = "".join(c for c in sequence if c.isalpha()) + + # All characters should be valid amino acids + assert all(aa in valid_amino_acids for aa in clean_sequence), ( + f"Invalid amino acids in sequence: {clean_sequence}" + ) + + def test_unique_spectrum_ids(self, reader): + """Test that spectrum IDs are unique (within decoy/target groups).""" + psms = list(reader) + + target_spectrum_ids = {psm.spectrum_id for psm in psms if not psm.is_decoy} + decoy_spectrum_ids = {psm.spectrum_id for psm in psms if psm.is_decoy} + + target_psms = [psm for psm in psms if not psm.is_decoy] + decoy_psms = [psm for psm in psms if psm.is_decoy] + + # Within each group, spectrum IDs should be unique per PSM + if target_psms: + assert len(target_spectrum_ids) <= len(target_psms) + if decoy_psms: + assert len(decoy_spectrum_ids) <= len(decoy_psms) + + def test_score_values_reasonable(self, reader): + """Test that score values are reasonable numbers.""" + psms = list(reader) + + for psm in psms: + assert isinstance(psm.score, int | float) + assert not (psm.score != psm.score) # Check for NaN + # Scores should be finite + assert abs(psm.score) < float("inf") + + +class TestMSFReaderErrorHandling: + """Test error handling and edge cases.""" + + def test_nonexistent_file(self): + """Test handling of nonexistent MSF file.""" + with pytest.raises(Exception): # Should raise some form of file not found error + MSFReader("nonexistent_file.msf") + + 
@patch("psm_utils.io.proteome_discoverer.create_engine") + def test_database_connection_error(self, mock_create_engine): + """Test handling of database connection errors.""" + mock_create_engine.side_effect = Exception("Database connection failed") + + test_file = Path("test.msf") + test_file.touch() + + try: + with pytest.raises(Exception): + MSFReader(test_file) + finally: + test_file.unlink() + + def test_read_file_method(self, minimal_msf_path): + """Test the read_file() method returns PSMList.""" + reader = MSFReader(minimal_msf_path) + psm_list = reader.read_file() + + # Should return a list-like object + assert hasattr(psm_list, "__iter__") + assert hasattr(psm_list, "__len__") + assert len(psm_list) > 0 + + def test_reader_reusability(self, minimal_msf_path): + """Test that MSFReader can be reused for multiple operations.""" + reader = MSFReader(minimal_msf_path) + + # Multiple length checks should work + psm_count1 = len(reader) + psm_count2 = len(reader) + assert psm_count1 == psm_count2 + assert psm_count1 > 0 + + def test_multiple_iterations(self, minimal_msf_path): + """Test that multiple iterations over the same reader work consistently.""" + reader = MSFReader(minimal_msf_path) + + first_iteration = list(reader) + second_iteration = list(reader) + + assert len(first_iteration) == len(second_iteration) + assert len(first_iteration) > 0 + + @pytest.fixture + def minimal_msf_path(self): + """Path to the minimal MSF test file.""" + path = Path(__file__).parent.parent / "test_data" / "minimal_v79_test.msf" + if not path.exists(): + pytest.skip("Minimal MSF test file not found") + return path + + +class TestMSFReaderPerformance: + """Performance and stress tests for MSFReader.""" + + @pytest.fixture + def minimal_msf_path(self): + """Path to the minimal MSF test file.""" + path = Path(__file__).parent.parent / "test_data" / "minimal_v79_test.msf" + if not path.exists(): + pytest.skip("Minimal MSF test file not found") + return path + + def 
test_lazy_iteration_memory_efficiency(self, minimal_msf_path): + """Test that iteration is memory efficient (doesn't load all PSMs at once).""" + reader = MSFReader(minimal_msf_path) + + # Should be able to iterate without loading everything into memory + psm_count = 0 + for psm in reader: + psm_count += 1 + if psm_count > 5: # Just test first few PSMs + break + + assert psm_count > 0 + + def test_consistent_psm_ordering(self, minimal_msf_path): + """Test that PSM ordering is consistent across iterations.""" + reader = MSFReader(minimal_msf_path) + + first_batch = [] + for i, psm in enumerate(reader): + first_batch.append(psm.spectrum_id) + if i >= 4: # First 5 PSMs + break + + second_batch = [] + for i, psm in enumerate(reader): + second_batch.append(psm.spectrum_id) + if i >= 4: # First 5 PSMs + break + + assert first_batch == second_batch, "PSM ordering should be consistent" + + def test_all_required_psm_attributes(self, minimal_msf_path): + """Test that all PSMs have all required attributes populated.""" + reader = MSFReader(minimal_msf_path) + + required_attrs = [ + "peptidoform", + "spectrum_id", + "run", + "is_decoy", + "score", + "precursor_mz", + "retention_time", + "protein_list", + "rank", + "source", + "metadata", + "rescoring_features", + ] + + for i, psm in enumerate(reader): + for attr in required_attrs: + assert hasattr(psm, attr), f"PSM {i} missing attribute: {attr}" + # None values are acceptable, but attribute must exist + getattr(psm, attr) + + if i >= 9: # Test first 10 PSMs + break From bf5feea09d2557482a22782479c61c9653d8202a Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 8 Aug 2025 12:59:32 +0200 Subject: [PATCH 03/11] Replace file hash based tests for idxml with actual unit tests --- psm_utils/io/idxml.py | 39 +++++-- tests/test_io/test_idxml.py | 200 +++++++++++++++++++++++++++++++++--- 2 files changed, 215 insertions(+), 24 deletions(-) diff --git a/psm_utils/io/idxml.py b/psm_utils/io/idxml.py index 953b27b..d4171fe 100644 --- 
a/psm_utils/io/idxml.py +++ b/psm_utils/io/idxml.py @@ -247,14 +247,35 @@ def _parse_psm( "idxml:significance_threshold": str(peptide_id.getSignificanceThreshold()), } peptide_hit_metadata = { - key: peptide_hit.getMetaValue(key) for key in self.user_params_metadata + key: str(peptide_hit.getMetaValue(key)) + if peptide_hit.getMetaValue(key) is not None + else "" + for key in self.user_params_metadata } + + # Extract qvalue and pep if they exist + qvalue = None + if peptide_hit.metaValueExists(QVALUE_KEY): + try: + qvalue = float(peptide_hit.getMetaValue(QVALUE_KEY)) + except (ValueError, TypeError): + pass + + pep = None + if peptide_hit.metaValueExists(PEP_KEY): + try: + pep = float(peptide_hit.getMetaValue(PEP_KEY)) + except (ValueError, TypeError): + pass + return PSM( peptidoform=peptidoform, spectrum_id=peptide_id.getMetaValue(SPECTRUM_REFERENCE_KEY), run=self._get_run(protein_ids, peptide_id), is_decoy=self._is_decoy(peptide_hit), score=peptide_hit.getScore(), + qvalue=qvalue, + pep=pep, precursor_mz=peptide_id.getMZ(), retention_time=peptide_id.getRT(), ion_mobility=self._get_ion_mobility(peptide_hit), @@ -347,12 +368,7 @@ def _get_rescoring_features(self, peptide_hit: Any) -> list[str]: peptide_hit.getKeys(keys) return [ - key.decode() - for key in keys - if ( - self._is_float(peptide_hit.getMetaValue(key.decode())) - and key.decode() in RESCORING_FEATURE_LIST - ) + key.decode() for key in keys if self._is_float(peptide_hit.getMetaValue(key.decode())) ] @staticmethod @@ -562,12 +578,11 @@ def _update_peptide_hit(self, peptide_hit: Any, psm: PSM) -> None: if psm.pep is not None: peptide_hit.setMetaValue(PEP_KEY, psm.pep) - # Add rescoring features (only those not in the standard list) + # Add rescoring features if psm.rescoring_features: for feature, value in psm.rescoring_features.items(): - if feature not in RESCORING_FEATURE_LIST: - # Convert numpy objects to floats as pyopenms does not support numpy objects - peptide_hit.setMetaValue(feature, 
float(value)) + # Convert numpy objects to floats as pyopenms does not support numpy objects + peptide_hit.setMetaValue(feature, float(value)) def _create_new_ids(self, psm_dict: dict[str | None, dict[str, dict[str, list[PSM]]]]) -> None: """Create new ProteinIdentification and PeptideIdentification objects with new features.""" @@ -656,6 +671,8 @@ def _create_peptide_hit(self, psm: PSM) -> Any: peptide_hit.setMetaValue(TARGET_DECOY_KEY, target_decoy_value) # Set optional values + if psm.score is not None: + peptide_hit.setScore(psm.score) if psm.qvalue is not None: peptide_hit.setMetaValue(QVALUE_KEY, psm.qvalue) if psm.pep is not None: diff --git a/tests/test_io/test_idxml.py b/tests/test_io/test_idxml.py index dae34da..a159716 100644 --- a/tests/test_io/test_idxml.py +++ b/tests/test_io/test_idxml.py @@ -1,7 +1,5 @@ """Tests for psm_utils.io.idxml.""" -import hashlib - import pytest from psm_utils.io.idxml import IdXMLReader, IdXMLWriter @@ -12,6 +10,15 @@ pyopenms = pytest.importorskip("pyopenms") +def _assert_float_equal(a: float | None, b: float | None, tolerance: float = 1e-5) -> None: + """Assert two float values are equal within tolerance, handling None values.""" + if a is None and b is None: + return + if a is None or b is None: + assert False, f"One value is None: {a} vs {b}" + assert abs(a - b) < tolerance, f"Values not equal within tolerance: {a} vs {b}" + + class TestIdXMLReader: def test__parse_peptidoform(self): test_cases = [ @@ -56,9 +63,19 @@ def test__parse_psm(self): "protein_references": "unique", }, rescoring_features={ + "MS:1002258": 3.0, + "MS:1002259": 12.0, + "num_matched_peptides": 35.0, + "isotope_error": 0.0, "MS:1002252": 0.693, + "COMET:xcorr": 0.693, + "MS:1002253": 1.0, "COMET:deltaCn": 1.0, "MS:1002255": 35.9, + "COMET:spscore": 35.9, + "MS:1002256": 1.0, + "COMET:sprank": 1.0, + "MS:1002257": 1.01, "COMET:deltaLCn": 0.0, "COMET:lnExpect": 0.009950330853168092, "COMET:lnNumSP": 3.555348061489414, @@ -79,9 +96,19 @@ def 
test__get_run(self): def test__get_rescoring_features(self): expected_output = [ + "MS:1002258", + "MS:1002259", + "num_matched_peptides", + "isotope_error", "MS:1002252", + "COMET:xcorr", + "MS:1002253", "COMET:deltaCn", "MS:1002255", + "COMET:spscore", + "MS:1002256", + "COMET:sprank", + "MS:1002257", "COMET:deltaLCn", "COMET:lnExpect", "COMET:lnNumSP", @@ -96,23 +123,170 @@ def test__get_rescoring_features(self): class TestIdXMLWriter: def test_write_file_with_pyopenms_objects(self): - expected_sha = "8d8cb6d8194c5c296f0f5ee8be83d2072be125547b2d51b88100859b001f47fa" + """Test writing idXML file with existing pyopenms objects and verify content.""" reader = IdXMLReader("./tests/test_data/test_in.idXML") - psm_list = reader.read_file() + original_psm_list = reader.read_file() + + # Write the file writer = IdXMLWriter( "./tests/test_data/test_out.idXML", reader.protein_ids, reader.peptide_ids ) - writer.write_file(psm_list) - sha = hashlib.sha256(open("./tests/test_data/test_out.idXML", "rb").read()).hexdigest() - assert sha == expected_sha + writer.write_file(original_psm_list) + + # Read back the written file and verify content + reader_check = IdXMLReader("./tests/test_data/test_out.idXML") + written_psm_list = reader_check.read_file() + + # Verify basic file structure + assert len(written_psm_list) == len(original_psm_list) + + # Compare key attributes of each PSM + for orig_psm, written_psm in zip(original_psm_list, written_psm_list): + assert str(orig_psm.peptidoform) == str(written_psm.peptidoform) + assert orig_psm.spectrum_id == written_psm.spectrum_id + assert orig_psm.run == written_psm.run + assert orig_psm.is_decoy == written_psm.is_decoy + _assert_float_equal(orig_psm.score, written_psm.score) + _assert_float_equal(orig_psm.precursor_mz, written_psm.precursor_mz) + _assert_float_equal(orig_psm.retention_time, written_psm.retention_time) + assert orig_psm.protein_list == written_psm.protein_list + assert orig_psm.rank == written_psm.rank + + # Check 
that rescoring features are preserved + if orig_psm.rescoring_features: + assert written_psm.rescoring_features is not None + for feature_name, feature_value in orig_psm.rescoring_features.items(): + assert feature_name in written_psm.rescoring_features + assert abs(written_psm.rescoring_features[feature_name] - feature_value) < 1e-6 def test_write_file_without_pyopenms_objects(self): - expected_sha = "148889926276fbe391e23ed7952c3a8410fc67ffb099bbf1a72df75f8d727ccd" #TODO: can cause problems locally depending on dependency versions + """Test writing idXML file from scratch without existing pyopenms objects.""" reader = SageTSVReader("./tests/test_data/results.sage.tsv") - psm_list = reader.read_file() + original_psm_list = reader.read_file() + + # Write the file writer = IdXMLWriter("./tests/test_data/test_out_sage.idXML") + writer.write_file(original_psm_list) + + # Read back the written file and verify content + reader_check = IdXMLReader("./tests/test_data/test_out_sage.idXML") + written_psm_list = reader_check.read_file() + + # Verify basic file structure + assert len(written_psm_list) == len(original_psm_list) + + # Compare key attributes of the first PSM (since sage data has one entry) + orig_psm = original_psm_list[0] + written_psm = written_psm_list[0] + + assert str(orig_psm.peptidoform) == str(written_psm.peptidoform) + assert orig_psm.spectrum_id == written_psm.spectrum_id + assert orig_psm.run == written_psm.run + assert orig_psm.is_decoy == written_psm.is_decoy + _assert_float_equal(orig_psm.score, written_psm.score) + _assert_float_equal(orig_psm.precursor_mz, written_psm.precursor_mz) + _assert_float_equal(orig_psm.retention_time, written_psm.retention_time) + assert orig_psm.protein_list == written_psm.protein_list + assert orig_psm.rank == written_psm.rank + + # Verify that the written file is a valid idXML (can be read without errors) + assert len(reader_check.protein_ids) > 0 + assert len(reader_check.peptide_ids) > 0 + + def 
test_write_file_preserves_modifications(self): + """Test that modifications are properly preserved when writing idXML files.""" + from psm_utils.psm_list import PSMList + + # Create test PSMs with various modifications + test_psms = [ + PSM( + peptidoform="ACDK/2", + spectrum_id="scan=1", + score=140.2, + retention_time=600.2, + precursor_mz=300.15, + run="test_run", + ), + PSM( + peptidoform="AC[Carbamidomethyl]DK/2", + spectrum_id="scan=2", + score=150.3, + retention_time=650.1, + precursor_mz=357.17, + run="test_run", + ), + PSM( + peptidoform="[Acetyl]-ACDK/2", + spectrum_id="scan=3", + score=120.8, + retention_time=580.5, + precursor_mz=342.16, + run="test_run", + ), + ] + + psm_list = PSMList(psm_list=test_psms) + + # Write and read back + writer = IdXMLWriter("./tests/test_data/test_mods.idXML") writer.write_file(psm_list) - sha = hashlib.sha256( - open("./tests/test_data/test_out_sage.idXML", "rb").read() - ).hexdigest() - assert sha == expected_sha + + reader_check = IdXMLReader("./tests/test_data/test_mods.idXML") + written_psm_list = reader_check.read_file() + + # Verify modifications are preserved + assert len(written_psm_list) == len(test_psms) + + for orig_psm, written_psm in zip(test_psms, written_psm_list): + # The peptidoform should be preserved (though the exact string representation might differ) + assert orig_psm.peptidoform.sequence == written_psm.peptidoform.sequence + assert ( + orig_psm.peptidoform.precursor_charge == written_psm.peptidoform.precursor_charge + ) + + # Basic properties should match + assert orig_psm.spectrum_id == written_psm.spectrum_id + _assert_float_equal(orig_psm.score, written_psm.score) + _assert_float_equal(orig_psm.retention_time, written_psm.retention_time) + _assert_float_equal(orig_psm.precursor_mz, written_psm.precursor_mz) + + def test_write_file_with_metadata_and_features(self): + """Test that metadata and rescoring features are preserved.""" + from psm_utils.psm_list import PSMList + + test_psm = PSM( + 
peptidoform="TESTPEPTIDE/2", + spectrum_id="scan=100", + score=200.5, + retention_time=1000.0, + precursor_mz=500.25, + run="feature_test", + qvalue=0.01, + pep=0.05, + metadata={"custom_meta": "test_value", "intensity": "12345"}, + rescoring_features={"custom_score": 0.85, "feature_2": 1.23}, + ) + + psm_list = PSMList(psm_list=[test_psm]) + + # Write and read back + writer = IdXMLWriter("./tests/test_data/test_features.idXML") + writer.write_file(psm_list) + + reader_check = IdXMLReader("./tests/test_data/test_features.idXML") + written_psm_list = reader_check.read_file() + + assert len(written_psm_list) == 1 + written_psm = written_psm_list[0] + + # Check basic attributes + assert str(test_psm.peptidoform) == str(written_psm.peptidoform) + assert test_psm.spectrum_id == written_psm.spectrum_id + _assert_float_equal(test_psm.score, written_psm.score) + _assert_float_equal(test_psm.qvalue, written_psm.qvalue) + _assert_float_equal(test_psm.pep, written_psm.pep) + + # Check that custom features are preserved + assert written_psm.rescoring_features is not None + assert "custom_score" in written_psm.rescoring_features + assert abs(written_psm.rescoring_features["custom_score"] - 0.85) < 1e-6 From 9f09060abd417b330994e4e463e4974fe7b8b00e Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 8 Aug 2025 15:33:15 +0200 Subject: [PATCH 04/11] Update actions to drop support for Python <3.10 --- .github/workflows/publish.yml | 2 +- .github/workflows/test.yml | 15 +++++---------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 786ff7d..b7e70e8 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -19,7 +19,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.9" + python-version: "3.10" - name: Install dependencies run: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e02148f..36b0fa5 100644 --- 
a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,25 +13,20 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python 3.9 + - name: Set up Python uses: actions/setup-python@v5 with: - python-version: "3.9" - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install ruff + python-version: "3.10" - name: Check with Ruff - run: ruff check --output-format=github . + uses: astral-sh/ruff-action@v3 - name: Install package and its dependencies run: pip install --editable .[dev,idxml] - name: Test with pytest and codecov run: | - pytest --cov=psm_utils --cov-report=xml tests/ + pytest --cov=psm_utils --cov-report=xml tests/ - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v3 @@ -46,7 +41,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 From 6edb4c789d09faa2c2efc31c4ce94c43e5c7612b Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 8 Aug 2025 15:37:14 +0200 Subject: [PATCH 05/11] Exclude docs and tests from linting --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 36b0fa5..29cc40f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -20,6 +20,8 @@ jobs: - name: Check with Ruff uses: astral-sh/ruff-action@v3 + with: + args: --exclude docs,tests - name: Install package and its dependencies run: pip install --editable .[dev,idxml] From 464bb88b96a57f7e002703df5d309783886a982e Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 8 Aug 2025 15:39:04 +0200 Subject: [PATCH 06/11] Fix linting issues in online --- online/Home.py | 2 ++ online/_utils.py | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/online/Home.py b/online/Home.py index 1c1f8fc..f6eb7f3 100644 --- a/online/Home.py +++ b/online/Home.py @@ -4,6 +4,8 
@@ class StreamlitPageHome(StreamlitPage): + """Streamlit page for the home section.""" + def _main_page(self): pass diff --git a/online/_utils.py b/online/_utils.py index a69b26e..b743694 100644 --- a/online/_utils.py +++ b/online/_utils.py @@ -11,6 +11,7 @@ class ECDF: ---------- x : array_like Observations + """ def __init__(self, x): @@ -101,15 +102,15 @@ def pp_plot(psm_df): def fdr_plot(psm_df, fdr_threshold): """Plot number of identifications in function of FDR threshold.""" - df = ( + target_psm_df = ( psm_df[~psm_df["is_decoy"]] .reset_index(drop=True) .sort_values("qvalue", ascending=True) .copy() ) - df["count"] = (~df["is_decoy"]).cumsum() + target_psm_df["count"] = (~target_psm_df["is_decoy"]).cumsum() fig = px.line( - df, + target_psm_df, x="qvalue", y="count", log_x=True, From 8ee915205726f399bfc14ea374a8b0c4bc9af0ff Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 8 Aug 2025 15:50:36 +0200 Subject: [PATCH 07/11] Add format checks --- .github/workflows/test.yml | 9 +++++++-- psm_utils/io/peptide_record.py | 3 +-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 29cc40f..064f74b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,10 +18,15 @@ jobs: with: python-version: "3.10" - - name: Check with Ruff + - name: Lint with Ruff uses: astral-sh/ruff-action@v3 with: - args: --exclude docs,tests + args: format --check --diff --exclude docs,tests + + - name: Check formatting with Ruff + uses: astral-sh/ruff-action@v3 + with: + args: --check exclude docs,tests - name: Install package and its dependencies run: pip install --editable .[dev,idxml] diff --git a/psm_utils/io/peptide_record.py b/psm_utils/io/peptide_record.py index d3ba916..749b3f9 100644 --- a/psm_utils/io/peptide_record.py +++ b/psm_utils/io/peptide_record.py @@ -188,8 +188,7 @@ def _entry_to_psm(entry: _PeprecEntry, filename: str | Path) -> PSM: is_decoy = is_decoy_map[entry.label] except 
(ValueError, KeyError) as e: raise InvalidPeprecError( - f"Could not parse value for `label` {entry.label}. " - "Should be `1` or `-1`." + f"Could not parse value for `label` {entry.label}. Should be `1` or `-1`." ) from e else: is_decoy = None From 371a75c4d6c408dec349c08918401b75336266a4 Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 8 Aug 2025 15:53:52 +0200 Subject: [PATCH 08/11] Fix ruff arguments --- .github/workflows/test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 064f74b..f2c0a06 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,15 +18,15 @@ jobs: with: python-version: "3.10" - - name: Lint with Ruff + - name: Check formatting with Ruff uses: astral-sh/ruff-action@v3 with: args: format --check --diff --exclude docs,tests - - name: Check formatting with Ruff + - name: Lint with Ruff uses: astral-sh/ruff-action@v3 with: - args: --check exclude docs,tests + args: check --exclude docs,tests - name: Install package and its dependencies run: pip install --editable .[dev,idxml] From 24bf8955b3b921e2a00f9c33eea8985503d50192 Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 8 Aug 2025 16:24:05 +0200 Subject: [PATCH 09/11] Add mypy to test action --- .github/workflows/test.yml | 11 +++++++---- pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f2c0a06..6aed2cf 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,19 +18,22 @@ jobs: with: python-version: "3.10" - - name: Check formatting with Ruff + - name: Lint with Ruff uses: astral-sh/ruff-action@v3 with: - args: format --check --diff --exclude docs,tests + args: check --exclude docs,tests - - name: Lint with Ruff + - name: Check formatting with Ruff uses: astral-sh/ruff-action@v3 with: - args: check --exclude docs,tests + args: format --check --diff --exclude docs,tests - 
name: Install package and its dependencies run: pip install --editable .[dev,idxml] + - name: Static type checking with mypy + run: mypy + - name: Test with pytest and codecov run: | pytest --cov=psm_utils --cov-report=xml tests/ diff --git a/pyproject.toml b/pyproject.toml index 8d67852..ed89a85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ dependencies = [ ] [project.optional-dependencies] -dev = ["ruff", "isort>5", "pytest", "pytest-cov"] +dev = ["ruff", "isort>5", "pytest", "pytest-cov", "mypy"] docs = [ "numpydoc>=1,<2", "recommonmark", From f58d55e4a864c3460294b758c7e8affeb8ccf552 Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 8 Aug 2025 16:30:48 +0200 Subject: [PATCH 10/11] Fix mypy config --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index ed89a85..d4a07a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,3 +83,4 @@ ignore = ["D203", "D212"] [tool.mypy] files = ["psm_utils/**/*.py"] +install_types = true From 7487324ffea96fa1e95abcf087398e440ec767fd Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 8 Aug 2025 16:45:51 +0200 Subject: [PATCH 11/11] Set mypy to non-interactive --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6aed2cf..ae65341 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -32,7 +32,7 @@ jobs: run: pip install --editable .[dev,idxml] - name: Static type checking with mypy - run: mypy + run: mypy --non-interactive - name: Test with pytest and codecov run: |