levitsky · levitsky · Oct 28, 2022 · Oct 25, 2022 · Oct 25, 2022 · Oct 25, 2022
diff --git a/doc/source/api/proforma.rst b/doc/source/api/proforma.rst
@@ -1 +1,232 @@
-.. automodule:: pyteomics.proforma
+
+.. module:: pyteomics.proforma
+
+proforma - Proteoform and Peptidoform Notation
+==============================================
+
+ProForma is a notation for defining modified amino acid sequences using
+a set of controlled vocabularies, as well as encoding uncertain or partial
+information about localization. See the `ProForma specification <https://www.psidev.info/proforma>`_
+for the most up-to-date information.
+
+Strictly speaking, this implementation supports ProForma v2.
+
+.. contents::
+    :depth: 2
+
+Data Access
+-----------
+
+:py:func:`parse` - The primary interface for parsing ProForma strings.
+
+    >>> parse("EM[Oxidation]EVT[#g1(0.01)]S[#g1(0.09)]ES[Phospho#g1(0.90)]PEK")
+        ([('E', None),
+          ('M', [GenericModification('Oxidation', None, None)]),
+          ('E', None),
+          ('V', None),
+          ('T', [LocalizationMarker(0.01, None, '#g1')]),
+          ('S', [LocalizationMarker(0.09, None, '#g1')]),
+          ('E', None),
+          ('S',
+          [GenericModification('Phospho', [LocalizationMarker(0.9, None, '#g1')], '#g1')]),
+          ('P', None),
+          ('E', None),
+          ('K', None)],
+         {'n_term': None,
+          'c_term': None,
+          'unlocalized_modifications': [],
+          'labile_modifications': [],
+          'fixed_modifications': [],
+          'intervals': [],
+          'isotopes': [],
+          'group_ids': ['#g1']})
+
+:py:func:`to_proforma` - Format a sequence and set of properties as ProForma text.
+
+
+Classes
+-------
+
+:py:class:`ProForma` - An object-oriented version of the parsing and formatting code,
+coupled with minimal information about mass and position data.
+
+    >>> seq = ProForma.parse("EM[Oxidation]EVT[#g1(0.01)]S[#g1(0.09)]ES[Phospho#g1(0.90)]PEK")
+    >>> seq
+    ProForma([('E', None), ('M', [GenericModification('Oxidation', None, None)]), ('E', None),
+              ('V', None), ('T', [LocalizationMarker(0.01, None, '#g1')]), ('S', [LocalizationMarker(0.09, None, '#g1')]),
+              ('E', None), ('S', [GenericModification('Phospho', [LocalizationMarker(0.9, None, '#g1')], '#g1')]),
+              ('P', None), ('E', None), ('K', None)],
+              {'n_term': None, 'c_term': None, 'unlocalized_modifications': [],
+               'labile_modifications': [], 'fixed_modifications': [], 'intervals': [],
+               'isotopes': [], 'group_ids': ['#g1'], 'charge_state': None}
+            )
+    >>> seq.mass
+    1360.51054400136
+    >>> seq.tags
+    [GenericModification('Oxidation', None, None),
+     LocalizationMarker(0.01, None, '#g1'),
+     LocalizationMarker(0.09, None, '#g1'),
+     GenericModification('Phospho', [LocalizationMarker(0.9, None, '#g1')], '#g1')]
+    >>> str(seq)
+    'EM[Oxidation]EVT[#g1(0.01)]S[#g1(0.09)]ES[Phospho|#g1(0.9)]PEK'
+
+Dependencies
+------------
+
+To resolve PSI-MOD, XL-MOD, and GNO identifiers, :mod:`psims` is required. By default,
+:mod:`psims` retrieves the most recent version of each controlled vocabulary from the internet, but
+includes a fall-back version to use when the network is unavailable. It can also create
+an application cache on disk.
+
+CV Disk Caching
+~~~~~~~~~~~~~~~
+ProForma uses several different controlled vocabularies (CVs) that are each versioned separately.
+Internally, the Unimod controlled vocabulary is accessed using :class:`~pyteomics.mass.mass.Unimod`
+and all other controlled vocabularies are accessed using :mod:`psims`. Unless otherwise stated,
+the machinery will download fresh copies of each CV when first queried.
+
+To avoid this slow operation, you can keep cached copies of the CV source files on disk and tell
+:mod:`pyteomics` and :mod:`psims` where to find them:
+
+.. code-block:: python
+
+    from pyteomics import proforma
+
+    # set the path for Unimod loading via pyteomics
+    proforma.set_unimod_path("path/to/unimod.xml")
+
+    # set the cache directory for downloading and reloading OBOs via psims
+    proforma.obo_cache.cache_path = "obo/cache/dir/"
+    proforma.obo_cache.enabled = True
+
+
+Compliance Levels
+-----------------
+
+1. Base Level Support
+
+This is the lowest level of compliance. It involves providing support for:
+
+    - [x] Amino acid sequences
+    - [x] Protein modifications using two of the supported CVs/ontologies: Unimod and PSI-MOD.
+    - [x] Protein modifications using delta masses (without prefixes)
+    - [x] N-terminal, C-terminal and labile modifications.
+    - [x] Ambiguity in the modification position, including support for localisation scores.
+    - [x] INFO tag.
+
+2. Additional Separate Support
+
+These features are independent from each other:
+
+    - [x] Unusual amino acids (O and U).
+    - [x] Ambiguous amino acids (e.g. X, B, Z). This would include support for sequence tags of known mass (using the character X).
+    - [x] Protein modifications using delta masses (using prefixes for the different CVs/ontologies).
+    - [x] Use of prefixes for Unimod (U:) and PSI-MOD (M:) names.
+    - [x] Support for the joint representation of experimental data and its interpretation.
+
+3. Top Down Extensions
+
+    - [ ] Additional CV/ontologies for protein modifications: RESID (the prefix R MUST be used for RESID CV/ontology term names)
+    - [x] Chemical formulas (this feature occurs in two places in this list).
+
+4. Cross-Linking Extensions
+
+    - [ ] Cross-linked peptides (using the XL-MOD CV/ontology, the prefix X MUST be used for XL-MOD CV/ontology term names).
+
+5. Glycan Extensions
+
+    - [x] Additional CV/ontologies for protein modifications: GNO (the prefix G MUST be used for GNO CV/ontology term names)
+    - [x] Glycan composition.
+    - [x] Chemical formulas (this feature occurs in two places in this list).
+
+6. Spectral Support
+
+    - [x] Charge state and adducts
+    - [ ] Chimeric spectra are special cases.
+    - [x] Global modifications (e.g., every C is C13).
+
+
+Functions
+---------
+
+.. autofunction:: parse
+
+.. autofunction:: to_proforma
+
+Helpers
+~~~~~~~
+
+.. autofunction:: set_unimod_path
+
+
+High Level Interface
+--------------------
+
+.. autoclass:: ProForma
+
+
+Tag Types
+---------
+
+.. autoclass:: TagBase
+
+.. autoclass:: TagTypeEnum
+
+
+Modification Tags
+~~~~~~~~~~~~~~~~~
+
+.. autoclass:: MassModification
+
+.. autoclass:: ModificationBase
+
+.. autoclass:: GenericModification
+
+.. autoclass:: FormulaModification
+
+.. autoclass:: UnimodModification
+
+.. autoclass:: PSIModModification
+
+.. autoclass:: XLMODModification
+
+.. autoclass:: GNOmeModification
+
+.. autoclass:: GlycanModification
+
+.. autoclass:: ModificationToken
+
+
+Label Tags
+~~~~~~~~~~
+
+.. autoclass:: InformationTag
+
+.. autoclass:: PositionLabelTag
+
+.. autoclass:: LocalizationMarker
+
+
+Supporting Types
+----------------
+
+.. autoclass:: ModificationRule
+
+.. autoclass:: StableIsotope
+
+.. autoclass:: TaggedInterval
+
+.. autoclass:: ChargeState
+
+Modification Resolvers
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: ModificationResolver
+
+.. autoclass:: GenericResolver
+
+.. autoclass:: UnimodResolver
+
+.. autoclass:: PSIModResolver
+
+.. autoclass:: XLMODResolver
+
+.. autoclass:: GNOResolver
diff --git a/doc/source/api/unimod.rst b/doc/source/api/unimod.rst
@@ -1 +1,57 @@
-.. automodule:: pyteomics.mass.unimod
+.. module:: pyteomics.mass.unimod
+
+unimod - interface to the Unimod database
+=========================================
+
+This module provides an interface to the relational Unimod database.
+The main class is :py:class:`Unimod`, which provides an interface identical
+to that of the in-memory implementation of the same name in :mod:`pyteomics.mass`.
+
+Dependencies
+------------
+
+This module requires :py:mod:`lxml` and :py:mod:`sqlalchemy`.
+
+
+Primary Interface
+-----------------
+
+    .. autoclass:: Unimod
+
+
+Relational Entities
+~~~~~~~~~~~~~~~~~~~
+There are many tables that are described as object-relationally mapped (ORM) types in this module. The most important two are shown
+here.
+
+    .. class:: Modification
+
+        A single modification record from Unimod, having an :attr:`id`, :attr:`full_name`, :attr:`code_name`,
+        and :attr:`ex_code_name` as identifiers, and :attr:`monoisotopic_mass`, :attr:`average_mass`, and
+        :attr:`composition` as mass-describing properties.
+
+        Additional relationships may be loaded through :attr:`specificities` (see :class:`~.Specificity`), :attr:`alternative_names`,
+        :attr:`fragments`, and :attr:`notes`.
+
+
+    .. class:: Specificity
+
+        Describes the relationship between a :class:`~.Modification` and an amino acid/position rule, along with the
+        chemical process type that gives rise to that modification event.
+
+Other ORM Types
+***************
+
+The following ORM types may be useful when composing a more detailed query. Additional types may be found in the source.
+
+    .. class:: AminoAcid
+
+    .. class:: Position
+
+    .. class:: Classification
+
+    .. class:: Fragment
+
+    .. class:: AlternativeName
+
+    .. class:: Crossreference
diff --git a/pyteomics/mass/mass.py b/pyteomics/mass/mass.py
@@ -1204,3 +1204,11 @@ def by_id(self, i):
         return self._mods[self._id[i]]
 
     __getitem__ = by_id
+
+
+def neutral_mass(mz, z, charge_carrier=_nist_mass[PROTON][0][0]):
+    return (mz * abs(z)) - (z * charge_carrier)
+
+
+def mass_charge_ratio(neutral_mass, z, charge_carrier=_nist_mass[PROTON][0][0]):
+    return (neutral_mass + (z * charge_carrier)) / abs(z)
diff --git a/pyteomics/mass/unimod.py b/pyteomics/mass/unimod.py
@@ -8,7 +8,7 @@
 Dependencies
 ------------
 
-This module requres :py:mod:`lxml` and :py:mod:`sqlalchemy`.
+This module requires :py:mod:`lxml` and :py:mod:`sqlalchemy`.
 """
 
 #   Copyright 2015 Joshua Klein, Lev Levitsky
@@ -679,6 +679,24 @@ def session(path='sqlite:///unimod.db'):
 class Unimod(object):
     """
     Main class representing the relational Unimod database.
+
+    Examples
+    --------
+
+    If you just wish to get a new copy of the data and store it in a temporary
+    in-memory database, invoking the type without parameters works without issue.
+
+    >>> new_db = Unimod()
+
+    If you want to persist a snapshot of the Unimod database to disk and query it
+    from there, or to re-use a previously downloaded database copy, pass a database
+    driver prefixed path:
+
+    >>> reused_db = Unimod("sqlite:///path/to/unimod.db")
+
+    If the path did not previously exist, a new copy of Unimod will be downloaded
+    and stored there on the first use, but be immediately available on subsequent
+    uses.
     """
     def __init__(self, path=None):
         """
@@ -765,3 +783,18 @@ def mods(self):
 
     def __iter__(self):
         return iter(self.session.query(Modification).yield_per(1000))
+
+    def query(self, *args):
+        '''Compose an SQL query using SQLAlchemy's ORM interface.
+
+        See :mod:`sqlalchemy`'s Session documentation for more details.
+        '''
+        return self.session.query(*args)
+
+    def execute(self, *args, **kwargs):
+        '''Execute an SQLAlchemy statement or a SQL string against the database,
+        returning the resulting database cursor.
+
+        See :mod:`sqlalchemy`'s Session documentation for more details.
+        '''
+        return self.session.execute(*args, **kwargs)