Skip to content

Commit

Permalink
Merge pull request #2 from RIVM-bioinformatics/dev
Browse files Browse the repository at this point in the history
Add `--keep-gaps` flag
  • Loading branch information
florianzwagemaker authored Dec 5, 2022
2 parents 0bf351a + fea7643 commit 59bbb36
Show file tree
Hide file tree
Showing 10 changed files with 158 additions and 20 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/publish.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Workflow: build the Python package and upload it to PyPI when a GitHub release is published.
name: Publish Python package

# Triggered only by the `release` event with activity type `published`.
on:
release:
types: [published]

jobs:
publish:
name: Publish package
runs-on: ubuntu-latest

steps:
# fetch-depth: 0 requests the full git history instead of the default shallow clone.
# NOTE(review): presumably needed so the build can see tags/history — confirm.
- uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Setup Python
uses: actions/setup-python@v4
with:
# Quoted so YAML reads the version as the string "3.10" and not the number 3.1.
python-version: "3.10"

# Upgrades pip and the `build` package, then runs `python -m build` to build the package.
- name: Build package
run: |
python -m pip install --upgrade pip build
python -m build
# Uploads to PyPI with user `__token__` and the password taken from the
# PYPI_TOKEN_PROD repository secret.
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_TOKEN_PROD }}
54 changes: 54 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Workflow: on every push to `main`, run release-please for the AminoExtract Python package.
name: Build and release
on:
push:
branches:
- main

jobs:
bump-version:
name: Release version
runs-on: ubuntu-latest

steps:
# Step id `release` lets later steps/jobs reference this step's outputs;
# nothing in the active part of this workflow reads them yet.
- uses: GoogleCloudPlatform/release-please-action@v3
id: release
with:
release-type: python
package-name: AminoExtract
# Authenticates with the RELEASE_TOKEN secret.
# NOTE(review): presumably a personal access token so that the created
# release can trigger other workflows — confirm.
token: ${{ secrets.RELEASE_TOKEN }}

# NOTE(review): the `update-docs` job below is entirely commented out, so it never runs.
# It is kept as-is for reference; re-enable or delete once the docs setup is decided.
# update-docs:
# needs: bump-version
# name: Update docs
# runs-on: ubuntu-latest
# if: "contains(github.event.head_commit.message, 'chore(main):')"
# steps:
# - name: Checkout main
# uses: actions/checkout@v3
# with:
# fetch-depth: 0

# - name: Copy Changelog
# uses: canastro/copy-file-action@master
# with:
# source: "CHANGELOG.md"
# target: "docs/changelog.md"

# - name: Setup Git
# run: |
# git config --global user.name "Github Actions"
# git config --global user.email '41898282+github-actions[bot]@users.noreply.github.com'

# - name: Setup Python
# uses: actions/setup-python@v4
# with:
# python-version: "3.10"

# - name: Install dependencies
# run: |
# python -m pip install --upgrade pip
# pip install -r docs-requirements.txt

# - name: Publish docs
# run: |
# mike deploy --config-file mkdocs.yml --push --force --update-aliases $(git tag --sort=committerdate | tail -1 | sed 's/v//') latest
29 changes: 29 additions & 0 deletions .github/workflows/sync.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Workflow: after a push to `main`, open a pull request that syncs `main` into `dev`.
name: Sync Branches
on:
push:
branches:
- main

jobs:
sync-branches:
runs-on: ubuntu-latest
name: Syncing branches
# Job runs only when the head commit message contains 'chore(main):'.
# NOTE(review): presumably this matches the release commits produced by the
# release workflow — confirm.
if: "contains(github.event.head_commit.message, 'chore(main):')"

steps:
- name: Checkout
uses: actions/checkout@v3

# NOTE(review): Node.js 12 is end-of-life; confirm whether the action below
# still needs this step, and bump or drop it if not.
- name: Set up Node
uses: actions/setup-node@v3
with:
node-version: 12

# Opens a pull request from FROM_BRANCH into TO_BRANCH with the given title,
# authenticating with the workflow's GITHUB_TOKEN.
- name: Opening pull request
id: pull
uses: tretuna/sync-branches@1.4.0
with:
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
FROM_BRANCH: "main"
TO_BRANCH: "dev"
PULL_REQUEST_TITLE: "chore: sync main to dev"
4 changes: 3 additions & 1 deletion AminoExtract/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def main() -> None:
sys.exit(1) if empty_dataframe(GFF_Obj.df, args.feature_type) else None
SeqRecords = filter_sequences(GFF_Obj, fasta_records)

AA_dict = Extract_AminoAcids(GFFobj=GFF_Obj, SeqRecords=SeqRecords, verbose=True)
AA_dict = Extract_AminoAcids(
GFFobj=GFF_Obj, SeqRecords=SeqRecords, keep_gaps=args.keep_gaps, verbose=True
)

write_aa_file(AA_dict, args.output, args.name, args.outtype)
11 changes: 9 additions & 2 deletions AminoExtract/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import re
import sys

from rich import print

from AminoExtract import __prog__, __version__
from AminoExtract.functions import QuickArgFormatter, RichParser, log

Expand Down Expand Up @@ -150,6 +148,15 @@ def get_args(givenargs: list[str] | None = None) -> argparse.Namespace:
required=False,
)

opt_args.add_argument(
"--keep-gaps",
"-kg",
action="store_true",
default=False,
help='If this flag is set then the amino acid translation will be done including gaps in the nucleotide sequence.\nThis results in an "X" on gap positions in the aminoacid sequence.\n [underline]By default, gaps are removed before translation.[/underline]',
required=False,
)

opt_args.add_argument(
"--version",
"-v",
Expand Down
2 changes: 0 additions & 2 deletions AminoExtract/filter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import sys

import pandas as pd

from AminoExtract.functions import log
Expand Down
2 changes: 1 addition & 1 deletion AminoExtract/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import magic
import pandas as pd
from Bio import Seq, SeqIO
from Bio import SeqIO

from AminoExtract.functions import log

Expand Down
36 changes: 23 additions & 13 deletions AminoExtract/sequences.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
import sys

from Bio.Seq import Seq
from rich import print

from AminoExtract.functions import log
from AminoExtract.reader import GffDataFrame
Expand All @@ -25,7 +22,10 @@ def Reverse_complement(seq: str) -> Seq:


def Extract_AminoAcids(
GFFobj: GffDataFrame, SeqRecords: list, verbose: bool = False
GFFobj: GffDataFrame,
SeqRecords: list,
keep_gaps: bool = False,
verbose: bool = False,
) -> dict:
"""
Extract amino acids from the SeqRecord objects based on the start and end positions of the GFFobj.df dataframe
Expand All @@ -36,13 +36,18 @@ def Extract_AminoAcids(
GffDataFrame
SeqRecords : list
list of SeqRecord objects
feature_type : str
str
keep_gaps : bool, optional
If True, gaps ('-') in the nucleotide sequence will not be removed before AA translation.
If False, gaps will be removed from the nucleotide sequence before translation.
(default is False)
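verbose : bool, optional
If True, a warning is logged when a GFF record has no 'Name' attribute and its 'ID' is used instead.
(default is False)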
Returns
-------
dict
A dictionary with the SeqRecord id as the key and the amino acid sequences as the value.
A dictionary with the sequence ID as the key and a dictionary as the value. The dictionary has the
name of the feature as the key and the amino acid sequence as the value.
"""

log.info(
Expand All @@ -58,11 +63,12 @@ def Extract_AminoAcids(
for row in GFFobj.df.itertuples():
try:
name = row.Name
except AttributeError:
log.warn("No '[green]Name[/green]' attribute found in GFF records. Using '[cyan]ID[/cyan]' instead") if verbose else None
except AttributeError:
log.warn(
"No '[green]Name[/green]' attribute found in GFF records. Using '[cyan]ID[/cyan]' instead"
) if verbose else None
name = f"ID-{row.ID}"



# get the sequence ID from the row
seq_id = row.seqid

Expand All @@ -75,7 +81,11 @@ def Extract_AminoAcids(
NucSequence = SeqDict[seq_id]

# get the sequence slice from the start to the end position
seq_slice = NucSequence[start:end].replace("-", "")
seq_slice = (
NucSequence[start:end]
if keep_gaps
else NucSequence[start:end].replace("-", "")
)

# convert the sequence slice to a string
seq_slice_str = str(seq_slice)
Expand Down
1 change: 0 additions & 1 deletion AminoExtract/writer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import pathlib
import sys

from Bio import Seq

Expand Down
8 changes: 8 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,12 @@
},
keywords=[],
zip_safe=False,
classifiers=[
"Development Status :: 4 - Beta",
"Programming Language :: Python :: 3.10",
"License :: OSI Approved :: MIT License",
"Topic :: Scientific/Engineering :: Bio-Informatics",
"Intended Audience :: Science/Research",
"Operating System :: POSIX",
],
)

0 comments on commit 59bbb36

Please sign in to comment.