Skip to content

Commit

Permalink
Merge pull request #90 from umr-lops/add_extra_outdir_check
Browse files Browse the repository at this point in the history
download checking directories enlarged.
  • Loading branch information
agrouaze authored Dec 17, 2024
2 parents d58dab2 + a78f092 commit be3c0ef
Show file tree
Hide file tree
Showing 27 changed files with 660 additions and 383 deletions.
5 changes: 5 additions & 0 deletions .github/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Configuration for GitHub's automatically generated release notes.
changelog:
  exclude:
    # Pull requests authored by these bot accounts are excluded.
    authors:
      - dependabot
      - pre-commit-ci
98 changes: 98 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Continuous integration: install the package in a micromamba environment and
# run the pytest suite on every supported OS / Python combination.
name: CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

# A newer run on the same workflow + ref cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  detect-skip-ci-trigger:
    name: "Detect CI Trigger: [skip-ci]"
    # The parentheses matter: `&&` binds tighter than `||`, so without them the
    # repository guard would not apply to pull_request events.
    if: |
      github.repository == 'umr-lops/cdsodatacli'
      && (github.event_name == 'push' || github.event_name == 'pull_request')
    runs-on: ubuntu-latest
    outputs:
      triggered: ${{ steps.detect-trigger.outputs.trigger-found }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 2
      - uses: xarray-contrib/ci-trigger@v1
        id: detect-trigger
        with:
          keyword: "[skip-ci]"

  ci:
    name: ${{ matrix.os }} py${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    needs: detect-skip-ci-trigger

    # Only run when the detection job reported that the keyword was not found.
    # NOTE(review): the detection job does not run for workflow_dispatch, so
    # this job is presumably skipped on manual runs as well - confirm.
    if: needs.detect-skip-ci-trigger.outputs.triggered == 'false'

    defaults:
      run:
        shell: bash -l {0}

    strategy:
      fail-fast: false
      matrix:
        # Versions are quoted so that 3.10 is not parsed as the float 3.1.
        python-version: ["3.10", "3.11", "3.12"]
        os: ["ubuntu-latest", "macos-latest", "windows-latest"]

    steps:
      - name: Checkout the repository
        uses: actions/checkout@v4
        with:
          # need to fetch all tags to get a correct version
          fetch-depth: 0 # fetch all branches and tags

      - name: Setup environment variables
        run: |
          echo "TODAY=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"
          echo "CONDA_ENV_FILE=ci/requirements/environment.yaml" >> "$GITHUB_ENV"

      - name: Setup micromamba
        uses: mamba-org/setup-micromamba@v2
        with:
          environment-file: ${{ env.CONDA_ENV_FILE }}
          environment-name: cdsodatacli-tests
          cache-environment: true
          # TODAY is part of the key, so the cached environment changes daily.
          cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{matrix.python-version}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}"
          create-args: >-
            python=${{matrix.python-version}}
            conda

      - name: Install cdsodatacli
        run: |
          python -m pip install --no-deps -e .

      - name: Import cdsodatacli
        run: |
          python -c "import cdsodatacli"

      - name: Create secrets.yml from GitHub Secret
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so quotes, `$` or backticks inside the secret are not
          # evaluated by bash.
          SECRETS_YAML: ${{ secrets.SECRET_FOR_TEST_DOWNLOAD_CDSE }}
        run: |
          echo "${SECRETS_YAML}" > secrets.yml
        shell: bash

      - name: Export secrets as environment variables
        # `-f2-` keeps everything after the first ':' so that a value which
        # itself contains ':' is not truncated.
        run: |
          export DEFAULT_LOGIN_CDSE=$(grep 'DEFAULT_LOGIN_CDSE' secrets.yml | cut -d':' -f2- | tr -d ' ')
          export DEFAULT_PASSWD_CDSE=$(grep 'DEFAULT_PASSWD_CDSE' secrets.yml | cut -d':' -f2- | tr -d ' ')
          echo "DEFAULT_LOGIN_CDSE=${DEFAULT_LOGIN_CDSE}" >> "$GITHUB_ENV"
          echo "DEFAULT_PASSWD_CDSE=${DEFAULT_PASSWD_CDSE}" >> "$GITHUB_ENV"
        shell: bash

      - name: Run tests
        env:
          DEFAULT_LOGIN_CDSE: ${{ env.DEFAULT_LOGIN_CDSE }}
          DEFAULT_PASSWD_CDSE: ${{ env.DEFAULT_PASSWD_CDSE }}
        run: |
          python -m pytest --cov=cdsodatacli
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ fabric.properties
# modules.xml
# .idea/misc.xml
# *.ipr
.idea

localconfig.yml
# Sonarlint plugin
.idea/**/sonarlint/

Expand Down Expand Up @@ -616,3 +618,5 @@ MigrationBackup/

# End of https://www.gitignore.io/api/osx,python,pycharm,windows,visualstudio,visualstudiocode
.cdsodatacli/api_cache/

.env
61 changes: 33 additions & 28 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,36 +1,41 @@
default_language_version:
python: python3.9

default_stages: [commit, push]
ci:
autoupdate_schedule: monthly

# https://pre-commit.com/
repos:
- repo: /~https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
rev: v5.0.0
hooks:
- id: check-yaml
- id: trailing-whitespace
- id: end-of-file-fixer
exclude: LICENSE

- repo: local
- id: check-docstring-first
- repo: /~https://github.com/psf/black
rev: 24.10.0
hooks:
- id: pyupgrade
name: pyupgrade
entry: poetry run pyupgrade --py39-plus
types: [python]
language: system

- repo: local
- id: black
- repo: /~https://github.com/keewis/blackdoc
rev: v0.3.9
hooks:
- id: isort
name: isort
entry: poetry run isort --settings-path pyproject.toml
types: [python]
language: system

- repo: local
- id: blackdoc
additional_dependencies: ["black==24.10.0"]
- id: blackdoc-autoupdate-black
- repo: /~https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.1
hooks:
- id: black
name: black
entry: poetry run black --config pyproject.toml
types: [python]
language: system
- id: ruff
args: [--fix]
- repo: /~https://github.com/kynan/nbstripout
rev: 0.7.1
hooks:
- id: nbstripout
args: [--extra-keys=metadata.kernelspec metadata.language_info.version]
- repo: /~https://github.com/rbubley/mirrors-prettier
rev: v3.3.3
hooks:
- id: prettier
- repo: /~https://github.com/ComPWA/taplo-pre-commit
rev: v0.9.3
hooks:
- id: taplo-format
- id: taplo-lint
args: [--no-schema]
29 changes: 17 additions & 12 deletions cdsodatacli/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
MAX_SESSION_PER_ACCOUNT,
)
from cdsodatacli.query import fetch_data
from cdsodatacli.utils import conf, test_safe_archive, test_safe_spool
from cdsodatacli.utils import conf, check_safe_in_archive, check_safe_in_spool, check_safe_in_outputdir
from cdsodatacli.product_parser import ExplodeSAFE
from collections import defaultdict

Expand Down Expand Up @@ -166,10 +166,12 @@ def filter_product_already_present(cpt, df, outputdir, force_download=False):
to_download = False
if force_download:
to_download = True
if test_safe_archive(safename=safename_product):
if check_safe_in_archive(safename=safename_product):
cpt["archived_product"] += 1
elif test_safe_spool(safename=safename_product):
elif check_safe_in_spool(safename=safename_product):
cpt["in_spool_product"] += 1
elif check_safe_in_outputdir(outputdir=outputdir,safename=safename_product):
cpt["in_outdir_product"] += 1
else:
to_download = True
cpt["product_absent_from_local_disks"] += 1
Expand Down Expand Up @@ -206,11 +208,11 @@ def download_list_product_multithread_v2(
v2 is handling multi account round-robin and token semaphore files
Parameters
----------
list_id (list)
list_safename (list)
outputdir (str)
list_id (list): product hash
list_safename (list): product names
outputdir (str): the directory where to store the product collected
hideProgressBar (bool): True -> no tqdm progress bar in stdout
account_group (str)
account_group (str): the name of the group of CDSE logins to be used
check_on_disk (bool): True -> if the product is in the spool dir or in archive dir the download is skipped
Returns
Expand Down Expand Up @@ -341,7 +343,7 @@ def download_list_product_multithread_v2(


def download_list_product(
list_id, list_safename, outputdir, specific_account, hideProgressBar=False
list_id, list_safename, outputdir, specific_account,specific_passwd=None, hideProgressBar=False
):
"""
Expand All @@ -350,8 +352,10 @@ def download_list_product(
list_id (list) of string could be hash (eg a1e74573-aa77-55d6-a08d-7b6612761819) provided by CDS Odata
list_safename (list) of string basename of SAFE product (eg. S1A_IW_GRDH_1SDV_20221013T065030_20221013T0650...SAFE)
outputdir (str) path where product will be stored
specific_account (str): CDSE account to use
specific_passwd (str): CDSE password associated to specific_account (optional)
hideProgressBar (bool): True -> no tqdm progress bar
specific_account (str):
Returns
-------
Expand All @@ -369,7 +373,7 @@ def download_list_product(
login,
path_semphore_token,
) = get_bearer_access_token(
quiet=hideProgressBar, specific_account=specific_account
quiet=hideProgressBar, specific_account=specific_account,passwd=specific_passwd
)
else: # select randomly one token among existing
path_semphore_token = random.choice(lst_usable_tokens)
Expand All @@ -394,9 +398,9 @@ def download_list_product(
id_product = list_id[ii]
url_product = conf["URL_download"] % id_product
safename_product = list_safename[ii]
if test_safe_archive(safename=safename_product):
if check_safe_in_archive(safename=safename_product):
cpt["archived_product"] += 1
elif test_safe_spool(safename=safename_product):
elif check_safe_in_spool(safename=safename_product):
cpt["in_spool_product"] += 1
else:
cpt["product_absent_from_local_disks"] += 1
Expand Down Expand Up @@ -688,6 +692,7 @@ def download_list_product_sequential(

def main():
"""
download data from an existing listing of product
package as an alias for this method
Returns
-------
Expand Down
11 changes: 7 additions & 4 deletions cdsodatacli/fetch_access_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
MAX_VALIDITY_ACCESS_TOKEN = 600 # sec (defined by CDS API)


def get_bearer_access_token(quiet=True, specific_account=None, account_group="logins"):
def get_bearer_access_token(quiet=True, specific_account=None,passwd=None, account_group="logins"):
"""
OData access token (validity=600sec)
specific_account (str) [optional, default=None -> first available account in config file]
passwd (str): [optional, default is to search in config files]
Returns
-------
Expand All @@ -24,11 +25,13 @@ def get_bearer_access_token(quiet=True, specific_account=None, account_group="lo
if specific_account is None:
all_accounts = list(conf[account_group].keys())
login = random.choice(all_accounts)
passwd = conf[account_group][all_accounts[0]]
if passwd is None:
passwd = conf[account_group][all_accounts[0]]
else:
login = specific_account
logging.debug("conf[account_group] %s", type(conf[account_group]))
passwd = conf[account_group][specific_account]
if passwd is None:
logging.debug("conf[account_group] %s", type(conf[account_group]))
passwd = conf[account_group][specific_account]
if quiet:
prefix = "curl -s "
else:
Expand Down
44 changes: 37 additions & 7 deletions cdsodatacli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,17 @@
local_config_pontential_path = os.path.join(
os.path.dirname(cdsodatacli.__file__), "localconfig.yml"
)

config_path = os.path.join(os.path.dirname(cdsodatacli.__file__), "config.yml")
if os.path.exists(local_config_pontential_path):
config_path = local_config_pontential_path
used_config_path = local_config_pontential_path
else:
config_path = os.path.join(os.path.dirname(cdsodatacli.__file__), "config.yml")
logging.info("config path: %s", config_path)
stream = open(config_path, "r")
used_config_path = config_path
logging.info("config path that is used: %s", used_config_path)
stream = open(used_config_path, "r")
conf = load(stream, Loader=Loader)


def test_safe_spool(safename):
def check_safe_in_spool(safename):
"""
Parameters
Expand Down Expand Up @@ -48,6 +48,36 @@ def test_safe_spool(safename):
logging.debug("present_in_spool : %s", present_in_spool)
return present_in_spool

def check_safe_in_outputdir(outputdir, safename):
    """
    Check whether a product is already present in the output directory.

    Three candidate names are looked up inside ``outputdir``: the SAFE name
    itself, the SAFE name with ".zip" appended, and the SAFE name with its
    ".SAFE" part replaced by ".zip".

    Parameters
    ----------
        outputdir (str): directory in which the product is searched
        safename (str): basename of the product

    Returns
    -------
        present_in_outdir (bool): True -> the product is already in the output dir

    """
    candidate_names = (
        safename,
        safename + ".zip",
        safename.replace(".SAFE", ".zip"),
    )
    # any() stops at the first existing candidate, like the former loop + break
    present_in_outdir = any(
        os.path.exists(os.path.join(outputdir, candidate))
        for candidate in candidate_names
    )
    logging.debug("present_in_outdir : %s", present_in_outdir)
    return present_in_outdir


def WhichArchiveDir(safe):
"""
Expand Down Expand Up @@ -90,7 +120,7 @@ def WhichArchiveDir(safe):
return gooddir


def test_safe_archive(safename):
def check_safe_in_archive(safename):
"""
Parameters
Expand Down
Loading

0 comments on commit be3c0ef

Please sign in to comment.