Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle celery errors by re-running async task #14

Merged
merged 7 commits into from
Feb 4, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic
Versioning](https://semver.org/spec/v2.0.0.html).

# [0.19.1] -
### Fixed
- [PR 14](/~https://github.com/salesforce/django-declarative-apis/pull/14) Retry async tasks if kombu.exceptions.OperationalError is encountered

# [0.19.0] - 2020-01-07
### Added
- [PR 12](/~https://github.com/salesforce/django-declarative-apis/pull/12) Added support for logging correlation ids in deferred tasks.
Expand Down
17 changes: 16 additions & 1 deletion django_declarative_apis/machinery/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import celery
import time
from typing import NamedTuple
import kombu.exceptions

from django.conf import settings
from django.core.cache import cache
Expand Down Expand Up @@ -183,7 +184,21 @@ def schedule_future_task_runner(task_runner_args, task_runner_kwargs,
countdown=countdown
)
task_runner_kwargs['correlation_id'] = _get_correlation_id()
future_task_runner.apply_async(task_runner_args, task_runner_kwargs, queue=queue, routing_key=routing_key, countdown=countdown+delay)

MAX_ATTEMPTS = 3
for attempt in range(MAX_ATTEMPTS):
# XXX: This is an attempt to skirt around an unsolved, low repro issue somewhere in the celery/kombu/redis-py stack.
# Once in a while, a connection in the pool will timeout prior to a health check being called in redis-py and
# will result in an error being raised here. This should be removed once the issue has been sorted out.
# Note: This is around the use of redis-py in celery where celery's event loop is not running
# /~https://github.com/celery/kombu/issues/1019
try:
future_task_runner.apply_async(task_runner_args, task_runner_kwargs, queue=queue, routing_key=routing_key, countdown=countdown+delay)
return
except kombu.exceptions.OperationalError as err:
logger.warn('kombu.exceptions.OperationalError (attempt: %s)', attempt)
if attempt >= MAX_ATTEMPTS - 1:
raise err
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to have the option to fall back to running the task synchronously, perhaps controlled by a Django setting, or else by an option in the DeferrableTask decorator.



@celery_task(ignore_results=True,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

setuptools.setup(
name='django-declarative-apis',
version='0.19.0',
version='0.19.1',
author='Drew Shafer',
url='https://salesforce.com',
description='Simple, readable, declarative APIs for Django',
Expand Down
72 changes: 72 additions & 0 deletions tests/machinery/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@
import http
import urllib.parse

import kombu.exceptions
import django.core.exceptions
from django.db import models
from django.core.cache import cache

from django_declarative_apis import machinery, models as dda_models
from django_declarative_apis.machinery import errors, filtering, tasks
from django_declarative_apis.machinery.tasks import future_task_runner
from django_declarative_apis.resources.utils import HttpStatusCode, rc

import tests.models
Expand Down Expand Up @@ -749,6 +751,76 @@ def setUp(self):
'filtered_retry_count_2': 0
}

def test_get_response_kombu_error_retried(self):
    """The deferred task still runs when the first two enqueue attempts fail.

    ``future_task_runner.apply_async`` is patched so that its first two calls
    raise ``kombu.exceptions.OperationalError`` and every later call is
    delegated to ``future_task_runner.apply``. The response must be
    unaffected, the job counter must have moved off zero, and the endpoint's
    semaphore must show that the deferred task executed.
    """
    expected_response = {'foo': 'bar'}
    endpoint = _TestEndpoint(expected_response)
    manager = machinery.EndpointBinder.BoundEndpointManager(
        machinery._EndpointRequestLifecycleManager(endpoint),
        endpoint
    )

    conf = tasks.future_task_runner.app.conf
    old_val = conf['task_always_eager']
    conf['task_always_eager'] = True

    # Restore the celery setting no matter where the setup or the call fails,
    # so a broken run cannot leak eager mode into other tests.
    try:
        cache.set(tasks.JOB_COUNT_CACHE_KEY, 0)

        with mock.patch('django_declarative_apis.machinery.tasks.future_task_runner.apply_async') as mock_apply:
            exceptions = iter([kombu.exceptions.OperationalError, kombu.exceptions.OperationalError])

            def _side_effect(*args, **kwargs):
                # While failures remain, raise the next one. Once the
                # iterator is exhausted, next() raises StopIteration and the
                # call falls through to running the task for real.
                try:
                    raise next(exceptions)
                except StopIteration:
                    return future_task_runner.apply(*args, **kwargs)
            mock_apply.side_effect = _side_effect

            resp = manager.get_response()
    finally:
        conf['task_always_eager'] = old_val

    self.assertEqual(resp, (http.HTTPStatus.OK, expected_response))
    # assertNotEqual reports the offending value on failure, unlike
    # assertTrue(x != 0).
    self.assertNotEqual(cache.get(tasks.JOB_COUNT_CACHE_KEY), 0)

    self.assertEqual('deferred task executed', _TestEndpoint.semaphore['status'])

def test_get_response_kombu_error_attempts_exceeded(self):
    """The error propagates when every enqueue attempt fails.

    ``future_task_runner.apply_async`` is patched so that its first three
    calls raise ``kombu.exceptions.OperationalError``. ``get_response()`` is
    expected to raise that error, and the endpoint's semaphore must stay
    unset, showing that the deferred task never ran.
    """
    expected_response = {'foo': 'bar'}
    endpoint = _TestEndpoint(expected_response)
    manager = machinery.EndpointBinder.BoundEndpointManager(
        machinery._EndpointRequestLifecycleManager(endpoint),
        endpoint
    )

    conf = tasks.future_task_runner.app.conf
    old_val = conf['task_always_eager']
    conf['task_always_eager'] = True

    # Restore the celery setting no matter where the setup or the call fails,
    # so a broken run cannot leak eager mode into other tests.
    try:
        cache.set(tasks.JOB_COUNT_CACHE_KEY, 0)

        with mock.patch('django_declarative_apis.machinery.tasks.future_task_runner.apply_async') as mock_apply:
            exceptions = iter([
                kombu.exceptions.OperationalError,
                kombu.exceptions.OperationalError,
                kombu.exceptions.OperationalError,
            ])

            def _side_effect(*args, **kwargs):
                # While failures remain, raise the next one. Once the
                # iterator is exhausted, next() raises StopIteration and the
                # call falls through to running the task for real.
                try:
                    raise next(exceptions)
                except StopIteration:
                    return future_task_runner.apply(*args, **kwargs)
            mock_apply.side_effect = _side_effect

            # NOTE(review): three failures presumably match the retry limit
            # in the tasks module - confirm if that limit changes.
            with self.assertRaises(kombu.exceptions.OperationalError):
                manager.get_response()
    finally:
        conf['task_always_eager'] = old_val

    self.assertIsNone(_TestEndpoint.semaphore['status'])

def test_get_response_success(self):
expected_response = {'foo': 'bar'}

Expand Down