From a563e9c463303d8170d60ea0addf1dbb96d9da81 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Fri, 18 Sep 2020 17:00:30 +0200 Subject: [PATCH 01/11] Send a basic event ingestion status report --- posthog/apps.py | 9 +++------ posthog/celery.py | 20 ++++++++++++++++++++ posthog/utils.py | 8 ++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/posthog/apps.py b/posthog/apps.py index 73b771bebdea6..e25d827f2362a 100644 --- a/posthog/apps.py +++ b/posthog/apps.py @@ -1,11 +1,11 @@ -import hashlib import os -import uuid import posthoganalytics from django.apps import AppConfig from django.conf import settings +from posthog.utils import get_machine_id + class PostHogConfig(AppConfig): name = "posthog" @@ -16,10 +16,7 @@ def ready(self): if settings.DEBUG: # log development server launch to posthog if os.getenv("RUN_MAIN") == "true": - # MAC addresses are 6 bits long, so overflow shouldn't happen - # hashing here as we don't care about the actual address, just it being rather consistent - mac_address_hash = hashlib.md5(uuid.getnode().to_bytes(6, "little")) - posthoganalytics.capture(mac_address_hash.hexdigest(), "development server launched") + posthoganalytics.capture(get_machine_id(), "development server launched") posthoganalytics.disabled = True elif settings.TEST or os.environ.get("OPT_OUT_CAPTURE"): posthoganalytics.disabled = True diff --git a/posthog/celery.py b/posthog/celery.py index 406398fa1db0b..26003546e94b3 100644 --- a/posthog/celery.py +++ b/posthog/celery.py @@ -1,8 +1,11 @@ +import hashlib import os import time +import uuid from datetime import datetime from typing import Optional +import posthoganalytics import redis import statsd # type: ignore from celery import Celery, group @@ -10,8 +13,11 @@ from dateutil import parser from django.conf import settings from django.db import connection +from django.utils import timezone +from posthog.models import Event from posthog.settings import STATSD_HOST, STATSD_PORT, STATSD_PREFIX +from posthog.utils import get_machine_id # set the default Django settings module for the 'celery' program. os.environ.setdefault("DJANGO_SETTINGS_MODULE", "posthog.settings") @@ -48,6 +54,9 @@ def setup_periodic_tasks(sender, **kwargs): sender.add_periodic_task( crontab(day_of_week="mon,fri"), update_event_partitions.s(), # check twice a week ) + sender.add_periodic_task( + crontab(day_of_week="mon"), status_report.s(), + ) sender.add_periodic_task(15 * 60, calculate_cohort.s(), name="debug") sender.add_periodic_task(600, check_cached_items.s(), name="check dashboard items") @@ -85,6 +94,17 @@ def update_event_partitions(): ) +@app.task +def status_report(): + period_end = (timezone.now() - timezone.timedelta(timezone.now().weekday())).replace( + hour=0, minute=0, second=0, microsecond=0 + ) # very start of the current Monday + period_start = period_end - timezone.timedelta(7) # very start of the Monday preceding the current one + events_considered = Event.objects.filter(created_at__gte=period_start, created_at_lt=period_end) + report = {"period": [period_start.isoformat(), period_end.isoformat()], "event_count": events_considered.count()} + posthoganalytics.capture(get_machine_id(), "instance status report", report) + + @app.task def calculate_event_action_mappings(): from posthog.tasks.calculate_action import calculate_actions_from_last_calculation diff --git a/posthog/utils.py b/posthog/utils.py index 163c9bb4e8c77..8ebfba61cdb1b 100644 --- a/posthog/utils.py +++ b/posthog/utils.py @@ -8,6 +8,7 @@ import re import subprocess import time +import uuid from typing import Any, Dict, List, Optional, Tuple, Union from urllib.parse import urlparse, urlsplit @@ -425,3 +426,10 @@ def __call__(self, *args, **kwds): if self.instance == None: self.instance = self.klass(*args, **kwds) return self.instance + + +def get_machine_id() -> str: + """A MAC address-dependent ID. Useful for PostHog instance analytics.""" + # MAC addresses are 6 bits long, so overflow shouldn't happen + # hashing here as we don't care about the actual address, just it being rather consistent + return hashlib.md5(uuid.getnode().to_bytes(6, "little")).hexdigest() From 5b45eb2e9be5bd29941ffc79cd22b8b629c26693 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Fri, 18 Sep 2020 22:35:48 +0200 Subject: [PATCH 02/11] Fix status report --- posthog/celery.py | 19 ++++--------------- posthog/tasks/calculate_action.py | 28 ++++++++++++---------------- posthog/tasks/status_report.py | 0 3 files changed, 16 insertions(+), 31 deletions(-) create mode 100644 posthog/tasks/status_report.py diff --git a/posthog/celery.py b/posthog/celery.py index 26003546e94b3..a47d4358acea6 100644 --- a/posthog/celery.py +++ b/posthog/celery.py @@ -1,11 +1,6 @@ -import hashlib import os import time -import uuid -from datetime import datetime -from typing import Optional -import posthoganalytics import redis import statsd # type: ignore from celery import Celery, group @@ -15,9 +10,7 @@ from django.db import connection from django.utils import timezone -from posthog.models import Event from posthog.settings import STATSD_HOST, STATSD_PORT, STATSD_PREFIX -from posthog.utils import get_machine_id # set the default Django settings module for the 'celery' program. os.environ.setdefault("DJANGO_SETTINGS_MODULE", "posthog.settings") @@ -55,7 +48,7 @@ def setup_periodic_tasks(sender, **kwargs): crontab(day_of_week="mon,fri"), update_event_partitions.s(), # check twice a week ) sender.add_periodic_task( - crontab(day_of_week="mon"), status_report.s(), + 10, status_report.s(), ) sender.add_periodic_task(15 * 60, calculate_cohort.s(), name="debug") sender.add_periodic_task(600, check_cached_items.s(), name="check dashboard items") @@ -96,13 +89,9 @@ def update_event_partitions(): @app.task def status_report(): - period_end = (timezone.now() - timezone.timedelta(timezone.now().weekday())).replace( - hour=0, minute=0, second=0, microsecond=0 - ) # very start of the current Monday - period_start = period_end - timezone.timedelta(7) # very start of the Monday preceding the current one - events_considered = Event.objects.filter(created_at__gte=period_start, created_at_lt=period_end) - report = {"period": [period_start.isoformat(), period_end.isoformat()], "event_count": events_considered.count()} - posthoganalytics.capture(get_machine_id(), "instance status report", report) + from posthog.tasks.status_report import status_report + + status_report() @app.task diff --git a/posthog/tasks/calculate_action.py b/posthog/tasks/calculate_action.py index 43fa986bb78cb..a5c643bc93f19 100644 --- a/posthog/tasks/calculate_action.py +++ b/posthog/tasks/calculate_action.py @@ -1,25 +1,21 @@ import logging import time +import posthoganalytics from celery import shared_task +from django.utils import timezone -from posthog.celery import app -from posthog.models import Action +from posthog.models import Event +from posthog.utils import get_machine_id logger = logging.getLogger(__name__) -@shared_task -def calculate_action(action_id: int) -> None: - start_time = time.time() - action = Action.objects.get(pk=action_id) - action.calculate_events() - logger.info("Calculating action {} took {:.2f} seconds".format(action.pk, (time.time() - start_time))) - - -def calculate_actions_from_last_calculation() -> None: - actions = Action.objects.filter(deleted=False).only("pk") - for action in actions: - start_time = time.time() - action.calculate_events(start=action.last_calculated_at) - logger.info("Calculating action {} took {:.2f} seconds".format(action.pk, (time.time() - start_time))) +def status_report() -> None: + period_end = (timezone.now() - timezone.timedelta(timezone.now().weekday())).replace( + hour=0, minute=0, second=0, microsecond=0 + ) # very start of the current Monday + period_start = period_end - timezone.timedelta(7) # very start of the Monday preceding the current one + events_considered = Event.objects.filter(created_at__gte=period_start, created_at_lt=period_end) + report = {"period": [period_start.isoformat(), period_end.isoformat()], "event_count": events_considered.count()} + posthoganalytics.capture(get_machine_id(), "instance status report", report) diff --git a/posthog/tasks/status_report.py b/posthog/tasks/status_report.py new file mode 100644 index 0000000000000..e69de29bb2d1d From 2674ce386e8b2e5cbc1b584f831e120b7166714b Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Fri, 18 Sep 2020 22:42:25 +0200 Subject: [PATCH 03/11] Fix at last --- posthog/tasks/calculate_action.py | 28 ++++++++++++++++------------ posthog/tasks/status_report.py | 19 +++++++++++++++++++ posthog/utils.py | 3 ++- 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/posthog/tasks/calculate_action.py b/posthog/tasks/calculate_action.py index a5c643bc93f19..43fa986bb78cb 100644 --- a/posthog/tasks/calculate_action.py +++ b/posthog/tasks/calculate_action.py @@ -1,21 +1,25 @@ import logging import time -import posthoganalytics from celery import shared_task -from django.utils import timezone -from posthog.models import Event -from posthog.utils import get_machine_id +from posthog.celery import app +from posthog.models import Action logger = logging.getLogger(__name__) -def status_report() -> None: - period_end = (timezone.now() - timezone.timedelta(timezone.now().weekday())).replace( - hour=0, minute=0, second=0, microsecond=0 - ) # very start of the current Monday - period_start = period_end - timezone.timedelta(7) # very start of the Monday preceding the current one - events_considered = Event.objects.filter(created_at__gte=period_start, created_at_lt=period_end) - report = {"period": [period_start.isoformat(), period_end.isoformat()], "event_count": events_considered.count()} - posthoganalytics.capture(get_machine_id(), "instance status report", report) +@shared_task +def calculate_action(action_id: int) -> None: + start_time = time.time() + action = Action.objects.get(pk=action_id) + action.calculate_events() + logger.info("Calculating action {} took {:.2f} seconds".format(action.pk, (time.time() - start_time))) + + +def calculate_actions_from_last_calculation() -> None: + actions = Action.objects.filter(deleted=False).only("pk") + for action in actions: + start_time = time.time() + action.calculate_events(start=action.last_calculated_at) + logger.info("Calculating action {} took {:.2f} seconds".format(action.pk, (time.time() - start_time))) diff --git a/posthog/tasks/status_report.py b/posthog/tasks/status_report.py index e69de29bb2d1d..607184df49188 100644 --- a/posthog/tasks/status_report.py +++ b/posthog/tasks/status_report.py @@ -0,0 +1,19 @@ +import logging + +import posthoganalytics +from django.utils import timezone + +from posthog.models import Event +from posthog.utils import get_machine_id + +logger = logging.getLogger(__name__) + + +def status_report() -> None: + period_end = (timezone.now() - timezone.timedelta(timezone.now().weekday())).replace( + hour=0, minute=0, second=0, microsecond=0 + ) # very start of the current Monday + period_start = period_end - timezone.timedelta(7) # very start of the Monday preceding the current one + events_considered = Event.objects.filter(created_at__gte=period_start, created_at_lt=period_end) + report = {"period": [period_start.isoformat(), period_end.isoformat()], "event_count": events_considered.count()} + posthoganalytics.capture(get_machine_id(), "instance status report", report) diff --git a/posthog/utils.py b/posthog/utils.py index 8ebfba61cdb1b..3e3ac45d1ea44 100644 --- a/posthog/utils.py +++ b/posthog/utils.py @@ -19,7 +19,6 @@ from dateutil.relativedelta import relativedelta from django.apps import apps from django.conf import settings -from django.contrib.auth.models import AnonymousUser from django.http import HttpRequest, HttpResponse, JsonResponse from django.template.loader import get_template from django.utils import timezone @@ -314,6 +313,8 @@ def authenticate(self, request: Request): class PublicTokenAuthentication(authentication.BaseAuthentication): def authenticate(self, request: Request): + from django.contrib.auth.models import AnonymousUser + if request.GET.get("share_token") and request.parser_context and request.parser_context.get("kwargs"): Dashboard = apps.get_model(app_label="posthog", model_name="Dashboard") dashboard = Dashboard.objects.filter( From e910a14cf69b574ad51b6ad42929972a4fefc0cf Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Mon, 21 Sep 2020 00:22:21 +0200 Subject: [PATCH 04/11] Improve instance analytics --- posthog/celery.py | 7 +++-- posthog/tasks/status_report.py | 47 +++++++++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/posthog/celery.py b/posthog/celery.py index a47d4358acea6..03d078c0731a2 100644 --- a/posthog/celery.py +++ b/posthog/celery.py @@ -1,14 +1,13 @@ import os import time +import posthoganalytics import redis import statsd # type: ignore -from celery import Celery, group +from celery import Celery from celery.schedules import crontab -from dateutil import parser from django.conf import settings from django.db import connection -from django.utils import timezone from posthog.settings import STATSD_HOST, STATSD_PORT, STATSD_PREFIX @@ -48,7 +47,7 @@ def setup_periodic_tasks(sender, **kwargs): crontab(day_of_week="mon,fri"), update_event_partitions.s(), # check twice a week ) sender.add_periodic_task( - 10, status_report.s(), + crontab(day_of_week="mon"), status_report.s(), ) sender.add_periodic_task(15 * 60, calculate_cohort.s(), name="debug") sender.add_periodic_task(600, check_cached_items.s(), name="check dashboard items") diff --git a/posthog/tasks/status_report.py b/posthog/tasks/status_report.py index 607184df49188..a40c271d54913 100644 --- a/posthog/tasks/status_report.py +++ b/posthog/tasks/status_report.py @@ -1,19 +1,54 @@ import logging +from datetime import datetime, timedelta import posthoganalytics -from django.utils import timezone +from celery.utils.functional import first +from django.db import connection +from psycopg2 import sql -from posthog.models import Event +from posthog.models import Event, User +from posthog.models.utils import namedtuplefetchall from posthog.utils import get_machine_id +from posthog.version import VERSION logger = logging.getLogger(__name__) def status_report() -> None: - period_end = (timezone.now() - timezone.timedelta(timezone.now().weekday())).replace( + period_end = (datetime.utcnow() - timedelta(datetime.utcnow().weekday())).replace( hour=0, minute=0, second=0, microsecond=0 ) # very start of the current Monday - period_start = period_end - timezone.timedelta(7) # very start of the Monday preceding the current one - events_considered = Event.objects.filter(created_at__gte=period_start, created_at_lt=period_end) - report = {"period": [period_start.isoformat(), period_end.isoformat()], "event_count": events_considered.count()} + period_start = period_end - timedelta(7) # very start of the Monday preceding the current one + report = {"period": {"start_inclusive": period_start.isoformat(), "start_exclusive": period_end.isoformat()}} + report["posthog_version"] = VERSION + report["users_who_logged_in"] = [ + {"id": user.id} + if user.anonymize_data + else {"id": user.id, "distinct_id": user.distinct_id, "first_name": user.first_name, "email": user.email} + for user in User.objects.filter(last_login__gte=period_start, last_login__lt=period_end) + ] + events_considered = Event.objects.filter(created_at__gte=period_start, created_at__lt=period_end) + report["events_count_total"] = events_considered.count() + with connection.cursor() as cursor: + cursor.execute( + sql.SQL( + """ + SELECT properties->>'$lib' as lib, COUNT(*) as count + FROM posthog_event WHERE created_at >= %s AND created_at < %s GROUP BY lib + """ + ), + (report["period"]["start_inclusive"], report["period"]["start_exclusive"]), + ) + report["events_count_by_lib"] = {result.lib: result.count for result in namedtuplefetchall(cursor)} + cursor.execute( + sql.SQL( + """ + SELECT event as name, COUNT(*) as count + FROM posthog_event WHERE created_at >= %s AND created_at < %s GROUP BY name + """ + ), + (report["period"]["start_inclusive"], report["period"]["start_exclusive"]), + ) + report["events_count_by_name"] = {result.name: result.count for result in namedtuplefetchall(cursor)} + posthoganalytics.api_key = "sTMFPsFhdP1Ssg" posthoganalytics.capture(get_machine_id(), "instance status report", report) From c63cf73326e8aaf684d2495d8778366b40be2c36 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Mon, 21 Sep 2020 00:26:31 +0200 Subject: [PATCH 05/11] Satisfy mypy --- posthog/tasks/status_report.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/posthog/tasks/status_report.py b/posthog/tasks/status_report.py index a40c271d54913..66a3d2a5f91e7 100644 --- a/posthog/tasks/status_report.py +++ b/posthog/tasks/status_report.py @@ -1,10 +1,11 @@ import logging from datetime import datetime, timedelta +from typing import Any, Dict import posthoganalytics from celery.utils.functional import first from django.db import connection -from psycopg2 import sql +from psycopg2 import sql # type: ignore from posthog.models import Event, User from posthog.models.utils import namedtuplefetchall @@ -19,7 +20,9 @@ def status_report() -> None: hour=0, minute=0, second=0, microsecond=0 ) # very start of the current Monday period_start = period_end - timedelta(7) # very start of the Monday preceding the current one - report = {"period": {"start_inclusive": period_start.isoformat(), "start_exclusive": period_end.isoformat()}} + report: Dict[str, Any] = { + "period": {"start_inclusive": period_start.isoformat(), "start_exclusive": period_end.isoformat()} + } report["posthog_version"] = VERSION report["users_who_logged_in"] = [ {"id": user.id} From d59465f5b89a0a68c14a81b3a9fdcac39ce5865b Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Mon, 21 Sep 2020 00:29:50 +0200 Subject: [PATCH 06/11] Include users as active if they logged in after period --- posthog/tasks/status_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/tasks/status_report.py b/posthog/tasks/status_report.py index 66a3d2a5f91e7..324cc3940d19f 100644 --- a/posthog/tasks/status_report.py +++ b/posthog/tasks/status_report.py @@ -28,7 +28,7 @@ def status_report() -> None: {"id": user.id} if user.anonymize_data else {"id": user.id, "distinct_id": user.distinct_id, "first_name": user.first_name, "email": user.email} - for user in User.objects.filter(last_login__gte=period_start, last_login__lt=period_end) + for user in User.objects.filter(last_login__gte=period_start) ] events_considered = Event.objects.filter(created_at__gte=period_start, created_at__lt=period_end) report["events_count_total"] = events_considered.count() From 0191bfa373a27ff1728955597410ee5637a0750e Mon Sep 17 00:00:00 2001 From: Paolo D'Amico Date: Mon, 21 Sep 2020 06:14:17 +0100 Subject: [PATCH 07/11] minor typo fix --- posthog/tasks/status_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/tasks/status_report.py b/posthog/tasks/status_report.py index 324cc3940d19f..57cccbb7b7dd6 100644 --- a/posthog/tasks/status_report.py +++ b/posthog/tasks/status_report.py @@ -21,7 +21,7 @@ def status_report() -> None: ) # very start of the current Monday period_start = period_end - timedelta(7) # very start of the Monday preceding the current one report: Dict[str, Any] = { - "period": {"start_inclusive": period_start.isoformat(), "start_exclusive": period_end.isoformat()} + "period": {"start_inclusive": period_start.isoformat(), "end_exclusive": period_end.isoformat()} } report["posthog_version"] = VERSION report["users_who_logged_in"] = [ From cb1a1dda81e07c8faf86744d0c9d4ac99ba9b6be Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Mon, 21 Sep 2020 13:13:34 +0200 Subject: [PATCH 08/11] Move auth classes to a file of their own --- posthog/api/action.py | 9 +- posthog/api/capture.py | 3 +- posthog/api/dashboard.py | 3 +- posthog/api/decide.py | 3 +- posthog/api/element.py | 2 +- posthog/api/test/test_personal_api_keys.py | 2 - posthog/api/user.py | 4 +- posthog/auth.py | 138 ++++++++++++++++++++ posthog/middleware.py | 4 +- posthog/settings.py | 2 +- posthog/utils.py | 139 +-------------------- 11 files changed, 156 insertions(+), 153 deletions(-) create mode 100644 posthog/auth.py diff --git a/posthog/api/action.py b/posthog/api/action.py index bc9d669cfe58e..1c53459cf593d 100644 --- a/posthog/api/action.py +++ b/posthog/api/action.py @@ -37,6 +37,7 @@ from rest_hooks.signals import raw_hook_event from posthog.api.user import UserSerializer +from posthog.auth import PersonalAPIKeyAuthentication, TemporaryTokenAuthentication from posthog.celery import update_cache_item_task from posthog.constants import TREND_FILTER_TYPE_ACTIONS, TREND_FILTER_TYPE_EVENTS, TRENDS_CUMULATIVE, TRENDS_STICKINESS from posthog.decorators import FUNNEL_ENDPOINT, TRENDS_ENDPOINT, cached_function @@ -55,13 +56,7 @@ ) from posthog.queries import base, funnel, retention, stickiness, trends from posthog.tasks.calculate_action import calculate_action -from posthog.utils import ( - PersonalAPIKeyAuthentication, - TemporaryTokenAuthentication, - append_data, - generate_cache_key, - get_compare_period_dates, -) +from posthog.utils import generate_cache_key from .person import PersonSerializer diff --git a/posthog/api/capture.py b/posthog/api/capture.py index 05183a13bfe23..91f8891630d93 100644 --- a/posthog/api/capture.py +++ b/posthog/api/capture.py @@ -9,10 +9,11 @@ from django.utils import timezone from django.views.decorators.csrf import csrf_exempt +from posthog.auth import PersonalAPIKeyAuthentication from posthog.ee import check_ee_enabled from posthog.models import Team from posthog.tasks.process_event import process_event -from posthog.utils import PersonalAPIKeyAuthentication, cors_response, get_ip_address, load_data_from_request +from posthog.utils import cors_response, get_ip_address, load_data_from_request if settings.EE_AVAILABLE: from ee.clickhouse.process_event import process_event_ee # type: ignore diff --git a/posthog/api/dashboard.py b/posthog/api/dashboard.py index 4defb268ffb51..7bcb0a99e26a3 100644 --- a/posthog/api/dashboard.py +++ b/posthog/api/dashboard.py @@ -13,8 +13,9 @@ from rest_framework.decorators import action from rest_framework.exceptions import AuthenticationFailed +from posthog.auth import PersonalAPIKeyAuthentication, PublicTokenAuthentication from posthog.models import Dashboard, DashboardItem, Filter -from posthog.utils import PersonalAPIKeyAuthentication, PublicTokenAuthentication, generate_cache_key, render_template +from posthog.utils import generate_cache_key, render_template class DashboardSerializer(serializers.ModelSerializer): diff --git a/posthog/api/decide.py b/posthog/api/decide.py index 54aabc3408881..03ff3d64bc2a7 100644 --- a/posthog/api/decide.py +++ b/posthog/api/decide.py @@ -7,8 +7,9 @@ from django.http import HttpRequest, JsonResponse from django.views.decorators.csrf import csrf_exempt +from posthog.auth import PersonalAPIKeyAuthentication from posthog.models import FeatureFlag, Team -from posthog.utils import PersonalAPIKeyAuthentication, base64_to_json, cors_response, load_data_from_request +from posthog.utils import base64_to_json, cors_response, load_data_from_request def _load_data(request) -> Optional[Union[Dict[str, Any], List]]: diff --git a/posthog/api/element.py b/posthog/api/element.py index 2cabc3fe9b3ca..9dbfcf72c548f 100644 --- a/posthog/api/element.py +++ b/posthog/api/element.py @@ -4,8 +4,8 @@ from rest_framework import authentication, request, response, serializers, viewsets from rest_framework.decorators import action +from posthog.auth import PersonalAPIKeyAuthentication, TemporaryTokenAuthentication from posthog.models import Element, ElementGroup, Event, Filter, Team -from posthog.utils import PersonalAPIKeyAuthentication, TemporaryTokenAuthentication class ElementSerializer(serializers.ModelSerializer): diff --git a/posthog/api/test/test_personal_api_keys.py b/posthog/api/test/test_personal_api_keys.py index f286541a8b3cf..f74742126ba53 100644 --- a/posthog/api/test/test_personal_api_keys.py +++ b/posthog/api/test/test_personal_api_keys.py @@ -1,5 +1,3 @@ -from typing import Optional, Type - from posthog.models import PersonalAPIKey from .base import TransactionBaseTest diff --git a/posthog/api/user.py b/posthog/api/user.py index 5fd86f9522dee..3eadf868453e2 100644 --- a/posthog/api/user.py +++ b/posthog/api/user.py @@ -10,13 +10,13 @@ from django.contrib.auth import update_session_auth_hash from django.contrib.auth.password_validation import validate_password from django.core.exceptions import ValidationError -from django.http import HttpRequest, HttpResponse, JsonResponse +from django.http import HttpResponse, JsonResponse from django.shortcuts import redirect from django.views.decorators.http import require_http_methods from rest_framework import exceptions, serializers +from posthog.auth import authenticate_secondarily from posthog.models import Event, User -from posthog.utils import PersonalAPIKeyAuthentication, authenticate_secondarily from posthog.version import VERSION diff --git a/posthog/auth.py b/posthog/auth.py new file mode 100644 index 0000000000000..5f2f4695407a1 --- /dev/null +++ b/posthog/auth.py @@ -0,0 +1,138 @@ +import functools +import re +from typing import Any, Dict, Optional, Tuple, Union +from urllib.parse import urlsplit + +from dateutil import parser +from django.apps import apps +from django.contrib.auth.models import AnonymousUser +from django.http import HttpRequest, JsonResponse +from django.utils import timezone +from rest_framework import authentication +from rest_framework.exceptions import AuthenticationFailed +from rest_framework.request import Request + + +class PersonalAPIKeyAuthentication(authentication.BaseAuthentication): + """A way of authenticating with personal API keys. + Only the first key candidate found in the request is tried, and the order is: + 1. Request Authorization header of type Bearer. + 2. Request body. + 3. Request query string. + """ + + keyword = "Bearer" + + @classmethod + def find_key_with_source( + cls, + request: Union[HttpRequest, Request], + request_data: Optional[Dict[str, Any]] = None, + extra_data: Optional[Dict[str, Any]] = None, + ) -> Optional[Tuple[str, str]]: + """Try to find personal API key in request and return it along with where it was found.""" + if "HTTP_AUTHORIZATION" in request.META: + authorization_match = re.match(fr"^{cls.keyword}\s+(\S.+)$", request.META["HTTP_AUTHORIZATION"]) + if authorization_match: + return authorization_match.group(1).strip(), "Authorization header" + if request_data is None and isinstance(request, Request): + data = request.data + else: + data = request_data or {} + if "personal_api_key" in data: + return data["personal_api_key"], "body" + if "personal_api_key" in request.GET: + return request.GET["personal_api_key"], "query string" + if extra_data and "personal_api_key" in extra_data: + # compatibility with /capture endpoint + return extra_data["personal_api_key"], "query string data" + return None + + @classmethod + def find_key( + cls, + request: Union[HttpRequest, Request], + request_data: Optional[Dict[str, Any]] = None, + extra_data: Optional[Dict[str, Any]] = None, + ) -> Optional[str]: + """Try to find personal API key in request and return it.""" + key_with_source = cls.find_key_with_source(request, request_data, extra_data) + return key_with_source[0] if key_with_source is not None else None + + @classmethod + def authenticate(cls, request: Union[HttpRequest, Request]) -> Optional[Tuple[Any, None]]: + personal_api_key_with_source = cls.find_key_with_source(request) + if not personal_api_key_with_source: + return None + personal_api_key, source = personal_api_key_with_source + PersonalAPIKey = apps.get_model(app_label="posthog", model_name="PersonalAPIKey") + try: + personal_api_key_object = ( + PersonalAPIKey.objects.select_related("user").filter(user__is_active=True).get(value=personal_api_key) + ) + except PersonalAPIKey.DoesNotExist: + raise AuthenticationFailed(detail=f"Personal API key found in request {source} is invalid.") + personal_api_key_object.last_used_at = timezone.now() + personal_api_key_object.save() + assert personal_api_key_object.user is not None + return personal_api_key_object.user, None + + @classmethod + def authenticate_header(cls, request) -> str: + return cls.keyword + + +class TemporaryTokenAuthentication(authentication.BaseAuthentication): + def authenticate(self, request: Request): + # if the Origin is different, the only authentication method should be temporary_token + # This happens when someone is trying to create actions from the editor on their own website + if ( + request.headers.get("Origin") + and urlsplit(request.headers["Origin"]).netloc not in urlsplit(request.build_absolute_uri("/")).netloc + ): + if not request.GET.get("temporary_token"): + raise AuthenticationFailed( + detail="No temporary_token set. " + + "That means you're either trying to access this API from a different site, " + + "or it means your proxy isn't sending the correct headers. " + + "See https://posthog.com/docs/deployment/running-behind-proxy for more information." + ) + if request.GET.get("temporary_token"): + User = apps.get_model(app_label="posthog", model_name="User") + user = User.objects.filter(temporary_token=request.GET.get("temporary_token")) + if not user.exists(): + raise AuthenticationFailed(detail="User doesn't exist") + return (user.first(), None) + return None + + +class PublicTokenAuthentication(authentication.BaseAuthentication): + def authenticate(self, request: Request): + if request.GET.get("share_token") and request.parser_context and request.parser_context.get("kwargs"): + Dashboard = apps.get_model(app_label="posthog", model_name="Dashboard") + dashboard = Dashboard.objects.filter( + share_token=request.GET.get("share_token"), pk=request.parser_context["kwargs"].get("pk"), + ) + if not dashboard.exists(): + raise AuthenticationFailed(detail="Dashboard doesn't exist") + return (AnonymousUser(), None) + return None + + +def authenticate_secondarily(endpoint): + """Proper authentication for function views.""" + + @functools.wraps(endpoint) + def wrapper(request: HttpRequest): + if not request.user.is_authenticated: + try: + auth_result = PersonalAPIKeyAuthentication.authenticate(request) + if isinstance(auth_result, tuple) and auth_result[0].__class__.__name__ == "User": + request.user = auth_result[0] + else: + raise AuthenticationFailed("Authentication credentials were not provided.") + except AuthenticationFailed as e: + return JsonResponse({"detail": e.detail}, status=401) + return endpoint(request) + + return wrapper diff --git a/posthog/middleware.py b/posthog/middleware.py index f958660cd6d82..7b932ea8ba1cf 100644 --- a/posthog/middleware.py +++ b/posthog/middleware.py @@ -6,7 +6,7 @@ from django.http import HttpRequest, HttpResponse from django.middleware.csrf import CsrfViewMiddleware -from .utils import PersonalAPIKeyAuthentication +from .auth import PersonalAPIKeyAuthentication class AllowIP(object): @@ -102,7 +102,7 @@ class CsrfOrKeyViewMiddleware(CsrfViewMiddleware): def process_view(self, request, callback, callback_args, callback_kwargs): result = super().process_view(request, callback, callback_args, callback_kwargs) # None if request accepted # if super().process_view did not find a valid CSRF token, try looking for a personal API key - if result is not None and PersonalAPIKeyAuthentication().find_key_with_source(request) is not None: + if result is not None and PersonalAPIKeyAuthentication.find_key_with_source(request) is not None: return self._accept(request) return result diff --git a/posthog/settings.py b/posthog/settings.py index fc36d85dea9b3..400b137de0031 100644 --- a/posthog/settings.py +++ b/posthog/settings.py @@ -383,7 +383,7 @@ def print_warning(warning_lines: Sequence[str]): "PAGE_SIZE": 100, "DEFAULT_PERMISSION_CLASSES": ["rest_framework.permissions.IsAuthenticated",], "DEFAULT_AUTHENTICATION_CLASSES": [ - "posthog.utils.PersonalAPIKeyAuthentication", + "posthog.auth.PersonalAPIKeyAuthentication", "rest_framework.authentication.BasicAuthentication", "rest_framework.authentication.SessionAuthentication", ], diff --git a/posthog/utils.py b/posthog/utils.py index 3e3ac45d1ea44..414c699d25365 100644 --- a/posthog/utils.py +++ b/posthog/utils.py @@ -1,6 +1,5 @@ import base64 import datetime -import functools import gzip import hashlib import json @@ -10,21 +9,18 @@ import time import uuid from typing import Any, Dict, List, Optional, Tuple, Union -from urllib.parse import urlparse, urlsplit +from urllib.parse import urlparse import lzstring # type: ignore import pytz import redis from dateutil import parser from dateutil.relativedelta import relativedelta -from django.apps import apps from django.conf import settings -from django.http import HttpRequest, HttpResponse, JsonResponse +from django.http import HttpRequest, HttpResponse from django.template.loader import get_template from django.utils import timezone from rest_framework import authentication -from rest_framework.exceptions import AuthenticationFailed -from rest_framework.request import Request from sentry_sdk import push_scope @@ -122,14 +118,14 @@ def render_template(template_name: str, request: HttpRequest, context=None) -> H context["debug"] = True try: context["git_rev"] = ( - subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode("ascii").strip() + subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode("utf-8").strip() ) except: context["git_rev"] = None try: context["git_branch"] = ( subprocess.check_output(["git", "rev-parse", "--symbolic-full-name", "--abbrev-ref", "HEAD"]) - .decode("ascii") + .decode("utf-8") .strip() ) except: @@ -218,114 +214,6 @@ def cors_response(request, response): return response -class PersonalAPIKeyAuthentication(authentication.BaseAuthentication): - """A way of authenticating with personal API keys. - Only the first key candidate found in the request is tried, and the order is: - 1. Request Authorization header of type Bearer. - 2. Request body. - 3. Request query string. - """ - - keyword = "Bearer" - - @classmethod - def find_key_with_source( - cls, - request: Union[HttpRequest, Request], - request_data: Optional[Dict[str, Any]] = None, - extra_data: Optional[Dict[str, Any]] = None, - ) -> Optional[Tuple[str, str]]: - """Try to find personal API key in request and return it along with where it was found.""" - if "HTTP_AUTHORIZATION" in request.META: - authorization_match = re.match(fr"^{cls.keyword}\s+(\S.+)$", request.META["HTTP_AUTHORIZATION"]) - if authorization_match: - return authorization_match.group(1).strip(), "Authorization header" - if request_data is None and isinstance(request, Request): - data = request.data - else: - data = request_data or {} - if "personal_api_key" in data: - return data["personal_api_key"], "body" - if "personal_api_key" in request.GET: - return request.GET["personal_api_key"], "query string" - if extra_data and "personal_api_key" in extra_data: - # compatibility with /capture endpoint - return extra_data["personal_api_key"], "query string data" - return None - - @classmethod - def find_key( - cls, - request: Union[HttpRequest, Request], - request_data: Optional[Dict[str, Any]] = None, - extra_data: Optional[Dict[str, Any]] = None, - ) -> Optional[str]: - """Try to find personal API key in request and return it.""" - key_with_source = cls.find_key_with_source(request, request_data, extra_data) - return key_with_source[0] if key_with_source is not None else None - - @classmethod - def authenticate(cls, request: Union[HttpRequest, Request]) -> Optional[Tuple[Any, None]]: - personal_api_key_with_source = cls.find_key_with_source(request) - if not personal_api_key_with_source: - return None - personal_api_key, source = personal_api_key_with_source - PersonalAPIKey = apps.get_model(app_label="posthog", model_name="PersonalAPIKey") - try: - personal_api_key_object = ( - PersonalAPIKey.objects.select_related("user").filter(user__is_active=True).get(value=personal_api_key) - ) - except PersonalAPIKey.DoesNotExist: - raise AuthenticationFailed(detail=f"Personal API key found in request {source} is invalid.") - personal_api_key_object.last_used_at = timezone.now() - personal_api_key_object.save() - assert personal_api_key_object.user is not None - return personal_api_key_object.user, None - - @classmethod - def authenticate_header(cls, request) -> str: - return cls.keyword - - -class TemporaryTokenAuthentication(authentication.BaseAuthentication): - def authenticate(self, request: Request): - # if the Origin is different, the only authentication method should be temporary_token - # This happens when someone is trying to create actions from the editor on their own website - if ( - request.headers.get("Origin") - and urlsplit(request.headers["Origin"]).netloc not in urlsplit(request.build_absolute_uri("/")).netloc - ): - if not request.GET.get("temporary_token"): - raise AuthenticationFailed( - detail="No temporary_token set. " - + "That means you're either trying to access this API from a different site, " - + "or it means your proxy isn't sending the correct headers. " - + "See https://posthog.com/docs/deployment/running-behind-proxy for more information." - ) - if request.GET.get("temporary_token"): - User = apps.get_model(app_label="posthog", model_name="User") - user = User.objects.filter(temporary_token=request.GET.get("temporary_token")) - if not user.exists(): - raise AuthenticationFailed(detail="User doesn't exist") - return (user.first(), None) - return None - - -class PublicTokenAuthentication(authentication.BaseAuthentication): - def authenticate(self, request: Request): - from django.contrib.auth.models import AnonymousUser - - if request.GET.get("share_token") and request.parser_context and request.parser_context.get("kwargs"): - Dashboard = apps.get_model(app_label="posthog", model_name="Dashboard") - dashboard = Dashboard.objects.filter( - share_token=request.GET.get("share_token"), pk=request.parser_context["kwargs"].get("pk"), - ) - if not dashboard.exists(): - raise AuthenticationFailed(detail="Dashboard doesn't exist") - return (AnonymousUser(), None) - return None - - def generate_cache_key(stringified: str) -> str: return "cache_" + hashlib.md5(stringified.encode("utf-8")).hexdigest() @@ -345,25 +233,6 @@ def get_redis_heartbeat() -> Union[str, int]: return "offline" -def authenticate_secondarily(endpoint): - """Proper authentication for function views.""" - - @functools.wraps(endpoint) - def wrapper(request: HttpRequest): - if not request.user.is_authenticated: - try: - auth_result = PersonalAPIKeyAuthentication().authenticate(request) - if isinstance(auth_result, tuple) and auth_result[0].__class__.__name__ == "User": - request.user = auth_result[0] - else: - raise AuthenticationFailed("Authentication credentials were not provided.") - except AuthenticationFailed as e: - return JsonResponse({"detail": e.detail}, status=401) - return endpoint(request) - - return wrapper - - def base64_to_json(data) -> Dict: return json.loads( base64.b64decode(data.replace(" ", "+") + "===") From 45f19fccdd12fa61221fd8813506bd6b1da2cc6e Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Mon, 21 Sep 2020 13:13:43 +0200 Subject: [PATCH 09/11] Fix report --- posthog/tasks/status_report.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/posthog/tasks/status_report.py b/posthog/tasks/status_report.py index 57cccbb7b7dd6..b473a7071020a 100644 --- a/posthog/tasks/status_report.py +++ b/posthog/tasks/status_report.py @@ -40,7 +40,7 @@ def status_report() -> None: FROM posthog_event WHERE created_at >= %s AND created_at < %s GROUP BY lib """ ), - (report["period"]["start_inclusive"], report["period"]["start_exclusive"]), + (report["period"]["start_inclusive"], report["period"]["end_exclusive"]), ) report["events_count_by_lib"] = {result.lib: result.count for result in namedtuplefetchall(cursor)} cursor.execute( @@ -50,7 +50,7 @@ def status_report() -> None: FROM posthog_event WHERE created_at >= %s AND created_at < %s GROUP BY name """ ), - (report["period"]["start_inclusive"], report["period"]["start_exclusive"]), + (report["period"]["start_inclusive"], report["period"]["end_exclusive"]), ) report["events_count_by_name"] = {result.name: result.count for result in namedtuplefetchall(cursor)} posthoganalytics.api_key = "sTMFPsFhdP1Ssg" From f83698c83b1350cba016c1a9b3dba0e87f2c550d Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Tue, 22 Sep 2020 17:26:35 +0200 Subject: [PATCH 10/11] Enhance report, per team data --- posthog/tasks/status_report.py | 73 ++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/posthog/tasks/status_report.py b/posthog/tasks/status_report.py index b473a7071020a..280a897f51371 100644 --- a/posthog/tasks/status_report.py +++ b/posthog/tasks/status_report.py @@ -1,13 +1,12 @@ import logging -from datetime import datetime, timedelta from typing import Any, Dict import posthoganalytics -from celery.utils.functional import first from django.db import connection +from django.utils import timezone from psycopg2 import sql # type: ignore -from posthog.models import Event, User +from posthog.models import Event, PersonDistinctId, Team, User, person from posthog.models.utils import namedtuplefetchall from posthog.utils import get_machine_id from posthog.version import VERSION @@ -16,42 +15,58 @@ def status_report() -> None: - period_end = (datetime.utcnow() - timedelta(datetime.utcnow().weekday())).replace( + now = timezone.now() + period_end = (now - timezone.timedelta(now.weekday())).replace( hour=0, minute=0, second=0, microsecond=0 ) # very start of the current Monday - period_start = period_end - timedelta(7) # very start of the Monday preceding the current one + period_start = period_end - timezone.timedelta(7) # very start of the Monday preceding the current one report: Dict[str, Any] = { - "period": {"start_inclusive": period_start.isoformat(), "end_exclusive": period_end.isoformat()} + "posthog_version": VERSION, + "period": {"start_inclusive": period_start.isoformat(), "end_exclusive": period_end.isoformat()}, } - report["posthog_version"] = VERSION report["users_who_logged_in"] = [ - {"id": user.id} + {"id": user.id, "distinct_id": user.distinct_id} if user.anonymize_data else {"id": user.id, "distinct_id": user.distinct_id, "first_name": user.first_name, "email": user.email} for user in User.objects.filter(last_login__gte=period_start) ] - events_considered = Event.objects.filter(created_at__gte=period_start, created_at__lt=period_end) - report["events_count_total"] = events_considered.count() - with connection.cursor() as cursor: - cursor.execute( - sql.SQL( - """ - SELECT properties->>'$lib' as lib, COUNT(*) as count - FROM posthog_event WHERE created_at >= %s AND created_at < %s GROUP BY lib - """ - ), - (report["period"]["start_inclusive"], report["period"]["end_exclusive"]), + report["teams"] = {} + for team in Team.objects.all(): + team_report: Dict[str, Any] = {} + events_considered_total = Event.objects.filter(team_id=team.id) + events_considered_new_in_period = events_considered_total.filter( + created_at__gte=period_start, created_at__lt=period_end ) - report["events_count_by_lib"] = {result.lib: result.count for result in namedtuplefetchall(cursor)} - cursor.execute( - sql.SQL( - """ - SELECT event as name, COUNT(*) as count - FROM posthog_event WHERE created_at >= %s AND created_at < %s GROUP BY name - """ - ), - (report["period"]["start_inclusive"], report["period"]["end_exclusive"]), + persons_considered_total = Event.objects.filter(team_id=team.id) + persons_considered_total_new_in_period = persons_considered_total.filter( + created_at__gte=period_start, created_at__lt=period_end ) - report["events_count_by_name"] = {result.name: result.count for result in namedtuplefetchall(cursor)} + team_report["events_count_total"] = events_considered_total.count() + team_report["events_count_new_in_period"] = events_considered_new_in_period.count() + team_report["persons_count_total"] = persons_considered_total.count() + team_report["persons_count_new_in_period"] = persons_considered_total_new_in_period.count() + + with connection.cursor() as cursor: + cursor.execute( + sql.SQL( + """ + SELECT properties->>'$lib' as lib, COUNT(*) as count + FROM posthog_event WHERE team_id = %s AND created_at >= %s AND created_at < %s GROUP BY lib + """ + ), + (team.id, report["period"]["start_inclusive"], report["period"]["end_exclusive"]), + ) + team_report["events_count_by_lib"] = {result.lib: result.count for result in namedtuplefetchall(cursor)} + cursor.execute( + sql.SQL( + """ + SELECT event as name, COUNT(*) as count + FROM posthog_event WHERE team_id = %s AND created_at >= %s AND created_at < %s GROUP BY name + """ + ), + (team.id, report["period"]["start_inclusive"], report["period"]["end_exclusive"]), + ) + team_report["events_count_by_name"] = {result.name: result.count for result in namedtuplefetchall(cursor)} + report["teams"][team.id] = team_report posthoganalytics.api_key = "sTMFPsFhdP1Ssg" posthoganalytics.capture(get_machine_id(), "instance status report", report) From 022622fd170bf24cf8f6e2ed0910a3ec43f0b448 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Wed, 23 Sep 2020 03:31:28 +0200 Subject: [PATCH 11/11] Add persons active in period count and fix capturing --- posthog/tasks/status_report.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/posthog/tasks/status_report.py b/posthog/tasks/status_report.py index 280a897f51371..e278fa1bc03d0 100644 --- a/posthog/tasks/status_report.py +++ b/posthog/tasks/status_report.py @@ -6,7 +6,7 @@ from django.utils import timezone from psycopg2 import sql # type: ignore -from posthog.models import Event, PersonDistinctId, Team, User, person +from posthog.models import Event, Team, User, person from posthog.models.utils import namedtuplefetchall from posthog.utils import get_machine_id from posthog.version import VERSION @@ -47,6 +47,16 @@ def status_report() -> None: team_report["persons_count_new_in_period"] = persons_considered_total_new_in_period.count() with connection.cursor() as cursor: + cursor.execute( + sql.SQL( + """ + SELECT COUNT(DISTINCT person_id) as persons_count + FROM posthog_event JOIN posthog_persondistinctid ON (posthog_event.distinct_id = posthog_persondistinctid.distinct_id) WHERE posthog_event.team_id = %s AND posthog_event.created_at >= %s AND posthog_event.created_at < %s + """ + ), + (team.id, report["period"]["start_inclusive"], report["period"]["end_exclusive"]), + ) + team_report["persons_count_active_in_period"] = cursor.fetchone()[0] cursor.execute( sql.SQL( """ @@ -69,4 +79,7 @@ def status_report() -> None: team_report["events_count_by_name"] = {result.name: result.count for result in namedtuplefetchall(cursor)} report["teams"][team.id] = team_report posthoganalytics.api_key = "sTMFPsFhdP1Ssg" + disabled = posthoganalytics.disabled + posthoganalytics.disabled = False posthoganalytics.capture(get_machine_id(), "instance status report", report) + posthoganalytics.disabled = disabled