Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Send a weekly instance status report (resolves #1509) #1683

Merged
merged 11 commits into from
Sep 23, 2020
9 changes: 3 additions & 6 deletions posthog/apps.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import hashlib
import os
import uuid

import posthoganalytics
from django.apps import AppConfig
from django.conf import settings

from posthog.utils import get_machine_id


class PostHogConfig(AppConfig):
name = "posthog"
Expand All @@ -16,10 +16,7 @@ def ready(self):
if settings.DEBUG:
# log development server launch to posthog
if os.getenv("RUN_MAIN") == "true":
# MAC addresses are 6 bytes (48 bits) long, so overflow shouldn't happen
# hashing here as we don't care about the actual address, just it being rather consistent
mac_address_hash = hashlib.md5(uuid.getnode().to_bytes(6, "little"))
posthoganalytics.capture(mac_address_hash.hexdigest(), "development server launched")
posthoganalytics.capture(get_machine_id(), "development server launched")
posthoganalytics.disabled = True
elif settings.TEST or os.environ.get("OPT_OUT_CAPTURE"):
posthoganalytics.disabled = True
16 changes: 12 additions & 4 deletions posthog/celery.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import os
import time
from datetime import datetime
from typing import Optional

import posthoganalytics
import redis
import statsd # type: ignore
from celery import Celery, group
from celery import Celery
from celery.schedules import crontab
from dateutil import parser
from django.conf import settings
from django.db import connection

Expand Down Expand Up @@ -48,6 +46,9 @@ def setup_periodic_tasks(sender, **kwargs):
sender.add_periodic_task(
crontab(day_of_week="mon,fri"), update_event_partitions.s(), # check twice a week
)
sender.add_periodic_task(
crontab(day_of_week="mon"), status_report.s(),
)
sender.add_periodic_task(15 * 60, calculate_cohort.s(), name="debug")
sender.add_periodic_task(600, check_cached_items.s(), name="check dashboard items")

Expand Down Expand Up @@ -85,6 +86,13 @@ def update_event_partitions():
)


@app.task
def status_report():
    """Celery task that runs the instance status report.

    The implementation is imported at call time rather than at module load.
    """
    from posthog.tasks.status_report import status_report as run_status_report

    run_status_report()


@app.task
def calculate_event_action_mappings():
from posthog.tasks.calculate_action import calculate_actions_from_last_calculation
Expand Down
57 changes: 57 additions & 0 deletions posthog/tasks/status_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import logging
from datetime import datetime, timedelta
from typing import Any, Dict

import posthoganalytics
from celery.utils.functional import first
from django.db import connection
from psycopg2 import sql # type: ignore

from posthog.models import Event, User
from posthog.models.utils import namedtuplefetchall
from posthog.utils import get_machine_id
from posthog.version import VERSION

logger = logging.getLogger(__name__)


def status_report() -> None:
    """Compile usage statistics for the previous week and send them to PostHog.

    The reporting period runs from the very start (00:00 UTC) of the Monday
    preceding the current one, inclusive, to the very start of the current
    Monday, exclusive. The report holds the PostHog version, the users with a
    login since the period start, the number of events created in the period,
    and per-`$lib` and per-event-name counts for the same period. It is
    captured as an "instance status report" event keyed by the machine ID.
    """
    # Read the clock once: two separate utcnow() calls could straddle midnight
    # and disagree about which weekday it is.
    now = datetime.utcnow()
    period_end = (now - timedelta(now.weekday())).replace(
        hour=0, minute=0, second=0, microsecond=0
    )  # very start of the current Monday
    period_start = period_end - timedelta(7)  # very start of the Monday preceding the current one
    report: Dict[str, Any] = {
        "period": {"start_inclusive": period_start.isoformat(), "end_exclusive": period_end.isoformat()}
    }
    report["posthog_version"] = VERSION
    # Users who opted into anonymization are reported by ID only.
    # NOTE(review): the login filter has no upper bound, so logins after the
    # period end are included too - confirm whether that is intended.
    report["users_who_logged_in"] = [
        {"id": user.id}
        if user.anonymize_data
        else {"id": user.id, "distinct_id": user.distinct_id, "first_name": user.first_name, "email": user.email}
        for user in User.objects.filter(last_login__gte=period_start)
    ]
    events_considered = Event.objects.filter(created_at__gte=period_start, created_at__lt=period_end)
    report["events_count_total"] = events_considered.count()
    # Both raw queries share the same [start, end) bounds. The end bound lives
    # under "end_exclusive"; looking up "start_exclusive" raised KeyError.
    period_bounds = (report["period"]["start_inclusive"], report["period"]["end_exclusive"])
    with connection.cursor() as cursor:
        cursor.execute(
            sql.SQL(
                """
                SELECT properties->>'$lib' as lib, COUNT(*) as count
                FROM posthog_event WHERE created_at >= %s AND created_at < %s GROUP BY lib
                """
            ),
            period_bounds,
        )
        report["events_count_by_lib"] = {result.lib: result.count for result in namedtuplefetchall(cursor)}
        cursor.execute(
            sql.SQL(
                """
                SELECT event as name, COUNT(*) as count
                FROM posthog_event WHERE created_at >= %s AND created_at < %s GROUP BY name
                """
            ),
            period_bounds,
        )
        report["events_count_by_name"] = {result.name: result.count for result in namedtuplefetchall(cursor)}
    # NOTE(review): this reassigns the API key on the shared posthoganalytics
    # module, not just for this one capture call - confirm that is intended.
    posthoganalytics.api_key = "sTMFPsFhdP1Ssg"
    posthoganalytics.capture(get_machine_id(), "instance status report", report)
paolodamico marked this conversation as resolved.
Show resolved Hide resolved
11 changes: 10 additions & 1 deletion posthog/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import re
import subprocess
import time
import uuid
from typing import Any, Dict, List, Optional, Tuple, Union
from urllib.parse import urlparse, urlsplit

Expand All @@ -18,7 +19,6 @@
from dateutil.relativedelta import relativedelta
from django.apps import apps
from django.conf import settings
from django.contrib.auth.models import AnonymousUser
from django.http import HttpRequest, HttpResponse, JsonResponse
from django.template.loader import get_template
from django.utils import timezone
Expand Down Expand Up @@ -313,6 +313,8 @@ def authenticate(self, request: Request):

class PublicTokenAuthentication(authentication.BaseAuthentication):
def authenticate(self, request: Request):
from django.contrib.auth.models import AnonymousUser
paolodamico marked this conversation as resolved.
Show resolved Hide resolved

if request.GET.get("share_token") and request.parser_context and request.parser_context.get("kwargs"):
Dashboard = apps.get_model(app_label="posthog", model_name="Dashboard")
dashboard = Dashboard.objects.filter(
Expand Down Expand Up @@ -425,3 +427,10 @@ def __call__(self, *args, **kwds):
if self.instance == None:
self.instance = self.klass(*args, **kwds)
return self.instance


def get_machine_id() -> str:
    """A MAC address-dependent ID. Useful for PostHog instance analytics.

    Returns the MD5 hex digest of the node number reported by `uuid.getnode()`.
    """
    # uuid.getnode() yields a 48-bit integer, so six bytes always hold it without overflow
    node_bytes = uuid.getnode().to_bytes(6, "little")
    # only the digest is exposed: the actual address doesn't matter, the ID just has to be rather consistent
    digest = hashlib.md5(node_bytes)
    return digest.hexdigest()