Skip to content

Commit

Permalink
[OPIK-568] Updated uuidv4_to_uuidv7 function to handle timestamps (#1204)
Browse files Browse the repository at this point in the history

* Updated uuidv4_to_uuidv7 function to handle timestamps

* Updated uuidv4_to_uuidv7 function to handle timestamps

* Fix issue following review
  • Loading branch information
jverre authored Feb 6, 2025
1 parent fb90002 commit 25a2641
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 36 deletions.
61 changes: 25 additions & 36 deletions sdks/python/src/opik/id_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,31 @@
import uuid


# STATIC NS
# Fixed timestamp expressed in nanoseconds, intended as a static time source
# for the naive UUIDv7 conversion instead of a real clock.
# NOTE(review): presumably nanoseconds since the Unix epoch (the value would
# then fall in early December 2024) - confirm before relying on it.
UUIDV7_NS = 1733244176020523256


def uuid4_to_uuid7(user_datetime: datetime, user_uuid: str) -> uuid.UUID:
    """Convert a UUID v4 into a UUID v7 that embeds ``user_datetime``.

    The result follows the UUID version 7 layout: a 48-bit big-endian Unix
    timestamp in milliseconds, the version nibble, and the variant bits, with
    every remaining bit copied from the supplied UUID v4. The conversion is
    deterministic: the same inputs always yield the same UUID.

    Args:
        user_datetime: Point in time to encode in the first 48 bits.
        user_uuid: String form of a version 4 UUID that supplies the
            random bits.

    Returns:
        The resulting version 7 ``uuid.UUID``.

    Raises:
        ValueError: If ``user_uuid`` is not a well-formed UUID string or is
            not a version 4 UUID.
        OverflowError: If the millisecond timestamp is negative or does not
            fit in 48 bits.
    """
    # Unix timestamp in milliseconds, truncated towards zero.
    unix_ts_ms = int(user_datetime.timestamp() * 1000)

    uuidv4 = uuid.UUID(user_uuid)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if uuidv4.version != 4:
        raise ValueError("Input UUID must be version 4")

    # Start from the UUID v4 bytes so that byte 7 and bytes 9-15 keep their
    # random content; only the fields below are overwritten.
    uuid_bytes = bytearray(uuidv4.bytes)

    # First 48 bits (6 bytes): Unix timestamp in milliseconds, big-endian so
    # that byte-wise (and hex-string) ordering follows time.
    uuid_bytes[0:6] = unix_ts_ms.to_bytes(6, byteorder="big")

    # Byte 6: version 7 in the top 4 bits, random bits in the low nibble.
    uuid_bytes[6] = 0x70 | (uuidv4.bytes[6] & 0x0F)

    # Byte 8: variant 0b10 in the top 2 bits, random bits in the low 6 bits.
    uuid_bytes[8] = 0x80 | (uuidv4.bytes[8] & 0x3F)

    return uuid.UUID(bytes=bytes(uuid_bytes))
72 changes: 72 additions & 0 deletions sdks/python/tests/unit/test_id_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from datetime import datetime
import uuid
from opik.id_helpers import uuid4_to_uuid7
import time


def test_uuid4_to_uuid7__generates_valid_uuidv7():
    """
    The identifier produced by uuid4_to_uuid7 must report version 7.
    """
    current_time = datetime.now()
    source_uuid = str(uuid.uuid4())

    uuid_v7 = uuid4_to_uuid7(current_time, source_uuid)

    assert uuid_v7.version == 7, f"Generated UUID {uuid_v7} is not a version 7 UUID"


def test_uuid4_to_uuid7__generates_consistent_uuids():
    """
    Test that uuid4_to_uuid7 is deterministic: converting the same timestamp
    and the same UUIDv4 several times always yields the same UUIDv7.
    """
    NB_ID = 5

    # One fixed input pair, converted NB_ID times
    timestamp = datetime.now()
    uuid4_str = str(uuid.uuid4())

    uuids_v7 = [str(uuid4_to_uuid7(timestamp, uuid4_str)) for _ in range(NB_ID)]

    # Every conversion must collapse to a single value
    assert len(set(uuids_v7)) == 1, "UUIDs are not identical"


def test_uuid4_to_uuid7__sequential_timestamps__maintains_temporal_ordering():
    """
    Test that uuid4_to_uuid7 maintains temporal ordering when given sequential timestamps.

    The timestamps are built explicitly, 0.5 seconds apart, instead of
    sleeping between calls to datetime.now(): the inputs are the same kind of
    increasing sequence, but the test no longer spends seconds waiting.
    """
    NB_ID = 5
    STEP_SECONDS = 0.5

    # Strictly increasing timestamps, each paired with a fresh random UUIDv4
    start = datetime.now().timestamp()
    test_uuids = [
        (datetime.fromtimestamp(start + index * STEP_SECONDS), str(uuid.uuid4()))
        for index in range(NB_ID)
    ]

    # Convert UUIDs
    uuids_v7 = [str(uuid4_to_uuid7(ts, uuid4_str)) for ts, uuid4_str in test_uuids]

    # Assert temporal ordering: string order must match creation order
    assert uuids_v7 == sorted(uuids_v7), "UUIDs are not sorted"


def test_uuid4_to_uuid7__different_uuid4_same_timestamp():
    """
    Test that uuid4_to_uuid7 creates different UUIDv7 when given the same timestamp and different UUIDv4.
    """
    NB_ID = 5

    # A single shared timestamp; only the UUIDv4 differs between entries.
    # No sleeping is needed because the timestamp is captured once up front.
    timestamp = datetime.now()
    test_uuids = [(timestamp, str(uuid.uuid4())) for _ in range(NB_ID)]

    # Convert UUIDs
    uuids_v7 = [str(uuid4_to_uuid7(ts, uuid4_str)) for ts, uuid4_str in test_uuids]

    # Check ids are different
    assert len(uuids_v7) == len(set(uuids_v7)), "UUIDs are not distinct"

0 comments on commit 25a2641

Please sign in to comment.