Skip to content

Commit

Permalink
Marshalling: Use orjson to improve JSON serialization performance
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Jan 16, 2025
1 parent a2aae9b commit 5b8b18c
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 33 deletions.
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Changes for crate
Unreleased
==========

- Use ``orjson`` to improve JSON marshalling performance. Thanks, @widmogrod.

2024/11/23 1.0.1
================
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def read(path):
packages=find_namespace_packages("src"),
package_dir={"": "src"},
install_requires=[
"orjson<4",
"urllib3",
"verlib2",
],
Expand Down
52 changes: 26 additions & 26 deletions src/crate/client/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,23 +20,21 @@
# software solely pursuant to the terms of the relevant commercial agreement.


import calendar
import heapq
import io
import json
import logging
import os
import re
import socket
import ssl
import threading
from base64 import b64encode
from datetime import date, datetime, timezone
from datetime import datetime, timezone
from decimal import Decimal
from time import time
from urllib.parse import urlparse
from uuid import UUID

import orjson
import urllib3
from urllib3 import connection_from_url
from urllib3.connection import HTTPConnection
Expand Down Expand Up @@ -86,25 +84,27 @@ def super_len(o):
return None


class CrateJsonEncoder(json.JSONEncoder):
    """
    ``json`` encoder that understands a few extra Python types.

    ``Decimal`` and ``UUID`` values are emitted as strings, while
    ``datetime`` and ``date`` values are emitted as integer milliseconds
    relative to 1970-01-01.
    """

    # Reference points for computing offsets; which one is used depends on
    # whether the datetime being encoded carries a tzinfo.
    epoch_aware = datetime(1970, 1, 1, tzinfo=timezone.utc)
    epoch_naive = datetime(1970, 1, 1)

    def default(self, o):
        """
        Return a JSON-serializable replacement for ``o``.

        Anything not handled here is delegated to the base class
        implementation.
        """
        if isinstance(o, (Decimal, UUID)):
            return str(o)
        if isinstance(o, datetime):
            # ``datetime`` is checked before ``date`` because every datetime
            # is also a date instance.
            epoch = self.epoch_naive if o.tzinfo is None else self.epoch_aware
            delta = o - epoch
            whole_seconds = delta.seconds + delta.days * 24 * 3600
            return int(delta.microseconds / 1000.0 + whole_seconds * 1000.0)
        if isinstance(o, date):
            return calendar.timegm(o.timetuple()) * 1000
        return super().default(o)
# 1970-01-01 00:00:00 as a naive datetime, and the same instant tagged as UTC.
epoch_naive = datetime(1970, 1, 1)
epoch_aware = epoch_naive.replace(tzinfo=timezone.utc)


def cratedb_json_encoder(obj):
    """
    Encoder function for orjson.

    Intended to be passed as the ``default`` callable of ``orjson.dumps``.
    https://github.com/ijl/orjson#default

    :param obj: The object to convert into a serializable value.
    :return: The string representation of ``obj`` if it is a ``Decimal``.
    :raises TypeError: If ``obj`` is of any other type.
    """
    if isinstance(obj, Decimal):
        return str(obj)
    # Signal "not handled" explicitly. Handing the very same object back
    # would not make it serializable; the orjson documentation asks
    # ``default`` callables to raise for types they do not support.
    raise TypeError(
        "Type is not JSON serializable: {}".format(type(obj).__name__)
    )


def json_dumps(obj):
    """
    Serialize ``obj`` to JSON using orjson.

    ``cratedb_json_encoder`` is registered as the ``default`` hook, and
    orjson's ``OPT_SERIALIZE_NUMPY`` option is enabled.
    """
    options = orjson.OPT_SERIALIZE_NUMPY
    return orjson.dumps(obj, default=cratedb_json_encoder, option=options)


class Server:
Expand Down Expand Up @@ -180,7 +180,7 @@ def close(self):

def _json_from_response(response):
try:
return json.loads(response.data.decode("utf-8"))
return orjson.loads(response.data)
except ValueError as ex:
raise ProgrammingError(
"Invalid server response of content-type '{}':\n{}".format(
Expand Down Expand Up @@ -223,7 +223,7 @@ def _raise_for_status_real(response):
if response.status == 503:
raise ConnectionError(message)
if response.headers.get("content-type", "").startswith("application/json"):
data = json.loads(response.data.decode("utf-8"))
data = orjson.loads(response.data)
error = data.get("error", {})
error_trace = data.get("error_trace", None)
if "results" in data:
Expand Down Expand Up @@ -334,7 +334,7 @@ def _create_sql_payload(stmt, args, bulk_args):
data["args"] = args
if bulk_args:
data["bulk_args"] = bulk_args
return json.dumps(data, cls=CrateJsonEncoder)
return json_dumps(data)


def _get_socket_opts(
Expand Down
14 changes: 7 additions & 7 deletions tests/client/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@
)
from crate.client.http import (
Client,
CrateJsonEncoder,
_get_socket_opts,
_remove_certs_for_non_https,
json_dumps,
)

REQUEST = "crate.client.http.Server.request"
Expand Down Expand Up @@ -318,7 +318,7 @@ def test_datetime_is_converted_to_ts(self, request):
# convert string to dict
# because the order of the keys isn't deterministic
data = json.loads(request.call_args[1]["data"])
self.assertEqual(data["args"], [1425108700000])
self.assertEqual(data["args"], ["2015-02-28T07:31:40"])
client.close()

@patch(REQUEST, autospec=True)
Expand All @@ -329,7 +329,7 @@ def test_date_is_converted_to_ts(self, request):
day = dt.date(2016, 4, 21)
client.sql("insert into users (dt) values (?)", (day,))
data = json.loads(request.call_args[1]["data"])
self.assertEqual(data["args"], [1461196800000])
self.assertEqual(data["args"], ["2016-04-21"])
client.close()

def test_socket_options_contain_keepalive(self):
Expand Down Expand Up @@ -724,10 +724,10 @@ def test_username(self):
class TestCrateJsonEncoder(TestCase):
def test_naive_datetime(self):
data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123")
result = json.dumps(data, cls=CrateJsonEncoder)
self.assertEqual(result, "1687771440123")
result = json_dumps(data)
self.assertEqual(result, b'"2023-06-26T09:24:00.123000"')

def test_aware_datetime(self):
data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123+02:00")
result = json.dumps(data, cls=CrateJsonEncoder)
self.assertEqual(result, "1687764240123")
result = json_dumps(data)
self.assertEqual(result, b'"2023-06-26T09:24:00.123000+02:00"')

0 comments on commit 5b8b18c

Please sign in to comment.