Skip to content

Commit

Permalink
Add msgspec.json.Decoder.decode_lines
Browse files Browse the repository at this point in the history
Adds a new method on the json `Decoder` class for decoding line
delimited JSON.
  • Loading branch information
jcrist committed Jul 14, 2023
1 parent 1314576 commit 83d8add
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 1 deletion.
2 changes: 1 addition & 1 deletion docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ JSON
:members: encode, encode_lines, encode_into

.. autoclass:: Decoder
:members: decode
:members: decode, decode_lines

.. autofunction:: encode

Expand Down
98 changes: 98 additions & 0 deletions msgspec/_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -17764,11 +17764,109 @@ JSONDecoder_decode(JSONDecoder *self, PyObject *const *args, Py_ssize_t nargs)
return NULL;
}

/* Docstring for `Decoder.decode_lines`, written in numpydoc layout. Each
 * section is separated from the previous one by a blank line so the
 * "Examples" header is not absorbed into the "Returns" description. */
PyDoc_STRVAR(JSONDecoder_decode_lines__doc__,
"decode_lines(self, buf)\n"
"--\n"
"\n"
"Decode a list of items from newline-delimited JSON.\n"
"\n"
"Parameters\n"
"----------\n"
"buf : bytes-like or str\n"
" The message to decode.\n"
"\n"
"Returns\n"
"-------\n"
"items : list\n"
" A list of decoded objects.\n"
"\n"
"Examples\n"
"--------\n"
">>> import msgspec\n"
">>> msg = \"\"\"\n"
"... {\"x\": 1, \"y\": 2}\n"
"... {\"x\": 3, \"y\": 4}\n"
"... \"\"\"\n"
">>> dec = msgspec.json.Decoder()\n"
">>> dec.decode_lines(msg)\n"
"[{'x': 1, 'y': 2}, {'x': 3, 'y': 4}]"
);
/* Decoder.decode_lines(buf): decode whitespace/newline separated JSON values
 * from `buf` into a new list.
 *
 * Takes exactly one positional argument (the input buffer). Returns a new
 * list holding one decoded object per JSON value found, or NULL if the
 * argument check, buffer acquisition, list allocation, decoding, or appending
 * fails. Once the buffer has been acquired, every exit path goes through
 * `done` so the buffer view is released and the scratch space is freed. */
static PyObject*
JSONDecoder_decode_lines(JSONDecoder *self, PyObject *const *args, Py_ssize_t nargs)
{
    if (!check_positional_nargs(nargs, 1, 1)) {
        return NULL;
    }

    JSONDecoderState state = {
        .type = self->type,
        .strict = self->strict,
        .dec_hook = self->dec_hook,
        .scratch = NULL,
        .scratch_capacity = 0,
        .scratch_len = 0
    };

    Py_buffer buffer;
    buffer.buf = NULL;
    if (ms_get_buffer(args[0], &buffer) < 0) {
        /* Nothing has been acquired yet, so there is nothing to clean up. */
        return NULL;
    }

    state.buffer_obj = args[0];
    state.input_start = buffer.buf;
    state.input_pos = buffer.buf;
    state.input_end = state.input_pos + buffer.len;

    /* path.index is advanced once per decoded item, so it holds the position
     * of the item currently being decoded. */
    PathNode path = {NULL, 0, NULL};

    PyObject *out = PyList_New(0);
    if (out == NULL) {
        /* The buffer view is already held here; it must still be released. */
        goto done;
    }

    while (true) {
        /* Skip until first non-whitespace character, or finish if the buffer
         * is exhausted */
        while (true) {
            if (state.input_pos == state.input_end) {
                goto done;
            }
            unsigned char c = *state.input_pos;
            if (MS_LIKELY(c != ' ' && c != '\n' && c != '\r' && c != '\t')) {
                break;
            }
            state.input_pos++;
        }

        /* Read and append next item */
        PyObject *item = json_decode(&state, state.type, &path);
        path.index++;
        if (item == NULL) {
            /* Drop the partial result so NULL is returned */
            Py_CLEAR(out);
            goto done;
        }
        int status = PyList_Append(out, item);
        /* Release our reference to the item whether or not the append worked */
        Py_DECREF(item);
        if (status < 0) {
            Py_CLEAR(out);
            goto done;
        }
    }

done:
    ms_release_buffer(&buffer);
    PyMem_Free(state.scratch);
    return out;
}

static struct PyMethodDef JSONDecoder_methods[] = {
{
"decode", (PyCFunction) JSONDecoder_decode, METH_FASTCALL,
JSONDecoder_decode__doc__,
},
{
"decode_lines", (PyCFunction) JSONDecoder_decode_lines, METH_FASTCALL,
JSONDecoder_decode_lines__doc__,
},
#if PY_VERSION_HEX >= 0x03090000
{"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS},
#endif
Expand Down
1 change: 1 addition & 0 deletions msgspec/json.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class Decoder(Generic[T]):
dec_hook: dec_hook_sig = None,
) -> None: ...
def decode(self, data: Union[bytes, str]) -> T: ...
def decode_lines(self, data: Union[bytes, str]) -> list[T]: ...

@overload
def decode(
Expand Down
13 changes: 13 additions & 0 deletions tests/basic_typing_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,19 @@ def check_json_Decoder_decode_from_str() -> None:
reveal_type(o) # assert ("List" in typ or "list" in typ) and "int" in typ


def check_json_Decoder_decode_lines_any() -> None:
    """Typing example: ``decode_lines`` on a ``Decoder`` built without a type.

    The trailing comment on the last line states the expected inferred type
    (a list of ``Any``).
    """
    decoder = msgspec.json.Decoder()
    items = decoder.decode_lines(b'1\n2\n3')

    reveal_type(items)  # assert "list" in typ.lower() and "any" in typ.lower()


def check_json_Decoder_decode_lines_typed() -> None:
    """Typing example: ``decode_lines`` on a ``Decoder`` built with ``int``.

    The trailing comment on the last line states the expected inferred type
    (a list of ``int``).
    """
    decoder = msgspec.json.Decoder(int)
    items = decoder.decode_lines(b'1\n2\n3')
    reveal_type(items)  # assert "list" in typ.lower() and "int" in typ.lower()


def check_json_decode_any() -> None:
b = msgspec.json.encode([1, 2, 3])
o = msgspec.json.decode(b)
Expand Down
54 changes: 54 additions & 0 deletions tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,60 @@ def test_decode_with_trailing_characters_errors(self):
with pytest.raises(msgspec.DecodeError):
dec.decode(b'[1, 2, 3]"trailing"')

@pytest.mark.parametrize(
    "msg",
    ["", "\n", "1", " 1", "1\t\r\n", "1\n\r\t 2", "1\n2\n", "1\n2\n3\n"],
)
def test_decode_lines(self, msg):
    """``decode_lines`` matches decoding each non-blank line individually."""
    decoder = msgspec.json.Decoder()

    # Reference result: split on line boundaries, drop lines that are empty
    # after stripping, and decode what remains one value at a time.
    stripped_lines = (line.strip() for line in msg.splitlines())
    expected = [decoder.decode(line) for line in stripped_lines if line]

    actual = decoder.decode_lines(msg)
    assert actual == expected

def test_decode_lines_typed(self):
    """Structs written with ``encode_lines`` round-trip through a typed ``decode_lines``."""

    class Ex(msgspec.Struct):
        x: int

    expected = [Ex(1), Ex(2)]
    encoded = msgspec.json.Encoder().encode_lines(expected)
    decoder = msgspec.json.Decoder(Ex)
    assert decoder.decode_lines(encoded) == expected

def test_decode_lines_typed_error(self):
    """A type mismatch on the second line is reported with that line's index in the path."""

    class Ex(msgspec.Struct):
        x: int

    # First line is valid; the second has a str where an int is required.
    payload = b'{"x": 1}\n{"x": "bad"}\n'
    decoder = msgspec.json.Decoder(Ex)

    with pytest.raises(msgspec.ValidationError) as excinfo:
        decoder.decode_lines(payload)

    message = str(excinfo.value)
    assert "Expected `int`, got `str`" in message
    assert "`$[1].x" in message

def test_decode_lines_malformed(self):
    """Invalid JSON on a later line raises ``DecodeError`` mentioning "malformed"."""
    # The second line breaks off at an invalid token.
    payload = b'{"x": 1}\n{"x": efg'
    decoder = msgspec.json.Decoder()
    with pytest.raises(msgspec.DecodeError, match="malformed"):
        decoder.decode_lines(payload)

def test_decode_lines_bad_call(self):
    """``decode_lines`` raises ``TypeError`` for a wrong argument count or a bad argument type."""
    dec = msgspec.json.Decoder()

    # Missing the required positional argument
    with pytest.raises(TypeError):
        dec.decode_lines()

    # One positional argument too many
    with pytest.raises(TypeError):
        dec.decode_lines("{}", 2)

    # Argument is neither bytes-like nor str
    with pytest.raises(TypeError):
        dec.decode_lines(1)


class TestBoolAndNone:
def test_encode_none(self):
Expand Down

0 comments on commit 83d8add

Please sign in to comment.