diff --git a/docs/source/api.rst b/docs/source/api.rst
index 566f97db..882b57b0 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -60,7 +60,7 @@ JSON
     :members: encode, encode_lines, encode_into
 
 .. autoclass:: Decoder
-    :members: decode
+    :members: decode, decode_lines
 
 .. autofunction:: encode
 
diff --git a/msgspec/_core.c b/msgspec/_core.c
index 4682c43e..180254c5 100644
--- a/msgspec/_core.c
+++ b/msgspec/_core.c
@@ -17764,11 +17764,115 @@ JSONDecoder_decode(JSONDecoder *self, PyObject *const *args, Py_ssize_t nargs)
     return NULL;
 }
 
+PyDoc_STRVAR(JSONDecoder_decode_lines__doc__,
+"decode_lines(self, buf)\n"
+"--\n"
+"\n"
+"Decode a list of items from newline-delimited JSON.\n"
+"\n"
+"Parameters\n"
+"----------\n"
+"buf : bytes-like or str\n"
+"    The message to decode.\n"
+"\n"
+"Returns\n"
+"-------\n"
+"items : list\n"
+"    A list of decoded objects.\n"
+"\n"
+"Examples\n"
+"--------\n"
+">>> import msgspec\n"
+">>> msg = \"\"\"\n"
+"... {\"x\": 1, \"y\": 2}\n"
+"... {\"x\": 3, \"y\": 4}\n"
+"... \"\"\"\n"
+">>> dec = msgspec.json.Decoder()\n"
+">>> dec.decode_lines(msg)\n"
+"[{'x': 1, 'y': 2}, {'x': 3, 'y': 4}]"
+);
+static PyObject*
+JSONDecoder_decode_lines(JSONDecoder *self, PyObject *const *args, Py_ssize_t nargs)
+{
+    if (!check_positional_nargs(nargs, 1, 1)) {
+        return NULL;
+    }
+
+    JSONDecoderState state = {
+        .type = self->type,
+        .strict = self->strict,
+        .dec_hook = self->dec_hook,
+        .scratch = NULL,
+        .scratch_capacity = 0,
+        .scratch_len = 0
+    };
+
+    Py_buffer buffer;
+    buffer.buf = NULL;
+    if (ms_get_buffer(args[0], &buffer) >= 0) {
+
+        state.buffer_obj = args[0];
+        state.input_start = buffer.buf;
+        state.input_pos = buffer.buf;
+        state.input_end = state.input_pos + buffer.len;
+
+        /* `path.index` is bumped after every item so that error messages
+         * can report the position of the failing item (e.g. `$[1].x`) */
+        PathNode path = {NULL, 0, NULL};
+
+        PyObject *out = PyList_New(0);
+        /* On allocation failure still leave through `done`, so the buffer
+         * acquired above is released instead of leaked */
+        if (out == NULL) goto done;
+        while (true) {
+            /* Skip until first non-whitespace character, or return if buffer
+             * exhausted */
+            while (true) {
+                if (state.input_pos == state.input_end) {
+                    goto done;
+                }
+                unsigned char c = *state.input_pos;
+                if (MS_LIKELY(c != ' ' && c != '\n' && c != '\r' && c != '\t')) {
+                    break;
+                }
+                state.input_pos++;
+            }
+
+            /* Read and append next item */
+            PyObject *item = json_decode(&state, state.type, &path);
+            path.index++;
+            if (item == NULL) {
+                Py_CLEAR(out);
+                goto done;
+            }
+            int status = PyList_Append(out, item);
+            Py_DECREF(item);
+            if (status < 0) {
+                Py_CLEAR(out);
+                goto done;
+            }
+        }
+    done:
+        /* Shared exit for success and failure: `out` is NULL here on every
+         * error path, otherwise it is the list of decoded items */
+        ms_release_buffer(&buffer);
+
+        PyMem_Free(state.scratch);
+        return out;
+    }
+
+    return NULL;
+}
+
 static struct PyMethodDef JSONDecoder_methods[] = {
     {
         "decode", (PyCFunction) JSONDecoder_decode, METH_FASTCALL,
         JSONDecoder_decode__doc__,
     },
+    {
+        "decode_lines", (PyCFunction) JSONDecoder_decode_lines, METH_FASTCALL,
+        JSONDecoder_decode_lines__doc__,
+    },
 #if PY_VERSION_HEX >= 0x03090000
     {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS},
 #endif
diff --git a/msgspec/json.pyi b/msgspec/json.pyi
index c57773be..dcb1bafb 100644
--- a/msgspec/json.pyi
+++ b/msgspec/json.pyi
@@ -64,6 +64,7 @@ class Decoder(Generic[T]):
         dec_hook: dec_hook_sig = None,
     ) -> None: ...
     def decode(self, data: Union[bytes, str]) -> T: ...
+    def decode_lines(self, data: Union[bytes, str]) -> list[T]: ...
 
 @overload
 def decode(
diff --git a/tests/basic_typing_examples.py b/tests/basic_typing_examples.py
index efb5f634..50949cd7 100644
--- a/tests/basic_typing_examples.py
+++ b/tests/basic_typing_examples.py
@@ -752,6 +752,19 @@ def check_json_Decoder_decode_from_str() -> None:
     reveal_type(o)  # assert ("List" in typ or "list" in typ) and "int" in typ
 
 
+def check_json_Decoder_decode_lines_any() -> None:
+    dec = msgspec.json.Decoder()
+    o = dec.decode_lines(b'1\n2\n3')
+
+    reveal_type(o)  # assert "list" in typ.lower() and "any" in typ.lower()
+
+
+def check_json_Decoder_decode_lines_typed() -> None:
+    dec = msgspec.json.Decoder(int)
+    o = dec.decode_lines(b'1\n2\n3')
+    reveal_type(o)  # assert "list" in typ.lower() and "int" in typ.lower()
+
+
 def check_json_decode_any() -> None:
     b = msgspec.json.encode([1, 2, 3])
     o = msgspec.json.decode(b)
diff --git a/tests/test_json.py b/tests/test_json.py
index 2beedded..1b29ebb2 100644
--- a/tests/test_json.py
+++ b/tests/test_json.py
@@ -470,6 +470,61 @@ def test_decode_with_trailing_characters_errors(self):
         with pytest.raises(msgspec.DecodeError):
             dec.decode(b'[1, 2, 3]"trailing"')
 
+    @pytest.mark.parametrize(
+        "msg",
+        ["", "\n", "1", " 1", "1\t\r\n", "1\n\r\t 2", "1\n2\n", "1\n2\n3\n"],
+    )
+    def test_decode_lines(self, msg):
+        dec = msgspec.json.Decoder()
+        sol = []
+        for part in msg.splitlines():
+            if part := part.strip():
+                sol.append(dec.decode(part))
+
+        res = dec.decode_lines(msg)
+        assert res == sol
+
+    def test_decode_lines_typed(self):
+        class Ex(msgspec.Struct):
+            x: int
+
+        sol = [Ex(1), Ex(2)]
+        buf = msgspec.json.Encoder().encode_lines(sol)
+        res = msgspec.json.Decoder(Ex).decode_lines(buf)
+        assert res == sol
+
+    def test_decode_lines_typed_error(self):
+        class Ex(msgspec.Struct):
+            x: int
+
+        buf = b'{"x": 1}\n{"x": "bad"}\n'
+
+        dec = msgspec.json.Decoder(Ex)
+        with pytest.raises(msgspec.ValidationError) as rec:
+            dec.decode_lines(buf)
+
+        assert "Expected `int`, got `str`" in str(rec.value)
+        assert "`$[1].x" in str(rec.value)
+
+    def test_decode_lines_malformed(self):
+        buf = b'{"x": 1}\n{"x": efg'
+        dec = msgspec.json.Decoder()
+        with pytest.raises(msgspec.DecodeError, match="malformed"):
+            dec.decode_lines(buf)
+
+    def test_decode_lines_bad_call(self):
+        dec = msgspec.json.Decoder()
+
+        # `decode_lines` takes exactly one positional, buffer-like argument
+        with pytest.raises(TypeError):
+            dec.decode_lines()
+
+        with pytest.raises(TypeError):
+            dec.decode_lines("{}", 2)
+
+        with pytest.raises(TypeError):
+            dec.decode_lines(1)
+
 
 class TestBoolAndNone:
     def test_encode_none(self):