Skip to content

Add support for streaming a large JSON array #526

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,15 @@ impl<'de, R: Read<'de>> Deserializer<R> {
}
}

/// Turn this deserializer into an `ArrayDeserializer`, which yields the
/// elements of a top-level JSON array one at a time instead of collecting
/// them into a container.
pub fn into_array(self) -> ArrayDeserializer<'de, R> {
    let de = self;
    ArrayDeserializer {
        de,
        // The opening '[' has not been consumed yet.
        started: false,
        lifetime: PhantomData,
    }
}

/// Parse arbitrarily deep JSON structures without any consideration for
/// overflowing the stack.
///
Expand Down Expand Up @@ -2169,6 +2178,109 @@ where
}
}



//////////////////////////////////////////////////////////////////////////////

/// A streaming JSON array deserializer.
///
/// An array deserializer can be created from any JSON deserializer using the
/// `Deserializer::into_array` method.
///
/// The top-level data should be a JSON array, but each array element can
/// consist of any JSON value. An array deserializer only needs to keep a
/// single array element in memory, and is therefore preferable over
/// deserializing into a container type such as `Vec` when the complete array
/// is too large to fit in memory.
///
/// Elements are pulled one at a time with `next`, which takes the target type
/// as a type parameter, so consecutive elements may be deserialized into
/// different types. `next` returns `None` once the closing `]` has been
/// consumed, and `Some(Err(..))` when the input is malformed.
///
/// ```edition2018
/// use serde_json::{Deserializer, Value};
///
/// fn main() {
///     let data = "[{\"k\": 3}, 1, \"cool\", \"stuff\", [0, 1, 2]]";
///
///     let mut iter = Deserializer::from_str(data).into_array();
///
///     while let Some(value) = iter.next::<Value>() {
///         println!("{}", value.unwrap());
///     }
/// }
/// ```
pub struct ArrayDeserializer<'de, R> {
// Underlying JSON deserializer that every element is read from.
de: Deserializer<R>,
// True once the opening '[' of the array has been consumed.
started: bool,
// Marks the `'de` lifetime parameter as used; no borrowed data is stored here.
lifetime: PhantomData<&'de ()>,
}

impl<'de, R> ArrayDeserializer<'de, R>
where
R: read::Read<'de>,
{
/// Create a JSON array deserializer from one of the possible serde_json
/// input sources.
///
/// Typically it is more convenient to use one of these methods instead:
///
/// - Deserializer::from_str(...).into_array()
/// - Deserializer::from_bytes(...).into_array()
/// - Deserializer::from_reader(...).into_array()
pub fn new(read: R) -> Self {
ArrayDeserializer {
de: Deserializer::new(read),
started: false,
lifetime: PhantomData,
}
}

fn end<T: de::Deserialize<'de>>(&mut self) -> Option<Result<T>> {
self.de.eat_char();
match self.de.end() {
Ok(_) => None,
Err(e) => Some(Err(e)),
}
}

fn next_value<T: de::Deserialize<'de>>(&mut self) -> Option<Result<T>> {
match de::Deserialize::deserialize(&mut self.de) {
Ok(v) => Some(Ok(v)),
Err(e) => Some(Err(e))
}
}

/// Return the next element from the array. Returns None if there are no more elements.
pub fn next<T: de::Deserialize<'de>>(&mut self) -> Option<Result<T>> {
match self.de.parse_whitespace() {
Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
Ok(Some(b'[')) if !self.started => {
self.started = true;
self.de.eat_char();

// We have to peek at the next character here to handle an empty array.
match self.de.parse_whitespace() {
Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
Ok(Some(b']')) => self.end(),
Ok(Some(_)) => self.next_value(),
Err(e) => Some(Err(e)),
}
},
Ok(Some(b']')) if self.started => self.end(),
Ok(Some(b',')) if self.started => {
self.de.eat_char();

match self.de.parse_whitespace() {
Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
Ok(Some(b']')) => Some(Err(self.de.peek_error(ErrorCode::TrailingComma))),
Ok(Some(_)) => self.next_value(),
Err(e) => Some(Err(e)),
}
},
Ok(Some(_)) => Some(Err(self.de.peek_error(ErrorCode::ExpectedSomeValue))),
Err(e) => Some(Err(e)),
}
}
}



//////////////////////////////////////////////////////////////////////////////

fn from_trait<'de, R, T>(read: R) -> Result<T>
Expand Down
119 changes: 119 additions & 0 deletions tests/array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#![cfg(not(feature = "preserve_order"))]

extern crate serde;

#[macro_use]
extern crate serde_json;

use serde_json::{Deserializer, Value};

// Rustfmt issue https://github.com/rust-lang-nursery/rustfmt/issues/2740
//
// Expands `$test` three times, binding `$stream` to an array deserializer
// built from the same `$data` via `from_str`, `from_slice` and `from_reader`,
// so every test body is exercised against each input source.
#[cfg_attr(rustfmt, rustfmt_skip)]
macro_rules! test_stream {
    ($data:expr, |$stream:ident| $test:block) => {
        {
            let mut $stream = Deserializer::from_str($data).into_array();
            $test
        }
        {
            let mut $stream = Deserializer::from_slice($data.as_bytes()).into_array();
            $test
        }
        {
            let mut reader = $data.as_bytes();
            let mut $stream = Deserializer::from_reader(&mut reader).into_array();
            $test
        }
    };
}

// An empty array yields no elements at all.
#[test]
fn test_json_array_empty() {
    test_stream!("[]", |stream| {
        let first = stream.next::<Value>();
        assert!(first.is_none());
    });
}

// Whitespace before, inside and after the array is skipped.
#[test]
fn test_json_array_whitespace() {
    let data = "\r [\n{\"x\":42}\t, {\"y\":43}\n] \t\n";

    test_stream!(data, |stream| {
        let first = stream.next::<Value>().unwrap().unwrap();
        assert_eq!(first["x"], 42);

        let second = stream.next::<Value>().unwrap().unwrap();
        assert_eq!(second["y"], 43);

        let rest = stream.next::<Value>();
        assert!(rest.is_none());
    });
}

// Input cut off in the middle of the second element: the first element is
// still delivered, the second one surfaces an EOF error.
#[test]
fn test_json_array_truncated() {
    let data = "[{\"x\":40},{\"x\":";

    test_stream!(data, |stream| {
        let complete = stream.next::<Value>().unwrap().unwrap();
        assert_eq!(complete["x"], 40);

        let err = stream.next::<Value>().unwrap().unwrap_err();
        assert!(err.is_eof());
    });
}

// Each element can be deserialized into a different target type.
#[test]
fn test_json_array_primitive() {
    let data = "[{}, true, 1, [], 1.0, \"hey\", null]";

    test_stream!(data, |stream| {
        let object = stream.next::<Value>().unwrap().unwrap();
        assert_eq!(object, json!({}));

        let flag = stream.next::<bool>().unwrap().unwrap();
        assert_eq!(flag, true);

        let integer = stream.next::<u32>().unwrap().unwrap();
        assert_eq!(integer, 1);

        let array = stream.next::<Value>().unwrap().unwrap();
        assert_eq!(array, json!([]));

        let float = stream.next::<f32>().unwrap().unwrap();
        assert_eq!(float, 1.0);

        let text = stream.next::<String>().unwrap().unwrap();
        assert_eq!(text, "hey");

        let null = stream.next::<Value>().unwrap().unwrap();
        assert_eq!(null, Value::Null);

        let rest = stream.next::<Value>();
        assert!(rest.is_none());
    });
}

// Anything other than whitespace after the closing ']' is reported as an error.
#[test]
fn test_json_array_tailing_data() {
    test_stream!("[]e", |stream| {
        let err = stream.next::<Value>().unwrap().unwrap_err();
        let message = err.to_string();
        assert_eq!(message, "trailing characters at line 1 column 3");
    });
}

// A comma directly before the closing ']' is rejected after the valid
// element has been delivered.
#[test]
fn test_json_array_tailing_comma() {
    test_stream!("[true,]", |stream| {
        let first = stream.next::<Value>().unwrap().unwrap();
        assert_eq!(first, true);

        let err = stream.next::<Value>().unwrap().unwrap_err();
        let message = err.to_string();
        assert_eq!(message, "trailing comma at line 1 column 7");
    });
}

// Completely empty input is an EOF error, not an empty array.
#[test]
fn test_json_array_eof() {
    test_stream!("", |stream| {
        let err = stream.next::<Value>().unwrap().unwrap_err();
        let message = err.to_string();
        assert_eq!(message, "EOF while parsing a value at line 1 column 0");
    });
}