This repository has been archived by the owner on Feb 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 224
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added support to write Avro async (#736)
- Loading branch information
1 parent
e001ba5
commit 299df30
Showing
7 changed files
with
158 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
use futures::{AsyncWrite, AsyncWriteExt}; | ||
|
||
use crate::error::Result; | ||
|
||
use super::super::write::{util::zigzag_encode, SYNC_NUMBER}; | ||
use super::super::CompressedBlock; | ||
|
||
/// Writes a [`CompressedBlock`] to `writer` | ||
pub async fn write_block<W>(writer: &mut W, compressed_block: &CompressedBlock) -> Result<()> | ||
where | ||
W: AsyncWrite + Unpin, | ||
{ | ||
// write size and rows | ||
let mut scratch = Vec::with_capacity(10); | ||
zigzag_encode(compressed_block.number_of_rows as i64, &mut scratch)?; | ||
writer.write_all(&scratch).await?; | ||
scratch.clear(); | ||
zigzag_encode(compressed_block.data.len() as i64, &mut scratch)?; | ||
writer.write_all(&scratch).await?; | ||
|
||
writer.write_all(&compressed_block.data).await?; | ||
|
||
writer.write_all(&SYNC_NUMBER).await?; | ||
|
||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
//! Asynchronous APIs to write Avro files. | ||
mod block; | ||
pub use block::write_block; | ||
|
||
use avro_schema::{Field as AvroField, Record, Schema as AvroSchema}; | ||
use futures::{AsyncWrite, AsyncWriteExt}; | ||
|
||
use crate::error::Result; | ||
|
||
use super::{ | ||
write::{write_schema, AVRO_MAGIC, SYNC_NUMBER}, | ||
Compression, | ||
}; | ||
|
||
/// Writes Avro's metadata to `writer`. | ||
pub async fn write_metadata<W>( | ||
writer: &mut W, | ||
fields: Vec<AvroField>, | ||
compression: Option<Compression>, | ||
) -> Result<()> | ||
where | ||
W: AsyncWrite + Unpin, | ||
{ | ||
writer.write_all(&AVRO_MAGIC).await?; | ||
|
||
// * file metadata, including the schema. | ||
let schema = AvroSchema::Record(Record::new("", fields)); | ||
|
||
let mut scratch = vec![]; | ||
write_schema(&mut scratch, &schema, compression)?; | ||
|
||
writer.write_all(&scratch).await?; | ||
|
||
// The 16-byte, randomly-generated sync marker for this file. | ||
writer.write_all(&SYNC_NUMBER).await?; | ||
|
||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
use arrow2::array::*; | ||
use arrow2::chunk::Chunk; | ||
use arrow2::datatypes::*; | ||
use arrow2::error::Result; | ||
use arrow2::io::avro::write; | ||
use arrow2::io::avro::write_async; | ||
|
||
use super::read::read_avro; | ||
use super::write::{data, schema, serialize_to_block}; | ||
|
||
async fn write_avro<R: AsRef<dyn Array>>( | ||
columns: &Chunk<R>, | ||
schema: &Schema, | ||
compression: Option<write::Compression>, | ||
) -> Result<Vec<u8>> { | ||
// usually done on a different thread pool | ||
let compressed_block = serialize_to_block(columns, schema, compression)?; | ||
|
||
let avro_fields = write::to_avro_schema(schema)?; | ||
let mut file = vec![]; | ||
|
||
write_async::write_metadata(&mut file, avro_fields.clone(), compression).await?; | ||
|
||
write_async::write_block(&mut file, &compressed_block).await?; | ||
|
||
Ok(file) | ||
} | ||
|
||
async fn roundtrip(compression: Option<write::Compression>) -> Result<()> { | ||
let expected = data(); | ||
let expected_schema = schema(); | ||
|
||
let data = write_avro(&expected, &expected_schema, compression).await?; | ||
|
||
let (result, read_schema) = read_avro(&data)?; | ||
|
||
assert_eq!(expected_schema, read_schema); | ||
for (c1, c2) in result.columns().iter().zip(expected.columns().iter()) { | ||
assert_eq!(c1.as_ref(), c2.as_ref()); | ||
} | ||
Ok(()) | ||
} | ||
|
||
#[tokio::test] | ||
async fn no_compression() -> Result<()> { | ||
roundtrip(None).await | ||
} |