// Source: qroissant/crates/qroissant-core/src/decode.rs
// (viewer metadata: 907 lines, 31 KiB, Rust)

use rayon::prelude::*;
use crate::error::CoreError;
use crate::error::CoreResult;
use crate::extent::value_byte_extent;
use crate::frame::Compression;
use crate::frame::Encoding;
use crate::frame::Frame;
use crate::frame::MessageHeader;
use crate::frame::decompress_ipc_body;
use crate::protocol::Attribute;
use crate::protocol::Primitive;
use crate::protocol::Shape;
use crate::protocol::TypeCode;
use crate::protocol::ValueType;
use crate::value::Atom;
use crate::value::Dictionary;
use crate::value::List;
use crate::value::Table;
use crate::value::Value;
use crate::value::Vector;
use crate::value::VectorData;
/// Fully decoded q IPC message: a frame header paired with the value decoded
/// from the frame body.
///
/// Both fields are private; read them through [`DecodedMessage::header`],
/// [`DecodedMessage::value`], or take ownership with
/// [`DecodedMessage::into_parts`].
#[derive(Clone, Debug, PartialEq)]
pub struct DecodedMessage {
/// Header of the frame this message was decoded from.
header: MessageHeader,
/// Value decoded from the message body.
value: Value,
}
impl DecodedMessage {
pub fn new(header: MessageHeader, value: Value) -> Self {
Self { header, value }
}
pub fn header(&self) -> MessageHeader {
self.header
}
pub fn value(&self) -> &Value {
&self.value
}
pub fn qtype(&self) -> ValueType {
self.value.qtype()
}
pub fn into_parts(self) -> (MessageHeader, Value) {
(self.header, self.value)
}
}
/// Tuning knobs for q IPC decoding.
#[derive(Clone, Debug)]
pub struct DecodeOptions {
    /// Enables parallel column decoding on rayon's thread pool. It only takes
    /// effect when the top-level value is a table with at least
    /// `parallel_column_threshold` columns.
    pub parallel: bool,
    /// Smallest column count for which a table is decoded in parallel.
    pub parallel_column_threshold: usize,
}

impl Default for DecodeOptions {
    /// Parallel decoding switched on, for tables with four or more columns.
    fn default() -> Self {
        let parallel = true;
        let parallel_column_threshold = 4;
        DecodeOptions {
            parallel,
            parallel_column_threshold,
        }
    }
}
/// Forward-only cursor over a q IPC message body.
struct BodyReader {
/// The buffer being decoded.
bytes: bytes::Bytes,
/// Index into `bytes` of the next unread byte.
offset: usize,
}
impl BodyReader {
fn new(bytes: bytes::Bytes) -> Self {
Self { bytes, offset: 0 }
}
fn remaining(&self) -> usize {
self.bytes.len().saturating_sub(self.offset)
}
fn read_exact<const N: usize>(&mut self) -> CoreResult<[u8; N]> {
let end = self
.offset
.checked_add(N)
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
let slice = self
.bytes
.get(self.offset..end)
.ok_or_else(|| std::io::Error::from(std::io::ErrorKind::UnexpectedEof))?;
self.offset = end;
Ok(slice.try_into().expect("fixed-size slice length checked"))
}
/// Returns a borrowed slice of `len` bytes and advances the offset.
fn read_slice(&mut self, len: usize) -> CoreResult<&[u8]> {
let end = self
.offset
.checked_add(len)
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
let slice = self
.bytes
.get(self.offset..end)
.ok_or_else(|| std::io::Error::from(std::io::ErrorKind::UnexpectedEof))?;
self.offset = end;
Ok(slice)
}
/// Returns a zero-copy Bytes wrapper of `len` bytes and advances the offset.
fn read_bytes(&mut self, len: usize) -> CoreResult<bytes::Bytes> {
let end = self
.offset
.checked_add(len)
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
if end > self.bytes.len() {
return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
}
let slice = self.bytes.slice(self.offset..end);
self.offset = end;
Ok(slice)
}
/// Returns a `Bytes` wrapper of `count * size_of::<T>()` bytes, aligned for `T`.
///
/// If the current offset is already aligned for `T`, this is zero-copy
/// (a `Bytes::slice`). Otherwise it copies into a new aligned allocation.
fn read_bytes_aligned<T: bytemuck::Pod>(&mut self, count: usize) -> CoreResult<bytes::Bytes> {
let byte_len = count
.checked_mul(std::mem::size_of::<T>())
.ok_or(CoreError::LengthOverflow(count))?;
let end = self
.offset
.checked_add(byte_len)
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
if end > self.bytes.len() {
return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
}
let ptr = self.bytes[self.offset..].as_ptr();
let align = std::mem::align_of::<T>();
let result = if (ptr as usize) % align == 0 {
// Already aligned — zero-copy slice.
self.bytes.slice(self.offset..end)
} else {
// Misaligned — must copy into an aligned allocation.
bytes::Bytes::copy_from_slice(&self.bytes[self.offset..end])
};
self.offset = end;
Ok(result)
}
fn read_u8(&mut self) -> CoreResult<u8> {
Ok(self.read_exact::<1>()?[0])
}
fn read_i8(&mut self) -> CoreResult<i8> {
Ok(self.read_u8()? as i8)
}
fn read_i16(&mut self) -> CoreResult<i16> {
Ok(i16::from_le_bytes(self.read_exact::<2>()?))
}
fn read_i32(&mut self) -> CoreResult<i32> {
Ok(i32::from_le_bytes(self.read_exact::<4>()?))
}
fn read_i64(&mut self) -> CoreResult<i64> {
Ok(i64::from_le_bytes(self.read_exact::<8>()?))
}
fn read_f32(&mut self) -> CoreResult<f32> {
Ok(f32::from_le_bytes(self.read_exact::<4>()?))
}
fn read_f64(&mut self) -> CoreResult<f64> {
Ok(f64::from_le_bytes(self.read_exact::<8>()?))
}
fn read_guid(&mut self) -> CoreResult<[u8; 16]> {
self.read_exact::<16>()
}
fn read_length(&mut self) -> CoreResult<usize> {
let length = self.read_i32()?;
usize::try_from(length).map_err(|_| CoreError::InvalidCollectionLength(length))
}
fn read_symbol(&mut self) -> CoreResult<bytes::Bytes> {
let remaining = &self.bytes[self.offset..];
match memchr::memchr(0, remaining) {
Some(pos) => {
let symbol = self.bytes.slice(self.offset..self.offset + pos);
self.offset += pos + 1;
Ok(symbol)
}
None => Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into()),
}
}
/// Reads `count` elements of a fixed-width type as a bulk memcpy.
///
/// The wire bytes are reinterpreted directly into the target `Vec<T>` via
/// `bytemuck::cast_slice_mut`, avoiding per-element parsing. This is valid
/// because we only support little-endian payloads and all target platforms
/// are little-endian.
fn read_vec<T: bytemuck::Pod + bytemuck::AnyBitPattern>(
&mut self,
count: usize,
) -> CoreResult<Vec<T>> {
let byte_len = count
.checked_mul(std::mem::size_of::<T>())
.ok_or(CoreError::LengthOverflow(count))?;
let bytes = self.read_slice(byte_len)?;
let mut values = vec![T::zeroed(); count];
let dst: &mut [u8] = bytemuck::cast_slice_mut(&mut values);
dst.copy_from_slice(bytes);
Ok(values)
}
}
/// Decodes the payload of a single atom of the given primitive type.
///
/// The type code byte must already have been consumed; only the value bytes
/// are read here.
///
/// # Panics
///
/// Panics on `Primitive::Mixed`, which has no atom encoding.
fn decode_atom(reader: &mut BodyReader, primitive: Primitive) -> CoreResult<Atom> {
    match primitive {
        // One-byte payloads.
        Primitive::Boolean => reader.read_u8().map(|raw| Atom::Boolean(raw != 0)),
        Primitive::Byte => reader.read_u8().map(Atom::Byte),
        Primitive::Char => reader.read_u8().map(Atom::Char),
        // Two-byte payload.
        Primitive::Short => reader.read_i16().map(Atom::Short),
        // Four-byte payloads.
        Primitive::Int => reader.read_i32().map(Atom::Int),
        Primitive::Month => reader.read_i32().map(Atom::Month),
        Primitive::Date => reader.read_i32().map(Atom::Date),
        Primitive::Minute => reader.read_i32().map(Atom::Minute),
        Primitive::Second => reader.read_i32().map(Atom::Second),
        Primitive::Time => reader.read_i32().map(Atom::Time),
        Primitive::Real => reader.read_f32().map(Atom::Real),
        // Eight-byte payloads.
        Primitive::Long => reader.read_i64().map(Atom::Long),
        Primitive::Timestamp => reader.read_i64().map(Atom::Timestamp),
        Primitive::Timespan => reader.read_i64().map(Atom::Timespan),
        Primitive::Float => reader.read_f64().map(Atom::Float),
        Primitive::Datetime => reader.read_f64().map(Atom::Datetime),
        // Sixteen-byte payload.
        Primitive::Guid => reader.read_guid().map(Atom::Guid),
        // NUL-terminated payload.
        Primitive::Symbol => reader.read_symbol().map(Atom::Symbol),
        Primitive::Mixed => unreachable!("mixed values are not encoded as atoms"),
    }
}
fn decode_vector(
reader: &mut BodyReader,
primitive: Primitive,
attribute: Attribute,
length: usize,
) -> CoreResult<Vector> {
let data = match primitive {
Primitive::Boolean => VectorData::Boolean(reader.read_bytes(length)?),
Primitive::Guid => VectorData::Guid(
reader.read_bytes(
length
.checked_mul(16)
.ok_or(CoreError::LengthOverflow(length))?,
)?,
),
Primitive::Byte => VectorData::Byte(reader.read_bytes(length)?),
Primitive::Short => VectorData::Short(reader.read_bytes_aligned::<i16>(length)?),
Primitive::Int => VectorData::Int(reader.read_bytes_aligned::<i32>(length)?),
Primitive::Long => VectorData::Long(reader.read_bytes_aligned::<i64>(length)?),
Primitive::Real => VectorData::Real(reader.read_bytes_aligned::<f32>(length)?),
Primitive::Float => VectorData::Float(reader.read_bytes_aligned::<f64>(length)?),
Primitive::Char => VectorData::Char(reader.read_bytes(length)?),
Primitive::Symbol => {
let mut values = Vec::with_capacity(length);
for _ in 0..length {
values.push(reader.read_symbol()?);
}
VectorData::Symbol(values)
}
Primitive::Timestamp => VectorData::Timestamp(reader.read_bytes_aligned::<i64>(length)?),
Primitive::Month => VectorData::Month(reader.read_bytes_aligned::<i32>(length)?),
Primitive::Date => VectorData::Date(reader.read_bytes_aligned::<i32>(length)?),
Primitive::Datetime => VectorData::Datetime(reader.read_bytes_aligned::<f64>(length)?),
Primitive::Timespan => VectorData::Timespan(reader.read_bytes_aligned::<i64>(length)?),
Primitive::Minute => VectorData::Minute(reader.read_bytes_aligned::<i32>(length)?),
Primitive::Second => VectorData::Second(reader.read_bytes_aligned::<i32>(length)?),
Primitive::Time => VectorData::Time(reader.read_bytes_aligned::<i32>(length)?),
Primitive::Mixed => unreachable!("mixed values are not encoded as vectors"),
};
Ok(Vector::new(attribute, data))
}
/// Returns a copy of the symbols held by `value`, which must be a symbol
/// vector (the encoding used for table column names).
///
/// # Errors
///
/// Returns `InvalidStructure` when `value` is not a vector, or is a vector of
/// any other element type.
pub(crate) fn extract_symbol_names(value: &Value) -> CoreResult<Vec<bytes::Bytes>> {
    let Value::Vector(vector) = value else {
        return Err(CoreError::InvalidStructure(
            "q table column names must be encoded as a symbol vector".to_string(),
        ));
    };
    if let VectorData::Symbol(names) = vector.data() {
        Ok(names.clone())
    } else {
        Err(CoreError::InvalidStructure(
            "q table column names must be a symbol vector".to_string(),
        ))
    }
}
/// Returns a copy of the elements of `value`, which must be a general list
/// (the encoding used for table column data).
///
/// # Errors
///
/// Returns `InvalidStructure` when `value` is anything other than a list.
pub(crate) fn extract_columns(value: &Value) -> CoreResult<Vec<Value>> {
    if let Value::List(list) = value {
        return Ok(list.values().to_vec());
    }
    Err(CoreError::InvalidStructure(
        "q table columns must be encoded as a general list".to_string(),
    ))
}
/// Decodes one complete q value starting at the reader's cursor.
///
/// Reads the leading type code and dispatches on its shape. Lists,
/// dictionaries and tables recurse into this function for their children, so
/// recursion depth follows the nesting depth of the payload.
///
/// # Errors
///
/// * Reader errors (truncated input, negative lengths) are propagated.
/// * Unknown type codes or attributes fail via their `TryFrom` conversions.
/// * Dictionaries and tables are validated after decoding; a table whose body
///   is not a dictionary of symbol-vector keys and general-list values yields
///   `InvalidStructure`.
/// * A value with the error shape is returned as `Err(CoreError::QRuntime)`
///   carrying the message, decoded lossily as UTF-8.
fn decode_inner(reader: &mut BodyReader) -> CoreResult<Value> {
    let type_code = TypeCode::try_from(reader.read_i8()?)?;
    match type_code.shape() {
        Shape::Atom => Ok(Value::Atom(decode_atom(
            reader,
            type_code
                .primitive()
                .expect("atom types always have a primitive"),
        )?)),
        Shape::Vector => {
            let attribute = Attribute::try_from(reader.read_i8()?)?;
            let length = reader.read_length()?;
            Ok(Value::Vector(decode_vector(
                reader,
                type_code
                    .primitive()
                    .expect("vector types always have a primitive"),
                attribute,
                length,
            )?))
        }
        Shape::List => {
            let attribute = Attribute::try_from(reader.read_i8()?)?;
            let length = reader.read_length()?;
            // `length` is attacker-controlled (up to `i32::MAX`). Every
            // element needs at least its one-byte type code, so the number of
            // unread bytes bounds how many elements can really follow. Cap
            // the reservation so a short malformed payload cannot trigger a
            // huge up-front allocation.
            let mut values = Vec::with_capacity(length.min(reader.remaining()));
            for _ in 0..length {
                values.push(decode_inner(reader)?);
            }
            Ok(Value::List(List::new(attribute, values)))
        }
        Shape::Dictionary => {
            let sorted = matches!(type_code, TypeCode::SortedDictionary);
            // Keys are encoded first, then values.
            let keys = decode_inner(reader)?;
            let values = decode_inner(reader)?;
            let dictionary = Dictionary::new(sorted, keys, values);
            dictionary.validate()?;
            Ok(Value::Dictionary(dictionary))
        }
        Shape::Table => {
            let attribute = Attribute::try_from(reader.read_i8()?)?;
            // A table body is a dictionary from column names to column data.
            let encoded_dictionary = decode_inner(reader)?;
            let Value::Dictionary(dictionary) = encoded_dictionary else {
                return Err(CoreError::InvalidStructure(
                    "q table payload must contain a dictionary body".to_string(),
                ));
            };
            let column_names = extract_symbol_names(dictionary.keys())?;
            let columns = extract_columns(dictionary.values())?;
            let table = Table::new(attribute, column_names, columns);
            table.validate()?;
            Ok(Value::Table(table))
        }
        Shape::UnaryPrimitive => Ok(Value::UnaryPrimitive {
            opcode: reader.read_i8()?,
        }),
        Shape::Error => {
            // An error payload is a NUL-terminated message; surface it as a
            // Rust error rather than as a value.
            let error_msg = reader.read_symbol()?;
            Err(CoreError::QRuntime(
                String::from_utf8_lossy(&error_msg).into(),
            ))
        }
    }
}
/// Parsed table preamble: everything before the column data.
///
/// Produced by `parse_table_preamble` and consumed by the parallel table
/// decode path.
struct TablePreamble {
/// Attribute byte read from the table header.
attribute: Attribute,
/// Column names taken from the dictionary's symbol-vector keys.
column_names: Vec<bytes::Bytes>,
/// Byte offset within the body where column values start (past the
/// general-list header).
columns_start: usize,
/// Column count from the general-list header; checked to equal
/// `column_names.len()` when the preamble is parsed.
num_columns: usize,
}
/// Parses the table header, dictionary keys (column names), and list header.
///
/// Shared by both the sequential and parallel table decode paths.
fn parse_table_preamble(body: &bytes::Bytes) -> CoreResult<TablePreamble> {
let mut reader = BodyReader::new(body.clone());
// Table: type(1) + attribute(1)
let _type_code = reader.read_i8()?; // 98 = Table
let attribute = Attribute::try_from(reader.read_i8()?)?;
// Dictionary: type(1) + keys + values
let dict_type = TypeCode::try_from(reader.read_i8()?)?;
if !matches!(dict_type, TypeCode::Dictionary | TypeCode::SortedDictionary) {
return Err(CoreError::InvalidStructure(
"q table payload must contain a dictionary body".to_string(),
));
}
// Keys = symbol vector (column names)
let keys = decode_inner(&mut reader)?;
let column_names = extract_symbol_names(&keys)?;
// Values = general list: type(1) + attr(1) + length(4) + column values
let list_type = reader.read_i8()?;
if list_type != 0 {
return Err(CoreError::InvalidStructure(
"q table columns must be encoded as a general list".to_string(),
));
}
let _list_attr = reader.read_i8()?;
let num_columns = reader.read_length()?;
if num_columns != column_names.len() {
return Err(CoreError::InvalidStructure(format!(
"table has {} column names but {} column values",
column_names.len(),
num_columns
)));
}
Ok(TablePreamble {
attribute,
column_names,
columns_start: reader.offset,
num_columns,
})
}
/// Attempts parallel table decode. Returns `None` if the column count is
/// below the threshold, allowing the caller to fall back to sequential.
///
/// The body is first scanned with `value_byte_extent` to find each column's
/// byte range, then every column is decoded from its own slice on rayon's
/// thread pool.
///
/// # Errors
///
/// * Any error from parsing the preamble, measuring a column, or decoding a
///   column. When several columns fail, the error of the lowest-indexed
///   failing column is returned.
/// * An `UnexpectedEof` I/O error or `LengthOverflow` when a measured column
///   range does not fit inside `body`.
/// * `TrailingBodyBytes` when a column slice is not fully consumed, or when
///   bytes remain in `body` after the last column. The latter matches the
///   check the sequential path performs.
fn try_decode_table_parallel(body: bytes::Bytes, threshold: usize) -> CoreResult<Option<Value>> {
    let preamble = parse_table_preamble(&body)?;
    if preamble.num_columns < threshold {
        return Ok(None);
    }

    // Use value_byte_extent to find each column's byte range without parsing.
    let mut column_ranges: Vec<(usize, usize)> = Vec::with_capacity(preamble.num_columns);
    let mut scan = preamble.columns_start;
    for _ in 0..preamble.num_columns {
        let extent = value_byte_extent(&body, scan)?;
        let end = scan
            .checked_add(extent)
            .ok_or(CoreError::LengthOverflow(usize::MAX))?;
        // `Bytes::slice` panics on an out-of-range request, so reject a range
        // that leaves the buffer here instead of relying on the extent scan.
        if end > body.len() {
            return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
        }
        column_ranges.push((scan, end));
        scan = end;
    }
    // Whatever follows the last column is not part of the table.
    let trailing = body.len() - scan;

    // Parallel decode: each column gets its own byte slice. Results are
    // gathered per column first so that the error reported is always the one
    // from the earliest failing column, independent of thread scheduling.
    let columns: Vec<CoreResult<Value>> = column_ranges
        .par_iter()
        .map(|&(start, end)| {
            let mut col_reader = BodyReader::new(body.slice(start..end));
            let value = decode_inner(&mut col_reader)?;
            if col_reader.remaining() != 0 {
                return Err(CoreError::TrailingBodyBytes(col_reader.remaining()));
            }
            Ok(value)
        })
        .collect();
    let columns: Vec<Value> = columns.into_iter().collect::<CoreResult<Vec<_>>>()?;

    let table = Table::new(preamble.attribute, preamble.column_names, columns);
    table.validate()?;

    // Checked last so that decode and validation errors take precedence, as
    // they do on the sequential path.
    if trailing != 0 {
        return Err(CoreError::TrailingBodyBytes(trailing));
    }
    Ok(Some(Value::Table(table)))
}
/// Decodes a single q value from a little-endian message body, using the
/// default [`DecodeOptions`].
///
/// # Errors
///
/// Returns `UnsupportedEndianness` for big-endian payloads, plus any error
/// produced by [`decode_value_with_options`].
pub fn decode_value(body: bytes::Bytes, encoding: Encoding) -> CoreResult<Value> {
    let defaults = DecodeOptions::default();
    decode_value_with_options(body, encoding, &defaults)
}
/// Decodes a single q value from a message body under caller-supplied options.
///
/// If `options.parallel` is set and the body starts with the table type byte,
/// a parallel column decode is attempted first. When that declines (too few
/// columns) the body is decoded sequentially instead.
///
/// # Errors
///
/// * `UnsupportedEndianness` when `encoding` is not little-endian.
/// * `TrailingBodyBytes` when the sequential decode leaves unread bytes.
/// * Any error raised while decoding the value itself.
pub fn decode_value_with_options(
    body: bytes::Bytes,
    encoding: Encoding,
    options: &DecodeOptions,
) -> CoreResult<Value> {
    /// Wire type byte that introduces a table.
    const TABLE_TYPE_BYTE: u8 = 98;

    if encoding != Encoding::LittleEndian {
        return Err(CoreError::UnsupportedEndianness(encoding));
    }

    // Fast path: hand tables to the parallel decoder when enabled.
    let starts_with_table = body.first().copied() == Some(TABLE_TYPE_BYTE);
    if options.parallel && starts_with_table {
        let attempt = try_decode_table_parallel(body.clone(), options.parallel_column_threshold)?;
        if let Some(table) = attempt {
            return Ok(table);
        }
    }

    // Sequential path: decode exactly one value and insist nothing is left.
    let mut reader = BodyReader::new(body);
    let value = decode_inner(&mut reader)?;
    match reader.remaining() {
        0 => Ok(value),
        leftover => Err(CoreError::TrailingBodyBytes(leftover)),
    }
}
/// Decodes a complete q IPC frame (header plus body) using the default
/// [`DecodeOptions`].
///
/// # Errors
///
/// Returns `UnsupportedEndianness` for big-endian payloads, plus any error
/// produced by [`decode_message_with_options`].
pub fn decode_message(frame_bytes: bytes::Bytes) -> CoreResult<DecodedMessage> {
    let defaults = DecodeOptions::default();
    decode_message_with_options(frame_bytes, &defaults)
}
/// Decodes a complete q IPC frame (header plus body) under caller-supplied
/// options.
///
/// Compressed bodies are decompressed into a new buffer before decoding.
/// Uncompressed bodies are decoded from a slice of `frame_bytes` that starts
/// right after the header.
///
/// # Errors
///
/// * Any error from `Frame::parse` or `decompress_ipc_body`.
/// * `UnsupportedEndianness` when the header declares a non-little-endian
///   encoding.
/// * Any error from [`decode_value_with_options`].
pub fn decode_message_with_options(
    frame_bytes: bytes::Bytes,
    options: &DecodeOptions,
) -> CoreResult<DecodedMessage> {
    let frame = Frame::parse(&frame_bytes)?;
    let header = frame.header();
    if header.encoding() != Encoding::LittleEndian {
        return Err(CoreError::UnsupportedEndianness(header.encoding()));
    }

    // Pick the bytes to decode: the decompressed payload, or the frame's own
    // bytes past the header when no compression was applied.
    let body = if header.compression() != Compression::Uncompressed {
        let decompressed = decompress_ipc_body(frame.body(), header.encoding())?;
        bytes::Bytes::from(decompressed)
    } else {
        frame_bytes.slice(crate::frame::HEADER_LEN..)
    };

    let value = decode_value_with_options(body, header.encoding(), options)?;
    Ok(DecodedMessage::new(header, value))
}
// Unit tests for the decoder. The first group feeds hand-written wire bytes
// through `decode_value`; the second group round-trips values through
// `encode_value` to compare the parallel and sequential decode paths.
#[cfg(test)]
mod tests {
use super::*;
use crate::protocol::Attribute;
// Int atom: type code byte followed by a 4-byte little-endian payload.
#[test]
fn decode_int_atom_body() {
let value = decode_value(
bytes::Bytes::from(vec![i8::from(TypeCode::IntAtom) as u8, 42, 0, 0, 0]),
Encoding::LittleEndian,
)
.unwrap();
assert_eq!(value, Value::Atom(Atom::Int(42)));
assert_eq!(value.qtype(), ValueType::atom(Primitive::Int));
}
// Int vector: type 6, attribute byte 1 (expected to decode as `Sorted`),
// length 3, then three 4-byte little-endian elements.
#[test]
fn decode_int_vector_body() {
let value = decode_value(
bytes::Bytes::from_static(&[6_u8, 1, 3, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0]),
Encoding::LittleEndian,
)
.unwrap();
assert_eq!(
value,
Value::Vector(Vector::new(
Attribute::Sorted,
VectorData::from_i32s(&[1, 2, 3]),
))
);
}
// Symbol atom: type byte 245 followed by the NUL-terminated text "ab".
#[test]
fn decode_symbol_atom_body() {
let value = decode_value(
bytes::Bytes::from_static(&[245_u8, b'a', b'b', 0]),
Encoding::LittleEndian,
)
.unwrap();
assert_eq!(
value,
Value::Atom(Atom::Symbol(bytes::Bytes::from_static(b"ab")))
);
}
// General list: type 0, attribute 0, length 2, then an int atom (250) and a
// symbol atom (245) encoded back to back.
#[test]
fn decode_list_body() {
let value = decode_value(
bytes::Bytes::from_static(&[0_u8, 0, 2, 0, 0, 0, 250, 42, 0, 0, 0, 245, b'a', b'b', 0]),
Encoding::LittleEndian,
)
.unwrap();
assert_eq!(
value,
Value::List(List::new(
Attribute::None,
vec![
Value::Atom(Atom::Int(42)),
Value::Atom(Atom::Symbol(bytes::Bytes::from_static(b"ab")))
],
))
);
}
// Dictionary: type 99, then a symbol vector of keys (type 11) followed by an
// int vector of values (type 6).
#[test]
fn decode_dictionary_body() {
let value = decode_value(
bytes::Bytes::from_static(&[
99_u8, 11, 0, 2, 0, 0, 0, b'a', 0, b'b', 0, 6, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0,
0,
]),
Encoding::LittleEndian,
)
.unwrap();
assert_eq!(
value,
Value::Dictionary(Dictionary::new(
false,
Value::Vector(Vector::new(
Attribute::None,
VectorData::Symbol(vec![
bytes::Bytes::from_static(b"a"),
bytes::Bytes::from_static(b"b")
]),
)),
Value::Vector(Vector::new(Attribute::None, VectorData::from_i32s(&[1, 2]),)),
))
);
}
// Table: type 98, attribute 0, then a dictionary (99) whose keys are the
// column names `sym`/`px` and whose values are a general list of two columns.
#[test]
fn decode_table_body() {
let value = decode_value(
bytes::Bytes::from_static(&[
98_u8, 0, 99, 11, 0, 2, 0, 0, 0, b's', b'y', b'm', 0, b'p', b'x', 0, 0, 0, 2, 0, 0,
0, 11, 0, 2, 0, 0, 0, b'a', 0, b'b', 0, 6, 0, 2, 0, 0, 0, 10, 0, 0, 0, 20, 0, 0, 0,
]),
Encoding::LittleEndian,
)
.unwrap();
assert_eq!(
value,
Value::Table(Table::new(
Attribute::None,
vec![
bytes::Bytes::from_static(b"sym"),
bytes::Bytes::from_static(b"px")
],
vec![
Value::Vector(Vector::new(
Attribute::None,
VectorData::Symbol(vec![
bytes::Bytes::from_static(b"a"),
bytes::Bytes::from_static(b"b")
]),
)),
Value::Vector(Vector::new(
Attribute::None,
VectorData::from_i32s(&[10, 20]),
)),
],
))
);
}
// Unary primitive: type 101 followed by a one-byte opcode.
#[test]
fn decode_unary_primitive_body() {
let value = decode_value(
bytes::Bytes::from_static(&[101_u8, 0]),
Encoding::LittleEndian,
)
.unwrap();
assert_eq!(value, Value::UnaryPrimitive { opcode: 0 });
}
// A complete int atom followed by one stray byte must be rejected.
#[test]
fn decode_rejects_trailing_bytes() {
assert!(matches!(
decode_value(
bytes::Bytes::from_static(&[250_u8, 42, 0, 0, 0, 99]),
Encoding::LittleEndian
),
Err(CoreError::TrailingBodyBytes(1))
));
}
// Table whose dictionary values are an int atom (250) instead of a general
// list of columns.
#[test]
fn decode_rejects_malformed_table_structure() {
let err = decode_value(
bytes::Bytes::from_static(&[
98_u8, 0, 99, 11, 0, 1, 0, 0, 0, b'x', 0, 250, 42, 0, 0, 0,
]),
Encoding::LittleEndian,
)
.unwrap_err();
assert!(matches!(err, CoreError::InvalidStructure(_)));
}
// Big-endian payloads are refused before any byte of the body is decoded.
#[test]
fn decode_rejects_big_endian() {
assert!(matches!(
decode_value(
bytes::Bytes::from_static(&[250_u8, 0, 0, 0, 42]),
Encoding::BigEndian
),
Err(CoreError::UnsupportedEndianness(Encoding::BigEndian))
));
}
// -- Parallel decode tests --
use crate::encode::encode_value;
/// Helper: encode a table, decode with parallel=true and parallel=false,
/// and verify both produce identical results.
fn assert_parallel_matches_sequential(table: &Value) {
let body = encode_value(table, Encoding::LittleEndian).unwrap();
let seq_opts = DecodeOptions {
parallel: false,
..Default::default()
};
let par_opts = DecodeOptions {
parallel: true,
parallel_column_threshold: 1, // force parallel even for small tables
};
let seq = decode_value_with_options(
bytes::Bytes::from(body.clone()),
Encoding::LittleEndian,
&seq_opts,
)
.unwrap();
let par = decode_value_with_options(
bytes::Bytes::from(body.clone()),
Encoding::LittleEndian,
&par_opts,
)
.unwrap();
assert_eq!(seq, par, "parallel decode must match sequential decode");
assert_eq!(&seq, table, "decoded value must match original");
}
// Four columns of different element types (int, symbol, float, long).
#[test]
fn parallel_decode_multi_column_table() {
let table = Value::Table(Table::new(
Attribute::None,
vec![
bytes::Bytes::from_static(b"a"),
bytes::Bytes::from_static(b"b"),
bytes::Bytes::from_static(b"c"),
bytes::Bytes::from_static(b"d"),
],
vec![
Value::Vector(Vector::new(
Attribute::None,
VectorData::from_i32s(&[1, 2, 3]),
)),
Value::Vector(Vector::new(
Attribute::None,
VectorData::Symbol(vec![
bytes::Bytes::from_static(b"x"),
bytes::Bytes::from_static(b"y"),
bytes::Bytes::from_static(b"z"),
]),
)),
Value::Vector(Vector::new(
Attribute::None,
VectorData::from_f64s(&[1.0, 2.0, 3.0]),
)),
Value::Vector(Vector::new(
Attribute::None,
VectorData::from_i64s(&[100, 200, 300]),
)),
],
));
assert_parallel_matches_sequential(&table);
}
// Five columns covering boolean, guid, char, time and date vectors.
#[test]
fn parallel_decode_mixed_type_columns() {
let table = Value::Table(Table::new(
Attribute::None,
vec![
bytes::Bytes::from_static(b"bools"),
bytes::Bytes::from_static(b"guids"),
bytes::Bytes::from_static(b"chars"),
bytes::Bytes::from_static(b"times"),
bytes::Bytes::from_static(b"dates"),
],
vec![
Value::Vector(Vector::new(
Attribute::None,
VectorData::Boolean(bytes::Bytes::from_static(&[1, 0])),
)),
Value::Vector(Vector::new(
Attribute::None,
VectorData::from_guids(&[[0u8; 16], [1u8; 16]]),
)),
Value::Vector(Vector::new(
Attribute::None,
VectorData::Char(bytes::Bytes::from_static(b"ab")),
)),
Value::Vector(Vector::new(
Attribute::None,
VectorData::from_times(&[1000, 2000]),
)),
Value::Vector(Vector::new(
Attribute::None,
VectorData::from_dates(&[100, 200]),
)),
],
));
assert_parallel_matches_sequential(&table);
}
#[test]
fn parallel_decode_below_threshold_falls_back_to_sequential() {
// 2 columns, threshold 4 → should use sequential path
let table = Value::Table(Table::new(
Attribute::None,
vec![
bytes::Bytes::from_static(b"a"),
bytes::Bytes::from_static(b"b"),
],
vec![
Value::Vector(Vector::new(Attribute::None, VectorData::from_i32s(&[1, 2]))),
Value::Vector(Vector::new(Attribute::None, VectorData::from_i32s(&[3, 4]))),
],
));
let body = encode_value(&table, Encoding::LittleEndian).unwrap();
let opts = DecodeOptions {
parallel: true,
parallel_column_threshold: 4,
};
let decoded = decode_value_with_options(
bytes::Bytes::from(body.clone()),
Encoding::LittleEndian,
&opts,
)
.unwrap();
assert_eq!(decoded, table);
}
#[test]
fn parallel_decode_non_table_ignores_parallel_flag() {
// Non-table values should decode normally regardless of parallel flag
let value = Value::Atom(Atom::Int(42));
let body = encode_value(&value, Encoding::LittleEndian).unwrap();
let opts = DecodeOptions {
parallel: true,
parallel_column_threshold: 1,
};
let decoded = decode_value_with_options(
bytes::Bytes::from(body.clone()),
Encoding::LittleEndian,
&opts,
)
.unwrap();
assert_eq!(decoded, value);
}
// Checks the column count and the first and last column names recovered from
// an encoded five-column table.
#[test]
fn parse_table_preamble_correct() {
let table = Value::Table(Table::new(
Attribute::None,
vec![
bytes::Bytes::from_static(b"a"),
bytes::Bytes::from_static(b"b"),
bytes::Bytes::from_static(b"c"),
bytes::Bytes::from_static(b"d"),
bytes::Bytes::from_static(b"e"),
],
vec![
Value::Vector(Vector::new(Attribute::None, VectorData::from_i32s(&[1]))),
Value::Vector(Vector::new(Attribute::None, VectorData::from_i32s(&[2]))),
Value::Vector(Vector::new(Attribute::None, VectorData::from_i32s(&[3]))),
Value::Vector(Vector::new(Attribute::None, VectorData::from_i32s(&[4]))),
Value::Vector(Vector::new(Attribute::None, VectorData::from_i32s(&[5]))),
],
));
let body = encode_value(&table, Encoding::LittleEndian).unwrap();
let preamble = parse_table_preamble(&bytes::Bytes::from(body)).unwrap();
assert_eq!(preamble.num_columns, 5);
assert_eq!(preamble.column_names.len(), 5);
assert_eq!(&preamble.column_names[0][..], b"a");
assert_eq!(&preamble.column_names[4][..], b"e");
}
}