907 lines
31 KiB
Rust
907 lines
31 KiB
Rust
use rayon::prelude::*;
|
|
|
|
use crate::error::CoreError;
|
|
use crate::error::CoreResult;
|
|
use crate::extent::value_byte_extent;
|
|
use crate::frame::Compression;
|
|
use crate::frame::Encoding;
|
|
use crate::frame::Frame;
|
|
use crate::frame::MessageHeader;
|
|
use crate::frame::decompress_ipc_body;
|
|
use crate::protocol::Attribute;
|
|
use crate::protocol::Primitive;
|
|
use crate::protocol::Shape;
|
|
use crate::protocol::TypeCode;
|
|
use crate::protocol::ValueType;
|
|
use crate::value::Atom;
|
|
use crate::value::Dictionary;
|
|
use crate::value::List;
|
|
use crate::value::Table;
|
|
use crate::value::Value;
|
|
use crate::value::Vector;
|
|
use crate::value::VectorData;
|
|
|
|
/// Fully decoded q IPC message.
|
|
#[derive(Clone, Debug, PartialEq)]
|
|
pub struct DecodedMessage {
|
|
header: MessageHeader,
|
|
value: Value,
|
|
}
|
|
|
|
impl DecodedMessage {
|
|
pub fn new(header: MessageHeader, value: Value) -> Self {
|
|
Self { header, value }
|
|
}
|
|
|
|
pub fn header(&self) -> MessageHeader {
|
|
self.header
|
|
}
|
|
|
|
pub fn value(&self) -> &Value {
|
|
&self.value
|
|
}
|
|
|
|
pub fn qtype(&self) -> ValueType {
|
|
self.value.qtype()
|
|
}
|
|
|
|
pub fn into_parts(self) -> (MessageHeader, Value) {
|
|
(self.header, self.value)
|
|
}
|
|
}
|
|
|
|
/// Options controlling how q IPC messages are decoded.
///
/// `PartialEq`/`Eq` are derived so that option sets can be compared directly
/// (for example in tests or when caching per-configuration state).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct DecodeOptions {
    /// When `true` and the top-level value is a table with at least
    /// `parallel_column_threshold` columns, columns are decoded in parallel
    /// using rayon's thread pool.
    pub parallel: bool,
    /// Minimum number of columns required to trigger parallel decode.
    pub parallel_column_threshold: usize,
}

impl Default for DecodeOptions {
    /// Parallel decoding enabled, kicking in for tables with four or more
    /// columns.
    fn default() -> Self {
        Self {
            parallel: true,
            parallel_column_threshold: 4,
        }
    }
}
|
|
|
|
struct BodyReader {
|
|
bytes: bytes::Bytes,
|
|
offset: usize,
|
|
}
|
|
|
|
impl BodyReader {
|
|
fn new(bytes: bytes::Bytes) -> Self {
|
|
Self { bytes, offset: 0 }
|
|
}
|
|
|
|
fn remaining(&self) -> usize {
|
|
self.bytes.len().saturating_sub(self.offset)
|
|
}
|
|
|
|
fn read_exact<const N: usize>(&mut self) -> CoreResult<[u8; N]> {
|
|
let end = self
|
|
.offset
|
|
.checked_add(N)
|
|
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
|
let slice = self
|
|
.bytes
|
|
.get(self.offset..end)
|
|
.ok_or_else(|| std::io::Error::from(std::io::ErrorKind::UnexpectedEof))?;
|
|
self.offset = end;
|
|
Ok(slice.try_into().expect("fixed-size slice length checked"))
|
|
}
|
|
|
|
/// Returns a borrowed slice of `len` bytes and advances the offset.
|
|
fn read_slice(&mut self, len: usize) -> CoreResult<&[u8]> {
|
|
let end = self
|
|
.offset
|
|
.checked_add(len)
|
|
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
|
let slice = self
|
|
.bytes
|
|
.get(self.offset..end)
|
|
.ok_or_else(|| std::io::Error::from(std::io::ErrorKind::UnexpectedEof))?;
|
|
self.offset = end;
|
|
Ok(slice)
|
|
}
|
|
|
|
/// Returns a zero-copy Bytes wrapper of `len` bytes and advances the offset.
|
|
fn read_bytes(&mut self, len: usize) -> CoreResult<bytes::Bytes> {
|
|
let end = self
|
|
.offset
|
|
.checked_add(len)
|
|
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
|
if end > self.bytes.len() {
|
|
return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
|
|
}
|
|
let slice = self.bytes.slice(self.offset..end);
|
|
self.offset = end;
|
|
Ok(slice)
|
|
}
|
|
|
|
/// Returns a `Bytes` wrapper of `count * size_of::<T>()` bytes, aligned for `T`.
|
|
///
|
|
/// If the current offset is already aligned for `T`, this is zero-copy
|
|
/// (a `Bytes::slice`). Otherwise it copies into a new aligned allocation.
|
|
fn read_bytes_aligned<T: bytemuck::Pod>(&mut self, count: usize) -> CoreResult<bytes::Bytes> {
|
|
let byte_len = count
|
|
.checked_mul(std::mem::size_of::<T>())
|
|
.ok_or(CoreError::LengthOverflow(count))?;
|
|
let end = self
|
|
.offset
|
|
.checked_add(byte_len)
|
|
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
|
if end > self.bytes.len() {
|
|
return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
|
|
}
|
|
let ptr = self.bytes[self.offset..].as_ptr();
|
|
let align = std::mem::align_of::<T>();
|
|
let result = if (ptr as usize) % align == 0 {
|
|
// Already aligned — zero-copy slice.
|
|
self.bytes.slice(self.offset..end)
|
|
} else {
|
|
// Misaligned — must copy into an aligned allocation.
|
|
bytes::Bytes::copy_from_slice(&self.bytes[self.offset..end])
|
|
};
|
|
self.offset = end;
|
|
Ok(result)
|
|
}
|
|
|
|
fn read_u8(&mut self) -> CoreResult<u8> {
|
|
Ok(self.read_exact::<1>()?[0])
|
|
}
|
|
|
|
fn read_i8(&mut self) -> CoreResult<i8> {
|
|
Ok(self.read_u8()? as i8)
|
|
}
|
|
|
|
fn read_i16(&mut self) -> CoreResult<i16> {
|
|
Ok(i16::from_le_bytes(self.read_exact::<2>()?))
|
|
}
|
|
|
|
fn read_i32(&mut self) -> CoreResult<i32> {
|
|
Ok(i32::from_le_bytes(self.read_exact::<4>()?))
|
|
}
|
|
|
|
fn read_i64(&mut self) -> CoreResult<i64> {
|
|
Ok(i64::from_le_bytes(self.read_exact::<8>()?))
|
|
}
|
|
|
|
fn read_f32(&mut self) -> CoreResult<f32> {
|
|
Ok(f32::from_le_bytes(self.read_exact::<4>()?))
|
|
}
|
|
|
|
fn read_f64(&mut self) -> CoreResult<f64> {
|
|
Ok(f64::from_le_bytes(self.read_exact::<8>()?))
|
|
}
|
|
|
|
fn read_guid(&mut self) -> CoreResult<[u8; 16]> {
|
|
self.read_exact::<16>()
|
|
}
|
|
|
|
fn read_length(&mut self) -> CoreResult<usize> {
|
|
let length = self.read_i32()?;
|
|
usize::try_from(length).map_err(|_| CoreError::InvalidCollectionLength(length))
|
|
}
|
|
|
|
fn read_symbol(&mut self) -> CoreResult<bytes::Bytes> {
|
|
let remaining = &self.bytes[self.offset..];
|
|
match memchr::memchr(0, remaining) {
|
|
Some(pos) => {
|
|
let symbol = self.bytes.slice(self.offset..self.offset + pos);
|
|
self.offset += pos + 1;
|
|
Ok(symbol)
|
|
}
|
|
None => Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into()),
|
|
}
|
|
}
|
|
|
|
/// Reads `count` elements of a fixed-width type as a bulk memcpy.
|
|
///
|
|
/// The wire bytes are reinterpreted directly into the target `Vec<T>` via
|
|
/// `bytemuck::cast_slice_mut`, avoiding per-element parsing. This is valid
|
|
/// because we only support little-endian payloads and all target platforms
|
|
/// are little-endian.
|
|
fn read_vec<T: bytemuck::Pod + bytemuck::AnyBitPattern>(
|
|
&mut self,
|
|
count: usize,
|
|
) -> CoreResult<Vec<T>> {
|
|
let byte_len = count
|
|
.checked_mul(std::mem::size_of::<T>())
|
|
.ok_or(CoreError::LengthOverflow(count))?;
|
|
let bytes = self.read_slice(byte_len)?;
|
|
let mut values = vec![T::zeroed(); count];
|
|
let dst: &mut [u8] = bytemuck::cast_slice_mut(&mut values);
|
|
dst.copy_from_slice(bytes);
|
|
Ok(values)
|
|
}
|
|
}
|
|
|
|
fn decode_atom(reader: &mut BodyReader, primitive: Primitive) -> CoreResult<Atom> {
|
|
Ok(match primitive {
|
|
Primitive::Boolean => Atom::Boolean(reader.read_u8()? != 0),
|
|
Primitive::Guid => Atom::Guid(reader.read_guid()?),
|
|
Primitive::Byte => Atom::Byte(reader.read_u8()?),
|
|
Primitive::Short => Atom::Short(reader.read_i16()?),
|
|
Primitive::Int => Atom::Int(reader.read_i32()?),
|
|
Primitive::Long => Atom::Long(reader.read_i64()?),
|
|
Primitive::Real => Atom::Real(reader.read_f32()?),
|
|
Primitive::Float => Atom::Float(reader.read_f64()?),
|
|
Primitive::Char => Atom::Char(reader.read_u8()?),
|
|
Primitive::Symbol => Atom::Symbol(reader.read_symbol()?),
|
|
Primitive::Timestamp => Atom::Timestamp(reader.read_i64()?),
|
|
Primitive::Month => Atom::Month(reader.read_i32()?),
|
|
Primitive::Date => Atom::Date(reader.read_i32()?),
|
|
Primitive::Datetime => Atom::Datetime(reader.read_f64()?),
|
|
Primitive::Timespan => Atom::Timespan(reader.read_i64()?),
|
|
Primitive::Minute => Atom::Minute(reader.read_i32()?),
|
|
Primitive::Second => Atom::Second(reader.read_i32()?),
|
|
Primitive::Time => Atom::Time(reader.read_i32()?),
|
|
Primitive::Mixed => unreachable!("mixed values are not encoded as atoms"),
|
|
})
|
|
}
|
|
|
|
fn decode_vector(
|
|
reader: &mut BodyReader,
|
|
primitive: Primitive,
|
|
attribute: Attribute,
|
|
length: usize,
|
|
) -> CoreResult<Vector> {
|
|
let data = match primitive {
|
|
Primitive::Boolean => VectorData::Boolean(reader.read_bytes(length)?),
|
|
Primitive::Guid => VectorData::Guid(
|
|
reader.read_bytes(
|
|
length
|
|
.checked_mul(16)
|
|
.ok_or(CoreError::LengthOverflow(length))?,
|
|
)?,
|
|
),
|
|
Primitive::Byte => VectorData::Byte(reader.read_bytes(length)?),
|
|
Primitive::Short => VectorData::Short(reader.read_bytes_aligned::<i16>(length)?),
|
|
Primitive::Int => VectorData::Int(reader.read_bytes_aligned::<i32>(length)?),
|
|
Primitive::Long => VectorData::Long(reader.read_bytes_aligned::<i64>(length)?),
|
|
Primitive::Real => VectorData::Real(reader.read_bytes_aligned::<f32>(length)?),
|
|
Primitive::Float => VectorData::Float(reader.read_bytes_aligned::<f64>(length)?),
|
|
Primitive::Char => VectorData::Char(reader.read_bytes(length)?),
|
|
Primitive::Symbol => {
|
|
let mut values = Vec::with_capacity(length);
|
|
for _ in 0..length {
|
|
values.push(reader.read_symbol()?);
|
|
}
|
|
VectorData::Symbol(values)
|
|
}
|
|
Primitive::Timestamp => VectorData::Timestamp(reader.read_bytes_aligned::<i64>(length)?),
|
|
Primitive::Month => VectorData::Month(reader.read_bytes_aligned::<i32>(length)?),
|
|
Primitive::Date => VectorData::Date(reader.read_bytes_aligned::<i32>(length)?),
|
|
Primitive::Datetime => VectorData::Datetime(reader.read_bytes_aligned::<f64>(length)?),
|
|
Primitive::Timespan => VectorData::Timespan(reader.read_bytes_aligned::<i64>(length)?),
|
|
Primitive::Minute => VectorData::Minute(reader.read_bytes_aligned::<i32>(length)?),
|
|
Primitive::Second => VectorData::Second(reader.read_bytes_aligned::<i32>(length)?),
|
|
Primitive::Time => VectorData::Time(reader.read_bytes_aligned::<i32>(length)?),
|
|
Primitive::Mixed => unreachable!("mixed values are not encoded as vectors"),
|
|
};
|
|
|
|
Ok(Vector::new(attribute, data))
|
|
}
|
|
|
|
pub(crate) fn extract_symbol_names(value: &Value) -> CoreResult<Vec<bytes::Bytes>> {
|
|
match value {
|
|
Value::Vector(vector) => match vector.data() {
|
|
VectorData::Symbol(values) => Ok(values.clone()),
|
|
_ => Err(CoreError::InvalidStructure(
|
|
"q table column names must be a symbol vector".to_string(),
|
|
)),
|
|
},
|
|
_ => Err(CoreError::InvalidStructure(
|
|
"q table column names must be encoded as a symbol vector".to_string(),
|
|
)),
|
|
}
|
|
}
|
|
|
|
pub(crate) fn extract_columns(value: &Value) -> CoreResult<Vec<Value>> {
|
|
match value {
|
|
Value::List(list) => Ok(list.values().to_vec()),
|
|
_ => Err(CoreError::InvalidStructure(
|
|
"q table columns must be encoded as a general list".to_string(),
|
|
)),
|
|
}
|
|
}
|
|
|
|
fn decode_inner(reader: &mut BodyReader) -> CoreResult<Value> {
|
|
let type_code = TypeCode::try_from(reader.read_i8()?)?;
|
|
match type_code.shape() {
|
|
Shape::Atom => Ok(Value::Atom(decode_atom(
|
|
reader,
|
|
type_code
|
|
.primitive()
|
|
.expect("atom types always have a primitive"),
|
|
)?)),
|
|
Shape::Vector => {
|
|
let attribute = Attribute::try_from(reader.read_i8()?)?;
|
|
let length = reader.read_length()?;
|
|
Ok(Value::Vector(decode_vector(
|
|
reader,
|
|
type_code
|
|
.primitive()
|
|
.expect("vector types always have a primitive"),
|
|
attribute,
|
|
length,
|
|
)?))
|
|
}
|
|
Shape::List => {
|
|
let attribute = Attribute::try_from(reader.read_i8()?)?;
|
|
let length = reader.read_length()?;
|
|
let mut values = Vec::with_capacity(length);
|
|
for _ in 0..length {
|
|
values.push(decode_inner(reader)?);
|
|
}
|
|
Ok(Value::List(List::new(attribute, values)))
|
|
}
|
|
Shape::Dictionary => {
|
|
let sorted = matches!(type_code, TypeCode::SortedDictionary);
|
|
let keys = decode_inner(reader)?;
|
|
let values = decode_inner(reader)?;
|
|
let dictionary = Dictionary::new(sorted, keys, values);
|
|
dictionary.validate()?;
|
|
Ok(Value::Dictionary(dictionary))
|
|
}
|
|
Shape::Table => {
|
|
let attribute = Attribute::try_from(reader.read_i8()?)?;
|
|
let encoded_dictionary = decode_inner(reader)?;
|
|
let Value::Dictionary(dictionary) = encoded_dictionary else {
|
|
return Err(CoreError::InvalidStructure(
|
|
"q table payload must contain a dictionary body".to_string(),
|
|
));
|
|
};
|
|
let column_names = extract_symbol_names(dictionary.keys())?;
|
|
let columns = extract_columns(dictionary.values())?;
|
|
let table = Table::new(attribute, column_names, columns);
|
|
table.validate()?;
|
|
Ok(Value::Table(table))
|
|
}
|
|
Shape::UnaryPrimitive => Ok(Value::UnaryPrimitive {
|
|
opcode: reader.read_i8()?,
|
|
}),
|
|
Shape::Error => {
|
|
let error_msg = reader.read_symbol()?;
|
|
Err(CoreError::QRuntime(
|
|
String::from_utf8_lossy(&error_msg).into(),
|
|
))
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Parsed table preamble: everything before the column data.
|
|
struct TablePreamble {
|
|
attribute: Attribute,
|
|
column_names: Vec<bytes::Bytes>,
|
|
/// Byte offset within the body where column values start (past the
|
|
/// general-list header).
|
|
columns_start: usize,
|
|
num_columns: usize,
|
|
}
|
|
|
|
/// Parses the table header, dictionary keys (column names), and list header.
|
|
///
|
|
/// Shared by both the sequential and parallel table decode paths.
|
|
fn parse_table_preamble(body: &bytes::Bytes) -> CoreResult<TablePreamble> {
|
|
let mut reader = BodyReader::new(body.clone());
|
|
|
|
// Table: type(1) + attribute(1)
|
|
let _type_code = reader.read_i8()?; // 98 = Table
|
|
let attribute = Attribute::try_from(reader.read_i8()?)?;
|
|
|
|
// Dictionary: type(1) + keys + values
|
|
let dict_type = TypeCode::try_from(reader.read_i8()?)?;
|
|
if !matches!(dict_type, TypeCode::Dictionary | TypeCode::SortedDictionary) {
|
|
return Err(CoreError::InvalidStructure(
|
|
"q table payload must contain a dictionary body".to_string(),
|
|
));
|
|
}
|
|
|
|
// Keys = symbol vector (column names)
|
|
let keys = decode_inner(&mut reader)?;
|
|
let column_names = extract_symbol_names(&keys)?;
|
|
|
|
// Values = general list: type(1) + attr(1) + length(4) + column values
|
|
let list_type = reader.read_i8()?;
|
|
if list_type != 0 {
|
|
return Err(CoreError::InvalidStructure(
|
|
"q table columns must be encoded as a general list".to_string(),
|
|
));
|
|
}
|
|
let _list_attr = reader.read_i8()?;
|
|
let num_columns = reader.read_length()?;
|
|
|
|
if num_columns != column_names.len() {
|
|
return Err(CoreError::InvalidStructure(format!(
|
|
"table has {} column names but {} column values",
|
|
column_names.len(),
|
|
num_columns
|
|
)));
|
|
}
|
|
|
|
Ok(TablePreamble {
|
|
attribute,
|
|
column_names,
|
|
columns_start: reader.offset,
|
|
num_columns,
|
|
})
|
|
}
|
|
|
|
/// Attempts parallel table decode. Returns `None` if the column count is
|
|
/// below the threshold, allowing the caller to fall back to sequential.
|
|
fn try_decode_table_parallel(body: bytes::Bytes, threshold: usize) -> CoreResult<Option<Value>> {
|
|
let preamble = parse_table_preamble(&body)?;
|
|
|
|
if preamble.num_columns < threshold {
|
|
return Ok(None);
|
|
}
|
|
|
|
// Use value_byte_extent to find each column's byte range without parsing
|
|
let mut column_ranges: Vec<(usize, usize)> = Vec::with_capacity(preamble.num_columns);
|
|
let mut scan = preamble.columns_start;
|
|
for _ in 0..preamble.num_columns {
|
|
let extent = value_byte_extent(&body, scan)?;
|
|
column_ranges.push((scan, scan + extent));
|
|
scan += extent;
|
|
}
|
|
|
|
// Parallel decode: each column gets its own byte slice
|
|
let columns: Vec<CoreResult<Value>> = column_ranges
|
|
.par_iter()
|
|
.map(|&(start, end)| {
|
|
let mut col_reader = BodyReader::new(body.slice(start..end));
|
|
let value = decode_inner(&mut col_reader)?;
|
|
if col_reader.remaining() != 0 {
|
|
return Err(CoreError::TrailingBodyBytes(col_reader.remaining()));
|
|
}
|
|
Ok(value)
|
|
})
|
|
.collect();
|
|
|
|
let columns: Vec<Value> = columns.into_iter().collect::<CoreResult<Vec<_>>>()?;
|
|
|
|
let table = Table::new(preamble.attribute, preamble.column_names, columns);
|
|
table.validate()?;
|
|
Ok(Some(Value::Table(table)))
|
|
}
|
|
|
|
/// Decodes one q value body from a little-endian byte slice.
|
|
///
|
|
/// Returns `UnsupportedEndianness` for big-endian payloads.
|
|
pub fn decode_value(body: bytes::Bytes, encoding: Encoding) -> CoreResult<Value> {
|
|
decode_value_with_options(body, encoding, &DecodeOptions::default())
|
|
}
|
|
|
|
/// Decodes one q value body with configurable options.
|
|
///
|
|
/// When `options.parallel` is `true` and the body contains a table with
|
|
/// enough columns, columns are decoded in parallel using rayon.
|
|
pub fn decode_value_with_options(
|
|
body: bytes::Bytes,
|
|
encoding: Encoding,
|
|
options: &DecodeOptions,
|
|
) -> CoreResult<Value> {
|
|
if encoding != Encoding::LittleEndian {
|
|
return Err(CoreError::UnsupportedEndianness(encoding));
|
|
}
|
|
|
|
// Fast path: parallel table decode
|
|
if options.parallel && body.first() == Some(&98) {
|
|
if let Some(table) =
|
|
try_decode_table_parallel(body.clone(), options.parallel_column_threshold)?
|
|
{
|
|
return Ok(table);
|
|
}
|
|
}
|
|
|
|
let mut reader = BodyReader::new(body);
|
|
let value = decode_inner(&mut reader)?;
|
|
if reader.remaining() != 0 {
|
|
return Err(CoreError::TrailingBodyBytes(reader.remaining()));
|
|
}
|
|
Ok(value)
|
|
}
|
|
|
|
/// Decodes a full q IPC frame into its header and value.
|
|
///
|
|
/// Returns `UnsupportedEndianness` for big-endian payloads.
|
|
pub fn decode_message(frame_bytes: bytes::Bytes) -> CoreResult<DecodedMessage> {
|
|
decode_message_with_options(frame_bytes, &DecodeOptions::default())
|
|
}
|
|
|
|
/// Decodes a full q IPC frame with configurable options.
|
|
pub fn decode_message_with_options(
|
|
frame_bytes: bytes::Bytes,
|
|
options: &DecodeOptions,
|
|
) -> CoreResult<DecodedMessage> {
|
|
let frame = Frame::parse(&frame_bytes)?;
|
|
let header = frame.header();
|
|
|
|
if header.encoding() != Encoding::LittleEndian {
|
|
return Err(CoreError::UnsupportedEndianness(header.encoding()));
|
|
}
|
|
|
|
if header.compression() != Compression::Uncompressed {
|
|
let decompressed = decompress_ipc_body(frame.body(), header.encoding())?;
|
|
let value = decode_value_with_options(
|
|
bytes::Bytes::from(decompressed),
|
|
header.encoding(),
|
|
options,
|
|
)?;
|
|
return Ok(DecodedMessage::new(header, value));
|
|
}
|
|
|
|
let value = decode_value_with_options(
|
|
frame_bytes.slice(crate::frame::HEADER_LEN..),
|
|
header.encoding(),
|
|
options,
|
|
)?;
|
|
Ok(DecodedMessage::new(header, value))
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::encode::encode_value;
    use crate::protocol::Attribute;

    // -- Fixture helpers --

    /// Decodes a static little-endian body with the default options.
    fn decode_le(body: &'static [u8]) -> CoreResult<Value> {
        decode_value(bytes::Bytes::from_static(body), Encoding::LittleEndian)
    }

    /// Builds a symbol from a static byte string.
    fn sym(text: &'static [u8]) -> bytes::Bytes {
        bytes::Bytes::from_static(text)
    }

    /// Wraps vector data in an attribute-less vector value.
    fn column(data: VectorData) -> Value {
        Value::Vector(Vector::new(Attribute::None, data))
    }

    /// Builds an attribute-less table value.
    fn table_of(names: Vec<bytes::Bytes>, columns: Vec<Value>) -> Value {
        Value::Table(Table::new(Attribute::None, names, columns))
    }

    // -- Sequential decode tests --

    #[test]
    fn decode_int_atom_body() {
        let body = vec![i8::from(TypeCode::IntAtom) as u8, 42, 0, 0, 0];
        let value = decode_value(bytes::Bytes::from(body), Encoding::LittleEndian).unwrap();

        assert_eq!(value, Value::Atom(Atom::Int(42)));
        assert_eq!(value.qtype(), ValueType::atom(Primitive::Int));
    }

    #[test]
    fn decode_int_vector_body() {
        let value = decode_le(&[6_u8, 1, 3, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0]).unwrap();

        let expected = Value::Vector(Vector::new(
            Attribute::Sorted,
            VectorData::from_i32s(&[1, 2, 3]),
        ));
        assert_eq!(value, expected);
    }

    #[test]
    fn decode_symbol_atom_body() {
        let value = decode_le(&[245_u8, b'a', b'b', 0]).unwrap();

        assert_eq!(value, Value::Atom(Atom::Symbol(sym(b"ab"))));
    }

    #[test]
    fn decode_list_body() {
        let value =
            decode_le(&[0_u8, 0, 2, 0, 0, 0, 250, 42, 0, 0, 0, 245, b'a', b'b', 0]).unwrap();

        let expected = Value::List(List::new(
            Attribute::None,
            vec![
                Value::Atom(Atom::Int(42)),
                Value::Atom(Atom::Symbol(sym(b"ab"))),
            ],
        ));
        assert_eq!(value, expected);
    }

    #[test]
    fn decode_dictionary_body() {
        let value = decode_le(&[
            99_u8, 11, 0, 2, 0, 0, 0, b'a', 0, b'b', 0, 6, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0,
            0,
        ])
        .unwrap();

        let expected = Value::Dictionary(Dictionary::new(
            false,
            column(VectorData::Symbol(vec![sym(b"a"), sym(b"b")])),
            column(VectorData::from_i32s(&[1, 2])),
        ));
        assert_eq!(value, expected);
    }

    #[test]
    fn decode_table_body() {
        let value = decode_le(&[
            98_u8, 0, 99, 11, 0, 2, 0, 0, 0, b's', b'y', b'm', 0, b'p', b'x', 0, 0, 0, 2, 0, 0,
            0, 11, 0, 2, 0, 0, 0, b'a', 0, b'b', 0, 6, 0, 2, 0, 0, 0, 10, 0, 0, 0, 20, 0, 0, 0,
        ])
        .unwrap();

        let expected = table_of(
            vec![sym(b"sym"), sym(b"px")],
            vec![
                column(VectorData::Symbol(vec![sym(b"a"), sym(b"b")])),
                column(VectorData::from_i32s(&[10, 20])),
            ],
        );
        assert_eq!(value, expected);
    }

    #[test]
    fn decode_unary_primitive_body() {
        let value = decode_le(&[101_u8, 0]).unwrap();

        assert_eq!(value, Value::UnaryPrimitive { opcode: 0 });
    }

    #[test]
    fn decode_rejects_trailing_bytes() {
        let outcome = decode_le(&[250_u8, 42, 0, 0, 0, 99]);

        assert!(matches!(outcome, Err(CoreError::TrailingBodyBytes(1))));
    }

    #[test]
    fn decode_rejects_malformed_table_structure() {
        let err = decode_le(&[
            98_u8, 0, 99, 11, 0, 1, 0, 0, 0, b'x', 0, 250, 42, 0, 0, 0,
        ])
        .unwrap_err();

        assert!(matches!(err, CoreError::InvalidStructure(_)));
    }

    #[test]
    fn decode_rejects_big_endian() {
        let outcome = decode_value(
            bytes::Bytes::from_static(&[250_u8, 0, 0, 0, 42]),
            Encoding::BigEndian,
        );

        assert!(matches!(
            outcome,
            Err(CoreError::UnsupportedEndianness(Encoding::BigEndian))
        ));
    }

    // -- Parallel decode tests --

    /// Helper: encode a table, decode it with parallel=false and with
    /// parallel=true, and verify both results equal each other and the input.
    fn assert_parallel_matches_sequential(table: &Value) {
        let body = encode_value(table, Encoding::LittleEndian).unwrap();
        let decode_with = |opts: &DecodeOptions| {
            decode_value_with_options(
                bytes::Bytes::from(body.clone()),
                Encoding::LittleEndian,
                opts,
            )
            .unwrap()
        };

        let seq = decode_with(&DecodeOptions {
            parallel: false,
            ..Default::default()
        });
        let par = decode_with(&DecodeOptions {
            parallel: true,
            parallel_column_threshold: 1, // force parallel even for small tables
        });

        assert_eq!(seq, par, "parallel decode must match sequential decode");
        assert_eq!(&seq, table, "decoded value must match original");
    }

    #[test]
    fn parallel_decode_multi_column_table() {
        let table = table_of(
            vec![sym(b"a"), sym(b"b"), sym(b"c"), sym(b"d")],
            vec![
                column(VectorData::from_i32s(&[1, 2, 3])),
                column(VectorData::Symbol(vec![sym(b"x"), sym(b"y"), sym(b"z")])),
                column(VectorData::from_f64s(&[1.0, 2.0, 3.0])),
                column(VectorData::from_i64s(&[100, 200, 300])),
            ],
        );

        assert_parallel_matches_sequential(&table);
    }

    #[test]
    fn parallel_decode_mixed_type_columns() {
        let table = table_of(
            vec![
                sym(b"bools"),
                sym(b"guids"),
                sym(b"chars"),
                sym(b"times"),
                sym(b"dates"),
            ],
            vec![
                column(VectorData::Boolean(bytes::Bytes::from_static(&[1, 0]))),
                column(VectorData::from_guids(&[[0u8; 16], [1u8; 16]])),
                column(VectorData::Char(bytes::Bytes::from_static(b"ab"))),
                column(VectorData::from_times(&[1000, 2000])),
                column(VectorData::from_dates(&[100, 200])),
            ],
        );

        assert_parallel_matches_sequential(&table);
    }

    #[test]
    fn parallel_decode_below_threshold_falls_back_to_sequential() {
        // 2 columns, threshold 4 → should use sequential path
        let table = table_of(
            vec![sym(b"a"), sym(b"b")],
            vec![
                column(VectorData::from_i32s(&[1, 2])),
                column(VectorData::from_i32s(&[3, 4])),
            ],
        );
        let opts = DecodeOptions {
            parallel: true,
            parallel_column_threshold: 4,
        };

        let body = encode_value(&table, Encoding::LittleEndian).unwrap();
        let decoded =
            decode_value_with_options(bytes::Bytes::from(body), Encoding::LittleEndian, &opts)
                .unwrap();

        assert_eq!(decoded, table);
    }

    #[test]
    fn parallel_decode_non_table_ignores_parallel_flag() {
        // Non-table values should decode normally regardless of parallel flag
        let value = Value::Atom(Atom::Int(42));
        let opts = DecodeOptions {
            parallel: true,
            parallel_column_threshold: 1,
        };

        let body = encode_value(&value, Encoding::LittleEndian).unwrap();
        let decoded =
            decode_value_with_options(bytes::Bytes::from(body), Encoding::LittleEndian, &opts)
                .unwrap();

        assert_eq!(decoded, value);
    }

    #[test]
    fn parse_table_preamble_correct() {
        let table = table_of(
            vec![sym(b"a"), sym(b"b"), sym(b"c"), sym(b"d"), sym(b"e")],
            vec![
                column(VectorData::from_i32s(&[1])),
                column(VectorData::from_i32s(&[2])),
                column(VectorData::from_i32s(&[3])),
                column(VectorData::from_i32s(&[4])),
                column(VectorData::from_i32s(&[5])),
            ],
        );

        let body = encode_value(&table, Encoding::LittleEndian).unwrap();
        let preamble = parse_table_preamble(&bytes::Bytes::from(body)).unwrap();

        assert_eq!(preamble.num_columns, 5);
        assert_eq!(preamble.column_names.len(), 5);
        assert_eq!(&preamble.column_names[0][..], b"a");
        assert_eq!(&preamble.column_names[4][..], b"e");
    }
}
|