Vendor qroissant 0.3.0 baseline
This commit is contained in:
commit
53ac90fe84
56 changed files with 18309 additions and 0 deletions
19
crates/qroissant-core/Cargo.toml
Normal file
19
crates/qroissant-core/Cargo.toml
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
[package]
|
||||
name = "qroissant-core"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[lib]
|
||||
name = "qroissant_core"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[dependencies]
|
||||
bytemuck = { version = "1", features = ["derive"] }
|
||||
bytes = "1.11.1"
|
||||
memchr = "2"
|
||||
rayon = "1.10"
|
||||
tokio = { workspace = true, features = ["io-util"] }
|
||||
futures = { workspace = true }
|
||||
|
||||
907
crates/qroissant-core/src/decode.rs
Normal file
907
crates/qroissant-core/src/decode.rs
Normal file
|
|
@ -0,0 +1,907 @@
|
|||
use rayon::prelude::*;
|
||||
|
||||
use crate::error::CoreError;
|
||||
use crate::error::CoreResult;
|
||||
use crate::extent::value_byte_extent;
|
||||
use crate::frame::Compression;
|
||||
use crate::frame::Encoding;
|
||||
use crate::frame::Frame;
|
||||
use crate::frame::MessageHeader;
|
||||
use crate::frame::decompress_ipc_body;
|
||||
use crate::protocol::Attribute;
|
||||
use crate::protocol::Primitive;
|
||||
use crate::protocol::Shape;
|
||||
use crate::protocol::TypeCode;
|
||||
use crate::protocol::ValueType;
|
||||
use crate::value::Atom;
|
||||
use crate::value::Dictionary;
|
||||
use crate::value::List;
|
||||
use crate::value::Table;
|
||||
use crate::value::Value;
|
||||
use crate::value::Vector;
|
||||
use crate::value::VectorData;
|
||||
|
||||
/// Fully decoded q IPC message.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct DecodedMessage {
|
||||
header: MessageHeader,
|
||||
value: Value,
|
||||
}
|
||||
|
||||
impl DecodedMessage {
|
||||
pub fn new(header: MessageHeader, value: Value) -> Self {
|
||||
Self { header, value }
|
||||
}
|
||||
|
||||
pub fn header(&self) -> MessageHeader {
|
||||
self.header
|
||||
}
|
||||
|
||||
pub fn value(&self) -> &Value {
|
||||
&self.value
|
||||
}
|
||||
|
||||
pub fn qtype(&self) -> ValueType {
|
||||
self.value.qtype()
|
||||
}
|
||||
|
||||
pub fn into_parts(self) -> (MessageHeader, Value) {
|
||||
(self.header, self.value)
|
||||
}
|
||||
}
|
||||
|
||||
/// Tunables for decoding q IPC message bodies.
#[derive(Debug, Clone)]
pub struct DecodeOptions {
    /// Enables the parallel table path: when set and the top-level value is a
    /// table with at least `parallel_column_threshold` columns, its columns
    /// are decoded concurrently on rayon's thread pool.
    pub parallel: bool,
    /// Smallest column count for which the parallel table path is taken.
    pub parallel_column_threshold: usize,
}
|
||||
|
||||
impl Default for DecodeOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
parallel: true,
|
||||
parallel_column_threshold: 4,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct BodyReader {
|
||||
bytes: bytes::Bytes,
|
||||
offset: usize,
|
||||
}
|
||||
|
||||
impl BodyReader {
|
||||
fn new(bytes: bytes::Bytes) -> Self {
|
||||
Self { bytes, offset: 0 }
|
||||
}
|
||||
|
||||
fn remaining(&self) -> usize {
|
||||
self.bytes.len().saturating_sub(self.offset)
|
||||
}
|
||||
|
||||
fn read_exact<const N: usize>(&mut self) -> CoreResult<[u8; N]> {
|
||||
let end = self
|
||||
.offset
|
||||
.checked_add(N)
|
||||
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
||||
let slice = self
|
||||
.bytes
|
||||
.get(self.offset..end)
|
||||
.ok_or_else(|| std::io::Error::from(std::io::ErrorKind::UnexpectedEof))?;
|
||||
self.offset = end;
|
||||
Ok(slice.try_into().expect("fixed-size slice length checked"))
|
||||
}
|
||||
|
||||
/// Returns a borrowed slice of `len` bytes and advances the offset.
|
||||
fn read_slice(&mut self, len: usize) -> CoreResult<&[u8]> {
|
||||
let end = self
|
||||
.offset
|
||||
.checked_add(len)
|
||||
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
||||
let slice = self
|
||||
.bytes
|
||||
.get(self.offset..end)
|
||||
.ok_or_else(|| std::io::Error::from(std::io::ErrorKind::UnexpectedEof))?;
|
||||
self.offset = end;
|
||||
Ok(slice)
|
||||
}
|
||||
|
||||
/// Returns a zero-copy Bytes wrapper of `len` bytes and advances the offset.
|
||||
fn read_bytes(&mut self, len: usize) -> CoreResult<bytes::Bytes> {
|
||||
let end = self
|
||||
.offset
|
||||
.checked_add(len)
|
||||
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
||||
if end > self.bytes.len() {
|
||||
return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
|
||||
}
|
||||
let slice = self.bytes.slice(self.offset..end);
|
||||
self.offset = end;
|
||||
Ok(slice)
|
||||
}
|
||||
|
||||
/// Returns a `Bytes` wrapper of `count * size_of::<T>()` bytes, aligned for `T`.
|
||||
///
|
||||
/// If the current offset is already aligned for `T`, this is zero-copy
|
||||
/// (a `Bytes::slice`). Otherwise it copies into a new aligned allocation.
|
||||
fn read_bytes_aligned<T: bytemuck::Pod>(&mut self, count: usize) -> CoreResult<bytes::Bytes> {
|
||||
let byte_len = count
|
||||
.checked_mul(std::mem::size_of::<T>())
|
||||
.ok_or(CoreError::LengthOverflow(count))?;
|
||||
let end = self
|
||||
.offset
|
||||
.checked_add(byte_len)
|
||||
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
||||
if end > self.bytes.len() {
|
||||
return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
|
||||
}
|
||||
let ptr = self.bytes[self.offset..].as_ptr();
|
||||
let align = std::mem::align_of::<T>();
|
||||
let result = if (ptr as usize) % align == 0 {
|
||||
// Already aligned — zero-copy slice.
|
||||
self.bytes.slice(self.offset..end)
|
||||
} else {
|
||||
// Misaligned — must copy into an aligned allocation.
|
||||
bytes::Bytes::copy_from_slice(&self.bytes[self.offset..end])
|
||||
};
|
||||
self.offset = end;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn read_u8(&mut self) -> CoreResult<u8> {
|
||||
Ok(self.read_exact::<1>()?[0])
|
||||
}
|
||||
|
||||
fn read_i8(&mut self) -> CoreResult<i8> {
|
||||
Ok(self.read_u8()? as i8)
|
||||
}
|
||||
|
||||
fn read_i16(&mut self) -> CoreResult<i16> {
|
||||
Ok(i16::from_le_bytes(self.read_exact::<2>()?))
|
||||
}
|
||||
|
||||
fn read_i32(&mut self) -> CoreResult<i32> {
|
||||
Ok(i32::from_le_bytes(self.read_exact::<4>()?))
|
||||
}
|
||||
|
||||
fn read_i64(&mut self) -> CoreResult<i64> {
|
||||
Ok(i64::from_le_bytes(self.read_exact::<8>()?))
|
||||
}
|
||||
|
||||
fn read_f32(&mut self) -> CoreResult<f32> {
|
||||
Ok(f32::from_le_bytes(self.read_exact::<4>()?))
|
||||
}
|
||||
|
||||
fn read_f64(&mut self) -> CoreResult<f64> {
|
||||
Ok(f64::from_le_bytes(self.read_exact::<8>()?))
|
||||
}
|
||||
|
||||
fn read_guid(&mut self) -> CoreResult<[u8; 16]> {
|
||||
self.read_exact::<16>()
|
||||
}
|
||||
|
||||
fn read_length(&mut self) -> CoreResult<usize> {
|
||||
let length = self.read_i32()?;
|
||||
usize::try_from(length).map_err(|_| CoreError::InvalidCollectionLength(length))
|
||||
}
|
||||
|
||||
fn read_symbol(&mut self) -> CoreResult<bytes::Bytes> {
|
||||
let remaining = &self.bytes[self.offset..];
|
||||
match memchr::memchr(0, remaining) {
|
||||
Some(pos) => {
|
||||
let symbol = self.bytes.slice(self.offset..self.offset + pos);
|
||||
self.offset += pos + 1;
|
||||
Ok(symbol)
|
||||
}
|
||||
None => Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads `count` elements of a fixed-width type as a bulk memcpy.
|
||||
///
|
||||
/// The wire bytes are reinterpreted directly into the target `Vec<T>` via
|
||||
/// `bytemuck::cast_slice_mut`, avoiding per-element parsing. This is valid
|
||||
/// because we only support little-endian payloads and all target platforms
|
||||
/// are little-endian.
|
||||
fn read_vec<T: bytemuck::Pod + bytemuck::AnyBitPattern>(
|
||||
&mut self,
|
||||
count: usize,
|
||||
) -> CoreResult<Vec<T>> {
|
||||
let byte_len = count
|
||||
.checked_mul(std::mem::size_of::<T>())
|
||||
.ok_or(CoreError::LengthOverflow(count))?;
|
||||
let bytes = self.read_slice(byte_len)?;
|
||||
let mut values = vec![T::zeroed(); count];
|
||||
let dst: &mut [u8] = bytemuck::cast_slice_mut(&mut values);
|
||||
dst.copy_from_slice(bytes);
|
||||
Ok(values)
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_atom(reader: &mut BodyReader, primitive: Primitive) -> CoreResult<Atom> {
|
||||
Ok(match primitive {
|
||||
Primitive::Boolean => Atom::Boolean(reader.read_u8()? != 0),
|
||||
Primitive::Guid => Atom::Guid(reader.read_guid()?),
|
||||
Primitive::Byte => Atom::Byte(reader.read_u8()?),
|
||||
Primitive::Short => Atom::Short(reader.read_i16()?),
|
||||
Primitive::Int => Atom::Int(reader.read_i32()?),
|
||||
Primitive::Long => Atom::Long(reader.read_i64()?),
|
||||
Primitive::Real => Atom::Real(reader.read_f32()?),
|
||||
Primitive::Float => Atom::Float(reader.read_f64()?),
|
||||
Primitive::Char => Atom::Char(reader.read_u8()?),
|
||||
Primitive::Symbol => Atom::Symbol(reader.read_symbol()?),
|
||||
Primitive::Timestamp => Atom::Timestamp(reader.read_i64()?),
|
||||
Primitive::Month => Atom::Month(reader.read_i32()?),
|
||||
Primitive::Date => Atom::Date(reader.read_i32()?),
|
||||
Primitive::Datetime => Atom::Datetime(reader.read_f64()?),
|
||||
Primitive::Timespan => Atom::Timespan(reader.read_i64()?),
|
||||
Primitive::Minute => Atom::Minute(reader.read_i32()?),
|
||||
Primitive::Second => Atom::Second(reader.read_i32()?),
|
||||
Primitive::Time => Atom::Time(reader.read_i32()?),
|
||||
Primitive::Mixed => unreachable!("mixed values are not encoded as atoms"),
|
||||
})
|
||||
}
|
||||
|
||||
fn decode_vector(
|
||||
reader: &mut BodyReader,
|
||||
primitive: Primitive,
|
||||
attribute: Attribute,
|
||||
length: usize,
|
||||
) -> CoreResult<Vector> {
|
||||
let data = match primitive {
|
||||
Primitive::Boolean => VectorData::Boolean(reader.read_bytes(length)?),
|
||||
Primitive::Guid => VectorData::Guid(
|
||||
reader.read_bytes(
|
||||
length
|
||||
.checked_mul(16)
|
||||
.ok_or(CoreError::LengthOverflow(length))?,
|
||||
)?,
|
||||
),
|
||||
Primitive::Byte => VectorData::Byte(reader.read_bytes(length)?),
|
||||
Primitive::Short => VectorData::Short(reader.read_bytes_aligned::<i16>(length)?),
|
||||
Primitive::Int => VectorData::Int(reader.read_bytes_aligned::<i32>(length)?),
|
||||
Primitive::Long => VectorData::Long(reader.read_bytes_aligned::<i64>(length)?),
|
||||
Primitive::Real => VectorData::Real(reader.read_bytes_aligned::<f32>(length)?),
|
||||
Primitive::Float => VectorData::Float(reader.read_bytes_aligned::<f64>(length)?),
|
||||
Primitive::Char => VectorData::Char(reader.read_bytes(length)?),
|
||||
Primitive::Symbol => {
|
||||
let mut values = Vec::with_capacity(length);
|
||||
for _ in 0..length {
|
||||
values.push(reader.read_symbol()?);
|
||||
}
|
||||
VectorData::Symbol(values)
|
||||
}
|
||||
Primitive::Timestamp => VectorData::Timestamp(reader.read_bytes_aligned::<i64>(length)?),
|
||||
Primitive::Month => VectorData::Month(reader.read_bytes_aligned::<i32>(length)?),
|
||||
Primitive::Date => VectorData::Date(reader.read_bytes_aligned::<i32>(length)?),
|
||||
Primitive::Datetime => VectorData::Datetime(reader.read_bytes_aligned::<f64>(length)?),
|
||||
Primitive::Timespan => VectorData::Timespan(reader.read_bytes_aligned::<i64>(length)?),
|
||||
Primitive::Minute => VectorData::Minute(reader.read_bytes_aligned::<i32>(length)?),
|
||||
Primitive::Second => VectorData::Second(reader.read_bytes_aligned::<i32>(length)?),
|
||||
Primitive::Time => VectorData::Time(reader.read_bytes_aligned::<i32>(length)?),
|
||||
Primitive::Mixed => unreachable!("mixed values are not encoded as vectors"),
|
||||
};
|
||||
|
||||
Ok(Vector::new(attribute, data))
|
||||
}
|
||||
|
||||
pub(crate) fn extract_symbol_names(value: &Value) -> CoreResult<Vec<bytes::Bytes>> {
|
||||
match value {
|
||||
Value::Vector(vector) => match vector.data() {
|
||||
VectorData::Symbol(values) => Ok(values.clone()),
|
||||
_ => Err(CoreError::InvalidStructure(
|
||||
"q table column names must be a symbol vector".to_string(),
|
||||
)),
|
||||
},
|
||||
_ => Err(CoreError::InvalidStructure(
|
||||
"q table column names must be encoded as a symbol vector".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn extract_columns(value: &Value) -> CoreResult<Vec<Value>> {
|
||||
match value {
|
||||
Value::List(list) => Ok(list.values().to_vec()),
|
||||
_ => Err(CoreError::InvalidStructure(
|
||||
"q table columns must be encoded as a general list".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_inner(reader: &mut BodyReader) -> CoreResult<Value> {
|
||||
let type_code = TypeCode::try_from(reader.read_i8()?)?;
|
||||
match type_code.shape() {
|
||||
Shape::Atom => Ok(Value::Atom(decode_atom(
|
||||
reader,
|
||||
type_code
|
||||
.primitive()
|
||||
.expect("atom types always have a primitive"),
|
||||
)?)),
|
||||
Shape::Vector => {
|
||||
let attribute = Attribute::try_from(reader.read_i8()?)?;
|
||||
let length = reader.read_length()?;
|
||||
Ok(Value::Vector(decode_vector(
|
||||
reader,
|
||||
type_code
|
||||
.primitive()
|
||||
.expect("vector types always have a primitive"),
|
||||
attribute,
|
||||
length,
|
||||
)?))
|
||||
}
|
||||
Shape::List => {
|
||||
let attribute = Attribute::try_from(reader.read_i8()?)?;
|
||||
let length = reader.read_length()?;
|
||||
let mut values = Vec::with_capacity(length);
|
||||
for _ in 0..length {
|
||||
values.push(decode_inner(reader)?);
|
||||
}
|
||||
Ok(Value::List(List::new(attribute, values)))
|
||||
}
|
||||
Shape::Dictionary => {
|
||||
let sorted = matches!(type_code, TypeCode::SortedDictionary);
|
||||
let keys = decode_inner(reader)?;
|
||||
let values = decode_inner(reader)?;
|
||||
let dictionary = Dictionary::new(sorted, keys, values);
|
||||
dictionary.validate()?;
|
||||
Ok(Value::Dictionary(dictionary))
|
||||
}
|
||||
Shape::Table => {
|
||||
let attribute = Attribute::try_from(reader.read_i8()?)?;
|
||||
let encoded_dictionary = decode_inner(reader)?;
|
||||
let Value::Dictionary(dictionary) = encoded_dictionary else {
|
||||
return Err(CoreError::InvalidStructure(
|
||||
"q table payload must contain a dictionary body".to_string(),
|
||||
));
|
||||
};
|
||||
let column_names = extract_symbol_names(dictionary.keys())?;
|
||||
let columns = extract_columns(dictionary.values())?;
|
||||
let table = Table::new(attribute, column_names, columns);
|
||||
table.validate()?;
|
||||
Ok(Value::Table(table))
|
||||
}
|
||||
Shape::UnaryPrimitive => Ok(Value::UnaryPrimitive {
|
||||
opcode: reader.read_i8()?,
|
||||
}),
|
||||
Shape::Error => {
|
||||
let error_msg = reader.read_symbol()?;
|
||||
Err(CoreError::QRuntime(
|
||||
String::from_utf8_lossy(&error_msg).into(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parsed table preamble: everything before the column data.
|
||||
struct TablePreamble {
|
||||
attribute: Attribute,
|
||||
column_names: Vec<bytes::Bytes>,
|
||||
/// Byte offset within the body where column values start (past the
|
||||
/// general-list header).
|
||||
columns_start: usize,
|
||||
num_columns: usize,
|
||||
}
|
||||
|
||||
/// Parses the table header, dictionary keys (column names), and list header.
|
||||
///
|
||||
/// Shared by both the sequential and parallel table decode paths.
|
||||
fn parse_table_preamble(body: &bytes::Bytes) -> CoreResult<TablePreamble> {
|
||||
let mut reader = BodyReader::new(body.clone());
|
||||
|
||||
// Table: type(1) + attribute(1)
|
||||
let _type_code = reader.read_i8()?; // 98 = Table
|
||||
let attribute = Attribute::try_from(reader.read_i8()?)?;
|
||||
|
||||
// Dictionary: type(1) + keys + values
|
||||
let dict_type = TypeCode::try_from(reader.read_i8()?)?;
|
||||
if !matches!(dict_type, TypeCode::Dictionary | TypeCode::SortedDictionary) {
|
||||
return Err(CoreError::InvalidStructure(
|
||||
"q table payload must contain a dictionary body".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
// Keys = symbol vector (column names)
|
||||
let keys = decode_inner(&mut reader)?;
|
||||
let column_names = extract_symbol_names(&keys)?;
|
||||
|
||||
// Values = general list: type(1) + attr(1) + length(4) + column values
|
||||
let list_type = reader.read_i8()?;
|
||||
if list_type != 0 {
|
||||
return Err(CoreError::InvalidStructure(
|
||||
"q table columns must be encoded as a general list".to_string(),
|
||||
));
|
||||
}
|
||||
let _list_attr = reader.read_i8()?;
|
||||
let num_columns = reader.read_length()?;
|
||||
|
||||
if num_columns != column_names.len() {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"table has {} column names but {} column values",
|
||||
column_names.len(),
|
||||
num_columns
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(TablePreamble {
|
||||
attribute,
|
||||
column_names,
|
||||
columns_start: reader.offset,
|
||||
num_columns,
|
||||
})
|
||||
}
|
||||
|
||||
/// Attempts parallel table decode. Returns `None` if the column count is
|
||||
/// below the threshold, allowing the caller to fall back to sequential.
|
||||
fn try_decode_table_parallel(body: bytes::Bytes, threshold: usize) -> CoreResult<Option<Value>> {
|
||||
let preamble = parse_table_preamble(&body)?;
|
||||
|
||||
if preamble.num_columns < threshold {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// Use value_byte_extent to find each column's byte range without parsing
|
||||
let mut column_ranges: Vec<(usize, usize)> = Vec::with_capacity(preamble.num_columns);
|
||||
let mut scan = preamble.columns_start;
|
||||
for _ in 0..preamble.num_columns {
|
||||
let extent = value_byte_extent(&body, scan)?;
|
||||
column_ranges.push((scan, scan + extent));
|
||||
scan += extent;
|
||||
}
|
||||
|
||||
// Parallel decode: each column gets its own byte slice
|
||||
let columns: Vec<CoreResult<Value>> = column_ranges
|
||||
.par_iter()
|
||||
.map(|&(start, end)| {
|
||||
let mut col_reader = BodyReader::new(body.slice(start..end));
|
||||
let value = decode_inner(&mut col_reader)?;
|
||||
if col_reader.remaining() != 0 {
|
||||
return Err(CoreError::TrailingBodyBytes(col_reader.remaining()));
|
||||
}
|
||||
Ok(value)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let columns: Vec<Value> = columns.into_iter().collect::<CoreResult<Vec<_>>>()?;
|
||||
|
||||
let table = Table::new(preamble.attribute, preamble.column_names, columns);
|
||||
table.validate()?;
|
||||
Ok(Some(Value::Table(table)))
|
||||
}
|
||||
|
||||
/// Decodes one q value body from a little-endian byte slice.
|
||||
///
|
||||
/// Returns `UnsupportedEndianness` for big-endian payloads.
|
||||
pub fn decode_value(body: bytes::Bytes, encoding: Encoding) -> CoreResult<Value> {
|
||||
decode_value_with_options(body, encoding, &DecodeOptions::default())
|
||||
}
|
||||
|
||||
/// Decodes one q value body with configurable options.
|
||||
///
|
||||
/// When `options.parallel` is `true` and the body contains a table with
|
||||
/// enough columns, columns are decoded in parallel using rayon.
|
||||
pub fn decode_value_with_options(
|
||||
body: bytes::Bytes,
|
||||
encoding: Encoding,
|
||||
options: &DecodeOptions,
|
||||
) -> CoreResult<Value> {
|
||||
if encoding != Encoding::LittleEndian {
|
||||
return Err(CoreError::UnsupportedEndianness(encoding));
|
||||
}
|
||||
|
||||
// Fast path: parallel table decode
|
||||
if options.parallel && body.first() == Some(&98) {
|
||||
if let Some(table) =
|
||||
try_decode_table_parallel(body.clone(), options.parallel_column_threshold)?
|
||||
{
|
||||
return Ok(table);
|
||||
}
|
||||
}
|
||||
|
||||
let mut reader = BodyReader::new(body);
|
||||
let value = decode_inner(&mut reader)?;
|
||||
if reader.remaining() != 0 {
|
||||
return Err(CoreError::TrailingBodyBytes(reader.remaining()));
|
||||
}
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
/// Decodes a full q IPC frame into its header and value.
|
||||
///
|
||||
/// Returns `UnsupportedEndianness` for big-endian payloads.
|
||||
pub fn decode_message(frame_bytes: bytes::Bytes) -> CoreResult<DecodedMessage> {
|
||||
decode_message_with_options(frame_bytes, &DecodeOptions::default())
|
||||
}
|
||||
|
||||
/// Decodes a full q IPC frame with configurable options.
|
||||
pub fn decode_message_with_options(
|
||||
frame_bytes: bytes::Bytes,
|
||||
options: &DecodeOptions,
|
||||
) -> CoreResult<DecodedMessage> {
|
||||
let frame = Frame::parse(&frame_bytes)?;
|
||||
let header = frame.header();
|
||||
|
||||
if header.encoding() != Encoding::LittleEndian {
|
||||
return Err(CoreError::UnsupportedEndianness(header.encoding()));
|
||||
}
|
||||
|
||||
if header.compression() != Compression::Uncompressed {
|
||||
let decompressed = decompress_ipc_body(frame.body(), header.encoding())?;
|
||||
let value = decode_value_with_options(
|
||||
bytes::Bytes::from(decompressed),
|
||||
header.encoding(),
|
||||
options,
|
||||
)?;
|
||||
return Ok(DecodedMessage::new(header, value));
|
||||
}
|
||||
|
||||
let value = decode_value_with_options(
|
||||
frame_bytes.slice(crate::frame::HEADER_LEN..),
|
||||
header.encoding(),
|
||||
options,
|
||||
)?;
|
||||
Ok(DecodedMessage::new(header, value))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::encode::encode_value;
    use crate::protocol::Attribute;

    // -- Shared fixtures --

    /// Wraps a static byte string as a symbol payload.
    fn sym(name: &'static [u8]) -> bytes::Bytes {
        bytes::Bytes::from_static(name)
    }

    /// Decodes a static little-endian body with the default options.
    fn decode_le(body: &'static [u8]) -> CoreResult<Value> {
        decode_value(bytes::Bytes::from_static(body), Encoding::LittleEndian)
    }

    /// Builds a vector value that carries no attribute.
    fn plain_vector(data: VectorData) -> Value {
        Value::Vector(Vector::new(Attribute::None, data))
    }

    /// Builds an attribute-free table from parallel name/column lists.
    fn plain_table(names: Vec<bytes::Bytes>, columns: Vec<Value>) -> Value {
        Value::Table(Table::new(Attribute::None, names, columns))
    }

    // -- Sequential decode tests --

    #[test]
    fn decode_int_atom_body() {
        let body = vec![i8::from(TypeCode::IntAtom) as u8, 42, 0, 0, 0];
        let value = decode_value(bytes::Bytes::from(body), Encoding::LittleEndian).unwrap();

        assert_eq!(value, Value::Atom(Atom::Int(42)));
        assert_eq!(value.qtype(), ValueType::atom(Primitive::Int));
    }

    #[test]
    fn decode_int_vector_body() {
        let value =
            decode_le(&[6_u8, 1, 3, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0]).unwrap();

        let expected = Value::Vector(Vector::new(
            Attribute::Sorted,
            VectorData::from_i32s(&[1, 2, 3]),
        ));
        assert_eq!(value, expected);
    }

    #[test]
    fn decode_symbol_atom_body() {
        let value = decode_le(&[245_u8, b'a', b'b', 0]).unwrap();

        assert_eq!(value, Value::Atom(Atom::Symbol(sym(b"ab"))));
    }

    #[test]
    fn decode_list_body() {
        let value =
            decode_le(&[0_u8, 0, 2, 0, 0, 0, 250, 42, 0, 0, 0, 245, b'a', b'b', 0]).unwrap();

        let expected = Value::List(List::new(
            Attribute::None,
            vec![
                Value::Atom(Atom::Int(42)),
                Value::Atom(Atom::Symbol(sym(b"ab"))),
            ],
        ));
        assert_eq!(value, expected);
    }

    #[test]
    fn decode_dictionary_body() {
        let value = decode_le(&[
            99_u8, 11, 0, 2, 0, 0, 0, b'a', 0, b'b', 0, 6, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0,
            0,
        ])
        .unwrap();

        let expected = Value::Dictionary(Dictionary::new(
            false,
            plain_vector(VectorData::Symbol(vec![sym(b"a"), sym(b"b")])),
            plain_vector(VectorData::from_i32s(&[1, 2])),
        ));
        assert_eq!(value, expected);
    }

    #[test]
    fn decode_table_body() {
        let value = decode_le(&[
            98_u8, 0, 99, 11, 0, 2, 0, 0, 0, b's', b'y', b'm', 0, b'p', b'x', 0, 0, 0, 2, 0, 0,
            0, 11, 0, 2, 0, 0, 0, b'a', 0, b'b', 0, 6, 0, 2, 0, 0, 0, 10, 0, 0, 0, 20, 0, 0, 0,
        ])
        .unwrap();

        let expected = plain_table(
            vec![sym(b"sym"), sym(b"px")],
            vec![
                plain_vector(VectorData::Symbol(vec![sym(b"a"), sym(b"b")])),
                plain_vector(VectorData::from_i32s(&[10, 20])),
            ],
        );
        assert_eq!(value, expected);
    }

    #[test]
    fn decode_unary_primitive_body() {
        let value = decode_le(&[101_u8, 0]).unwrap();

        assert_eq!(value, Value::UnaryPrimitive { opcode: 0 });
    }

    #[test]
    fn decode_rejects_trailing_bytes() {
        let outcome = decode_le(&[250_u8, 42, 0, 0, 0, 99]);

        assert!(matches!(outcome, Err(CoreError::TrailingBodyBytes(1))));
    }

    #[test]
    fn decode_rejects_malformed_table_structure() {
        let err =
            decode_le(&[98_u8, 0, 99, 11, 0, 1, 0, 0, 0, b'x', 0, 250, 42, 0, 0, 0]).unwrap_err();

        assert!(matches!(err, CoreError::InvalidStructure(_)));
    }

    #[test]
    fn decode_rejects_big_endian() {
        let outcome = decode_value(
            bytes::Bytes::from_static(&[250_u8, 0, 0, 0, 42]),
            Encoding::BigEndian,
        );

        assert!(matches!(
            outcome,
            Err(CoreError::UnsupportedEndianness(Encoding::BigEndian))
        ));
    }

    // -- Parallel decode tests --

    /// Helper: encodes `table`, decodes it once sequentially and once with the
    /// parallel path forced, and checks that both results agree with each
    /// other and with the original value.
    fn assert_parallel_matches_sequential(table: &Value) {
        let body = bytes::Bytes::from(encode_value(table, Encoding::LittleEndian).unwrap());

        let sequential = DecodeOptions {
            parallel: false,
            ..Default::default()
        };
        let forced_parallel = DecodeOptions {
            parallel: true,
            parallel_column_threshold: 1, // force parallel even for small tables
        };

        let seq =
            decode_value_with_options(body.clone(), Encoding::LittleEndian, &sequential).unwrap();
        let par =
            decode_value_with_options(body, Encoding::LittleEndian, &forced_parallel).unwrap();

        assert_eq!(seq, par, "parallel decode must match sequential decode");
        assert_eq!(&seq, table, "decoded value must match original");
    }

    #[test]
    fn parallel_decode_multi_column_table() {
        let table = plain_table(
            vec![sym(b"a"), sym(b"b"), sym(b"c"), sym(b"d")],
            vec![
                plain_vector(VectorData::from_i32s(&[1, 2, 3])),
                plain_vector(VectorData::Symbol(vec![sym(b"x"), sym(b"y"), sym(b"z")])),
                plain_vector(VectorData::from_f64s(&[1.0, 2.0, 3.0])),
                plain_vector(VectorData::from_i64s(&[100, 200, 300])),
            ],
        );

        assert_parallel_matches_sequential(&table);
    }

    #[test]
    fn parallel_decode_mixed_type_columns() {
        let table = plain_table(
            vec![
                sym(b"bools"),
                sym(b"guids"),
                sym(b"chars"),
                sym(b"times"),
                sym(b"dates"),
            ],
            vec![
                plain_vector(VectorData::Boolean(bytes::Bytes::from_static(&[1, 0]))),
                plain_vector(VectorData::from_guids(&[[0u8; 16], [1u8; 16]])),
                plain_vector(VectorData::Char(bytes::Bytes::from_static(b"ab"))),
                plain_vector(VectorData::from_times(&[1000, 2000])),
                plain_vector(VectorData::from_dates(&[100, 200])),
            ],
        );

        assert_parallel_matches_sequential(&table);
    }

    #[test]
    fn parallel_decode_below_threshold_falls_back_to_sequential() {
        // 2 columns, threshold 4 → should use sequential path
        let table = plain_table(
            vec![sym(b"a"), sym(b"b")],
            vec![
                plain_vector(VectorData::from_i32s(&[1, 2])),
                plain_vector(VectorData::from_i32s(&[3, 4])),
            ],
        );
        let body = bytes::Bytes::from(encode_value(&table, Encoding::LittleEndian).unwrap());
        let opts = DecodeOptions {
            parallel: true,
            parallel_column_threshold: 4,
        };

        let decoded = decode_value_with_options(body, Encoding::LittleEndian, &opts).unwrap();

        assert_eq!(decoded, table);
    }

    #[test]
    fn parallel_decode_non_table_ignores_parallel_flag() {
        // Non-table values should decode normally regardless of parallel flag
        let value = Value::Atom(Atom::Int(42));
        let body = bytes::Bytes::from(encode_value(&value, Encoding::LittleEndian).unwrap());
        let opts = DecodeOptions {
            parallel: true,
            parallel_column_threshold: 1,
        };

        let decoded = decode_value_with_options(body, Encoding::LittleEndian, &opts).unwrap();

        assert_eq!(decoded, value);
    }

    #[test]
    fn parse_table_preamble_correct() {
        let table = plain_table(
            vec![sym(b"a"), sym(b"b"), sym(b"c"), sym(b"d"), sym(b"e")],
            vec![
                plain_vector(VectorData::from_i32s(&[1])),
                plain_vector(VectorData::from_i32s(&[2])),
                plain_vector(VectorData::from_i32s(&[3])),
                plain_vector(VectorData::from_i32s(&[4])),
                plain_vector(VectorData::from_i32s(&[5])),
            ],
        );
        let body = bytes::Bytes::from(encode_value(&table, Encoding::LittleEndian).unwrap());

        let preamble = parse_table_preamble(&body).unwrap();

        assert_eq!(preamble.num_columns, 5);
        assert_eq!(preamble.column_names.len(), 5);
        assert_eq!(&preamble.column_names[0][..], b"a");
        assert_eq!(&preamble.column_names[4][..], b"e");
    }
}
|
||||
385
crates/qroissant-core/src/encode.rs
Normal file
385
crates/qroissant-core/src/encode.rs
Normal file
|
|
@ -0,0 +1,385 @@
|
|||
use crate::error::CoreError;
|
||||
use crate::error::CoreResult;
|
||||
use crate::frame::Compression;
|
||||
use crate::frame::Encoding;
|
||||
use crate::frame::MessageType;
|
||||
use crate::frame::serialize_body_as_message;
|
||||
use crate::protocol::TypeCode;
|
||||
use crate::value::Atom;
|
||||
use crate::value::List;
|
||||
use crate::value::Table;
|
||||
use crate::value::Value;
|
||||
use crate::value::Vector;
|
||||
use crate::value::VectorData;
|
||||
|
||||
/// Appends `value` to `buffer` as two little-endian bytes.
fn push_i16(buffer: &mut Vec<u8>, value: i16) {
    buffer.extend(value.to_le_bytes());
}
|
||||
|
||||
/// Appends `value` to `buffer` as four little-endian bytes.
fn push_i32(buffer: &mut Vec<u8>, value: i32) {
    buffer.extend(value.to_le_bytes());
}
|
||||
|
||||
/// Appends `value` to `buffer` as eight little-endian bytes.
fn push_i64(buffer: &mut Vec<u8>, value: i64) {
    buffer.extend(value.to_le_bytes());
}
|
||||
|
||||
/// Appends the 32-bit float `value` to `buffer` as four little-endian bytes.
fn push_f32(buffer: &mut Vec<u8>, value: f32) {
    buffer.extend(value.to_le_bytes());
}
|
||||
|
||||
/// Appends the 64-bit float `value` to `buffer` as eight little-endian bytes.
fn push_f64(buffer: &mut Vec<u8>, value: f64) {
    buffer.extend(value.to_le_bytes());
}
|
||||
|
||||
fn push_length(buffer: &mut Vec<u8>, value: usize) {
|
||||
let value = i32::try_from(value).expect("supported q vectors fit in 32-bit length");
|
||||
push_i32(buffer, value);
|
||||
}
|
||||
|
||||
fn encode_atom(atom: &Atom, buffer: &mut Vec<u8>) {
|
||||
match atom {
|
||||
Atom::Boolean(value) => {
|
||||
buffer.push(TypeCode::BooleanAtom as i8 as u8);
|
||||
buffer.push(u8::from(*value));
|
||||
}
|
||||
Atom::Guid(value) => {
|
||||
buffer.push(TypeCode::GuidAtom as i8 as u8);
|
||||
buffer.extend_from_slice(value);
|
||||
}
|
||||
Atom::Byte(value) => {
|
||||
buffer.push(TypeCode::ByteAtom as i8 as u8);
|
||||
buffer.push(*value);
|
||||
}
|
||||
Atom::Short(value) => {
|
||||
buffer.push(TypeCode::ShortAtom as i8 as u8);
|
||||
push_i16(buffer, *value);
|
||||
}
|
||||
Atom::Int(value) => {
|
||||
buffer.push(TypeCode::IntAtom as i8 as u8);
|
||||
push_i32(buffer, *value);
|
||||
}
|
||||
Atom::Long(value) => {
|
||||
buffer.push(TypeCode::LongAtom as i8 as u8);
|
||||
push_i64(buffer, *value);
|
||||
}
|
||||
Atom::Real(value) => {
|
||||
buffer.push(TypeCode::RealAtom as i8 as u8);
|
||||
push_f32(buffer, *value);
|
||||
}
|
||||
Atom::Float(value) => {
|
||||
buffer.push(TypeCode::FloatAtom as i8 as u8);
|
||||
push_f64(buffer, *value);
|
||||
}
|
||||
Atom::Char(value) => {
|
||||
buffer.push(TypeCode::CharAtom as i8 as u8);
|
||||
buffer.push(*value);
|
||||
}
|
||||
Atom::Symbol(value) => {
|
||||
buffer.push(TypeCode::SymbolAtom as i8 as u8);
|
||||
buffer.extend_from_slice(value);
|
||||
buffer.push(0);
|
||||
}
|
||||
Atom::Timestamp(value) => {
|
||||
buffer.push(TypeCode::TimestampAtom as i8 as u8);
|
||||
push_i64(buffer, *value);
|
||||
}
|
||||
Atom::Month(value) => {
|
||||
buffer.push(TypeCode::MonthAtom as i8 as u8);
|
||||
push_i32(buffer, *value);
|
||||
}
|
||||
Atom::Date(value) => {
|
||||
buffer.push(TypeCode::DateAtom as i8 as u8);
|
||||
push_i32(buffer, *value);
|
||||
}
|
||||
Atom::Datetime(value) => {
|
||||
buffer.push(TypeCode::DatetimeAtom as i8 as u8);
|
||||
push_f64(buffer, *value);
|
||||
}
|
||||
Atom::Timespan(value) => {
|
||||
buffer.push(TypeCode::TimespanAtom as i8 as u8);
|
||||
push_i64(buffer, *value);
|
||||
}
|
||||
Atom::Minute(value) => {
|
||||
buffer.push(TypeCode::MinuteAtom as i8 as u8);
|
||||
push_i32(buffer, *value);
|
||||
}
|
||||
Atom::Second(value) => {
|
||||
buffer.push(TypeCode::SecondAtom as i8 as u8);
|
||||
push_i32(buffer, *value);
|
||||
}
|
||||
Atom::Time(value) => {
|
||||
buffer.push(TypeCode::TimeAtom as i8 as u8);
|
||||
push_i32(buffer, *value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_vector(vector: &Vector, buffer: &mut Vec<u8>) {
|
||||
let attribute = i8::from(vector.attribute()) as u8;
|
||||
let data = vector.data();
|
||||
let len = data.len();
|
||||
|
||||
// All non-Symbol variants store raw Bytes; pick the type code, write header + raw bytes.
|
||||
let (type_code, raw) = match data {
|
||||
VectorData::Boolean(b) => (TypeCode::BooleanVector, Some(b)),
|
||||
VectorData::Guid(b) => (TypeCode::GuidVector, Some(b)),
|
||||
VectorData::Byte(b) => (TypeCode::ByteVector, Some(b)),
|
||||
VectorData::Short(b) => (TypeCode::ShortVector, Some(b)),
|
||||
VectorData::Int(b) => (TypeCode::IntVector, Some(b)),
|
||||
VectorData::Long(b) => (TypeCode::LongVector, Some(b)),
|
||||
VectorData::Real(b) => (TypeCode::RealVector, Some(b)),
|
||||
VectorData::Float(b) => (TypeCode::FloatVector, Some(b)),
|
||||
VectorData::Char(b) => (TypeCode::CharVector, Some(b)),
|
||||
VectorData::Timestamp(b) => (TypeCode::TimestampVector, Some(b)),
|
||||
VectorData::Month(b) => (TypeCode::MonthVector, Some(b)),
|
||||
VectorData::Date(b) => (TypeCode::DateVector, Some(b)),
|
||||
VectorData::Datetime(b) => (TypeCode::DatetimeVector, Some(b)),
|
||||
VectorData::Timespan(b) => (TypeCode::TimespanVector, Some(b)),
|
||||
VectorData::Minute(b) => (TypeCode::MinuteVector, Some(b)),
|
||||
VectorData::Second(b) => (TypeCode::SecondVector, Some(b)),
|
||||
VectorData::Time(b) => (TypeCode::TimeVector, Some(b)),
|
||||
VectorData::Symbol(_) => (TypeCode::SymbolVector, None),
|
||||
};
|
||||
|
||||
buffer.push(type_code as i8 as u8);
|
||||
buffer.push(attribute);
|
||||
push_length(buffer, len);
|
||||
|
||||
if let Some(raw) = raw {
|
||||
buffer.extend_from_slice(raw);
|
||||
} else if let VectorData::Symbol(values) = data {
|
||||
for value in values {
|
||||
buffer.extend_from_slice(value);
|
||||
buffer.push(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_table(table: &Table, buffer: &mut Vec<u8>) -> CoreResult<()> {
|
||||
buffer.push(TypeCode::Table as i8 as u8);
|
||||
buffer.push(i8::from(table.attribute()) as u8);
|
||||
|
||||
buffer.push(TypeCode::Dictionary as i8 as u8);
|
||||
buffer.push(TypeCode::SymbolVector as i8 as u8);
|
||||
buffer.push(0);
|
||||
push_length(buffer, table.column_names().len());
|
||||
for name in table.column_names() {
|
||||
buffer.extend_from_slice(name);
|
||||
buffer.push(0);
|
||||
}
|
||||
|
||||
buffer.push(TypeCode::GeneralList as i8 as u8);
|
||||
buffer.push(0);
|
||||
push_length(buffer, table.columns().len());
|
||||
for column in table.columns() {
|
||||
encode_value_into(column, buffer)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn encode_list(list: &List, buffer: &mut Vec<u8>) -> CoreResult<()> {
|
||||
buffer.push(TypeCode::GeneralList as i8 as u8);
|
||||
buffer.push(i8::from(list.attribute()) as u8);
|
||||
push_length(buffer, list.len());
|
||||
for value in list.values() {
|
||||
encode_value_into(value, buffer)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn encode_value_into(value: &Value, buffer: &mut Vec<u8>) -> CoreResult<()> {
|
||||
match value {
|
||||
Value::Atom(atom) => encode_atom(atom, buffer),
|
||||
Value::Vector(vector) => encode_vector(vector, buffer),
|
||||
Value::List(list) => encode_list(list, buffer)?,
|
||||
Value::Dictionary(dictionary) => {
|
||||
dictionary.validate()?;
|
||||
buffer.push(if dictionary.sorted() {
|
||||
TypeCode::SortedDictionary as i8 as u8
|
||||
} else {
|
||||
TypeCode::Dictionary as i8 as u8
|
||||
});
|
||||
encode_value_into(dictionary.keys(), buffer)?;
|
||||
encode_value_into(dictionary.values(), buffer)?;
|
||||
}
|
||||
Value::Table(table) => {
|
||||
table.validate()?;
|
||||
encode_table(table, buffer)?;
|
||||
}
|
||||
Value::UnaryPrimitive { opcode } => {
|
||||
buffer.push(TypeCode::UnaryPrimitive as i8 as u8);
|
||||
buffer.push(*opcode as u8);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Encodes a supported q value as a little-endian q IPC body.
|
||||
///
|
||||
/// Returns `UnsupportedEndianness` for big-endian encoding.
|
||||
pub fn encode_value(value: &Value, encoding: Encoding) -> CoreResult<Vec<u8>> {
|
||||
if encoding != Encoding::LittleEndian {
|
||||
return Err(CoreError::UnsupportedEndianness(encoding));
|
||||
}
|
||||
let mut buffer = Vec::new();
|
||||
encode_value_into(value, &mut buffer)?;
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
/// Encodes a supported q value as a full q IPC message.
|
||||
///
|
||||
/// Returns `UnsupportedEndianness` for big-endian encoding.
|
||||
pub fn encode_message(
|
||||
value: &Value,
|
||||
encoding: Encoding,
|
||||
message_type: MessageType,
|
||||
compression: Compression,
|
||||
) -> CoreResult<Vec<u8>> {
|
||||
let body = encode_value(value, encoding)?;
|
||||
serialize_body_as_message(&body, encoding, message_type, compression)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::decode::decode_value;
    use crate::protocol::Attribute;
    use crate::value::Dictionary;
    use crate::value::List;
    use crate::value::Table;

    /// Wraps a static byte string as a symbol payload.
    fn sym(text: &'static [u8]) -> bytes::Bytes {
        bytes::Bytes::from_static(text)
    }

    /// Builds an attribute-free vector value.
    fn plain_vector(data: VectorData) -> Value {
        Value::Vector(Vector::new(Attribute::None, data))
    }

    /// Encodes `value` as a little-endian body, panicking on failure.
    fn encode_le(value: &Value) -> Vec<u8> {
        encode_value(value, Encoding::LittleEndian).unwrap()
    }

    /// Asserts that decoding `body` yields `value` again.
    fn assert_round_trips(value: &Value, body: Vec<u8>) {
        let decoded = decode_value(bytes::Bytes::from(body), Encoding::LittleEndian).unwrap();
        assert_eq!(decoded, *value);
    }

    #[test]
    fn encode_int_atom_body() {
        let value = Value::Atom(Atom::Int(42));
        let body = encode_le(&value);

        assert_eq!(body, vec![250, 42, 0, 0, 0]);
        assert_round_trips(&value, body);
    }

    #[test]
    fn encode_rejects_big_endian() {
        let value = Value::Vector(Vector::new(
            Attribute::Sorted,
            VectorData::from_i32s(&[1, 2, 3]),
        ));

        let outcome = encode_value(&value, Encoding::BigEndian);
        assert!(matches!(
            outcome,
            Err(CoreError::UnsupportedEndianness(Encoding::BigEndian))
        ));
    }

    #[test]
    fn encode_symbol_vector_body() {
        let value = plain_vector(VectorData::Symbol(vec![sym(b"alpha"), sym(b"beta")]));
        let body = encode_le(&value);

        assert_eq!(body, sym(b"\x0b\x00\x02\0\0\0alpha\0beta\0"));
        assert_round_trips(&value, body);
    }

    #[test]
    fn encode_list_body() {
        let value = Value::List(List::new(
            Attribute::None,
            vec![
                Value::Atom(Atom::Int(42)),
                Value::Atom(Atom::Symbol(sym(b"ab"))),
            ],
        ));

        let body = encode_le(&value);
        assert_round_trips(&value, body);
    }

    #[test]
    fn encode_dictionary_body() {
        let value = Value::Dictionary(Dictionary::new(
            false,
            plain_vector(VectorData::Symbol(vec![sym(b"a"), sym(b"b")])),
            plain_vector(VectorData::from_i32s(&[1, 2])),
        ));

        let body = encode_le(&value);
        assert_round_trips(&value, body);
    }

    #[test]
    fn encode_table_body() {
        let value = Value::Table(Table::new(
            Attribute::None,
            vec![sym(b"sym"), sym(b"px")],
            vec![
                plain_vector(VectorData::Symbol(vec![sym(b"a"), sym(b"b")])),
                plain_vector(VectorData::from_i32s(&[10, 20])),
            ],
        ));

        let body = encode_le(&value);
        assert_round_trips(&value, body);
    }

    #[test]
    fn encode_rejects_malformed_table_structure() {
        // Two column names but only one column: validation must refuse to encode.
        let value = Value::Table(Table::new(
            Attribute::None,
            vec![sym(b"sym"), sym(b"px")],
            vec![plain_vector(VectorData::Symbol(vec![sym(b"a")]))],
        ));

        let err = encode_value(&value, Encoding::LittleEndian).unwrap_err();
        assert!(matches!(err, CoreError::InvalidStructure(_)));
    }
}
|
||||
112
crates/qroissant-core/src/error.rs
Normal file
112
crates/qroissant-core/src/error.rs
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
use std::error::Error;
|
||||
use std::fmt;
|
||||
|
||||
use crate::frame::Compression;
|
||||
use crate::frame::Encoding;
|
||||
|
||||
/// Core result type used across the qroissant core crate.
|
||||
pub type CoreResult<T> = Result<T, CoreError>;
|
||||
|
||||
/// Errors produced by low-level q IPC frame handling.
|
||||
#[derive(Debug)]
|
||||
pub enum CoreError {
|
||||
InvalidEncoding(u8),
|
||||
InvalidMessageType(u8),
|
||||
InvalidCompression(u8),
|
||||
InvalidAttribute(i8),
|
||||
InvalidTypeCode(i8),
|
||||
InvalidMessageLength(usize),
|
||||
InvalidCollectionLength(i32),
|
||||
InvalidStructure(String),
|
||||
TruncatedHeader { actual: usize },
|
||||
FrameLengthMismatch { declared: usize, actual: usize },
|
||||
TrailingBodyBytes(usize),
|
||||
UnsupportedEndianness(Encoding),
|
||||
UnsupportedCompression(Compression),
|
||||
UnsupportedTypeCode(i8),
|
||||
LengthOverflow(usize),
|
||||
Io(std::io::Error),
|
||||
QRuntime(String),
|
||||
}
|
||||
|
||||
impl fmt::Display for CoreError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::InvalidEncoding(value) => write!(
|
||||
f,
|
||||
"invalid q IPC encoding value {value}; expected 0 (big-endian) or 1 (little-endian)"
|
||||
),
|
||||
Self::InvalidMessageType(value) => write!(
|
||||
f,
|
||||
"invalid q IPC message type value {value}; expected 0 (asynchronous), 1 (synchronous), or 2 (response)"
|
||||
),
|
||||
Self::InvalidCompression(value) => write!(
|
||||
f,
|
||||
"invalid q IPC compression value {value}; expected 0 (uncompressed), 1 (compressed), or 2 (compressed large)"
|
||||
),
|
||||
Self::InvalidAttribute(value) => write!(
|
||||
f,
|
||||
"invalid q attribute value {value}; expected 0 (none), 1 (sorted), 2 (unique), 3 (parted), or 4 (grouped)"
|
||||
),
|
||||
Self::InvalidTypeCode(value) => write!(f, "invalid q IPC type code {value}"),
|
||||
Self::InvalidMessageLength(length) => {
|
||||
write!(
|
||||
f,
|
||||
"invalid q IPC message length {length}; minimum is 8 bytes"
|
||||
)
|
||||
}
|
||||
Self::InvalidCollectionLength(length) => {
|
||||
write!(
|
||||
f,
|
||||
"invalid q collection length {length}; length must be non-negative"
|
||||
)
|
||||
}
|
||||
Self::InvalidStructure(message) => write!(f, "{message}"),
|
||||
Self::TruncatedHeader { actual } => write!(
|
||||
f,
|
||||
"truncated q IPC header: expected 8 bytes, received {actual}"
|
||||
),
|
||||
Self::FrameLengthMismatch { declared, actual } => write!(
|
||||
f,
|
||||
"q IPC header declares {declared} bytes, but frame contains {actual}"
|
||||
),
|
||||
Self::TrailingBodyBytes(remaining) => write!(
|
||||
f,
|
||||
"q IPC body contains {remaining} trailing bytes after the decoded value"
|
||||
),
|
||||
Self::UnsupportedEndianness(encoding) => write!(
|
||||
f,
|
||||
"serialization currently supports only little-endian q IPC frames, got {encoding:?}"
|
||||
),
|
||||
Self::UnsupportedCompression(compression) => write!(
|
||||
f,
|
||||
"serialization currently supports only uncompressed q IPC frames, got {compression:?}"
|
||||
),
|
||||
Self::UnsupportedTypeCode(value) => write!(
|
||||
f,
|
||||
"q IPC type code {value} is valid but not implemented yet in the current decoder"
|
||||
),
|
||||
Self::LengthOverflow(length) => write!(
|
||||
f,
|
||||
"q IPC frame length {length} exceeds 32-bit header capacity"
|
||||
),
|
||||
Self::Io(error) => error.fmt(f),
|
||||
Self::QRuntime(message) => write!(f, "q runtime error: {message}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for CoreError {
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
match self {
|
||||
Self::Io(error) => Some(error),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for CoreError {
|
||||
fn from(value: std::io::Error) -> Self {
|
||||
Self::Io(value)
|
||||
}
|
||||
}
|
||||
518
crates/qroissant-core/src/extent.rs
Normal file
518
crates/qroissant-core/src/extent.rs
Normal file
|
|
@ -0,0 +1,518 @@
|
|||
//! Zero-allocation byte extent calculator for serialized q IPC values.
|
||||
//!
|
||||
//! Given a byte slice and an offset pointing to the start of a serialized q
|
||||
//! value, [`value_byte_extent`] returns how many bytes that value occupies
|
||||
//! without allocating memory or constructing a [`Value`]. This is used by
|
||||
//! the parallel column decoder to split a table's column data into
|
||||
//! independent sub-slices before dispatching them to worker threads.
|
||||
|
||||
use crate::error::CoreError;
|
||||
use crate::error::CoreResult;
|
||||
use crate::protocol::Primitive;
|
||||
use crate::protocol::Shape;
|
||||
use crate::protocol::TypeCode;
|
||||
|
||||
/// Returns the byte extent of a serialized q value starting at `bytes[offset..]`.
|
||||
///
|
||||
/// The function reads only type codes, attributes, and lengths — it never
|
||||
/// allocates or constructs a `Value`. For fixed-width vectors this is O(1);
|
||||
/// for symbol vectors and nested structures it scans forward.
|
||||
pub fn value_byte_extent(bytes: &[u8], offset: usize) -> CoreResult<usize> {
|
||||
if offset >= bytes.len() {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"extent: offset {offset} beyond buffer length {}",
|
||||
bytes.len()
|
||||
)));
|
||||
}
|
||||
|
||||
let type_code = TypeCode::try_from(bytes[offset] as i8)?;
|
||||
let shape = type_code.shape();
|
||||
|
||||
match shape {
|
||||
Shape::Atom => atom_extent(bytes, offset, type_code),
|
||||
Shape::Vector => vector_extent(bytes, offset, type_code),
|
||||
Shape::List => list_extent(bytes, offset),
|
||||
Shape::Dictionary => dictionary_extent(bytes, offset),
|
||||
Shape::Table => table_extent(bytes, offset),
|
||||
Shape::UnaryPrimitive => {
|
||||
// type byte + opcode byte
|
||||
check_available(bytes, offset, 2)?;
|
||||
Ok(2)
|
||||
}
|
||||
Shape::Error => {
|
||||
check_available(bytes, offset, 1)?;
|
||||
let data_start = offset + 1;
|
||||
let pos = bytes[data_start..]
|
||||
.iter()
|
||||
.position(|&b| b == 0)
|
||||
.ok_or_else(|| {
|
||||
CoreError::InvalidStructure(format!(
|
||||
"extent: unterminated error string at offset {offset}"
|
||||
))
|
||||
})?;
|
||||
Ok(1 + pos + 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks that at least `need` bytes are available from `offset`.
|
||||
#[inline]
|
||||
fn check_available(bytes: &[u8], offset: usize, need: usize) -> CoreResult<()> {
|
||||
if offset + need > bytes.len() {
|
||||
Err(CoreError::InvalidStructure(format!(
|
||||
"extent: need {need} bytes at offset {offset}, but buffer length is {}",
|
||||
bytes.len()
|
||||
)))
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads an i32 length field at `bytes[offset..offset+4]` (little-endian).
|
||||
#[inline]
|
||||
fn read_len(bytes: &[u8], offset: usize) -> CoreResult<usize> {
|
||||
check_available(bytes, offset, 4)?;
|
||||
let len = i32::from_le_bytes(bytes[offset..offset + 4].try_into().unwrap());
|
||||
if len < 0 {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"extent: negative length {len} at offset {offset}"
|
||||
)));
|
||||
}
|
||||
Ok(len as usize)
|
||||
}
|
||||
|
||||
fn atom_extent(bytes: &[u8], offset: usize, type_code: TypeCode) -> CoreResult<usize> {
|
||||
// 1 byte for type code + data bytes
|
||||
let primitive = type_code
|
||||
.primitive()
|
||||
.ok_or(CoreError::InvalidTypeCode(type_code as i8))?;
|
||||
|
||||
if let Some(width) = primitive.width() {
|
||||
check_available(bytes, offset, 1 + width)?;
|
||||
Ok(1 + width)
|
||||
} else {
|
||||
// Symbol atom: scan for null terminator
|
||||
debug_assert_eq!(primitive, Primitive::Symbol);
|
||||
let data_start = offset + 1;
|
||||
let pos = bytes[data_start..]
|
||||
.iter()
|
||||
.position(|&b| b == 0)
|
||||
.ok_or_else(|| {
|
||||
CoreError::InvalidStructure(format!(
|
||||
"extent: unterminated symbol atom at offset {offset}"
|
||||
))
|
||||
})?;
|
||||
// type byte + symbol bytes + null terminator
|
||||
Ok(1 + pos + 1)
|
||||
}
|
||||
}
|
||||
|
||||
fn vector_extent(bytes: &[u8], offset: usize, type_code: TypeCode) -> CoreResult<usize> {
|
||||
// Header: 1 (type) + 1 (attribute) + 4 (length) = 6 bytes
|
||||
const HEADER: usize = 6;
|
||||
check_available(bytes, offset, HEADER)?;
|
||||
let length = read_len(bytes, offset + 2)?;
|
||||
|
||||
let primitive = type_code
|
||||
.primitive()
|
||||
.ok_or(CoreError::InvalidTypeCode(type_code as i8))?;
|
||||
|
||||
if let Some(width) = primitive.width() {
|
||||
let data_bytes = length
|
||||
.checked_mul(width)
|
||||
.ok_or(CoreError::LengthOverflow(length))?;
|
||||
check_available(bytes, offset, HEADER + data_bytes)?;
|
||||
Ok(HEADER + data_bytes)
|
||||
} else {
|
||||
// Symbol vector: scan through `length` null-terminated strings
|
||||
debug_assert_eq!(primitive, Primitive::Symbol);
|
||||
let mut scan = offset + HEADER;
|
||||
for _ in 0..length {
|
||||
let pos = bytes[scan..].iter().position(|&b| b == 0).ok_or_else(|| {
|
||||
CoreError::InvalidStructure(format!(
|
||||
"extent: unterminated symbol in vector at offset {scan}"
|
||||
))
|
||||
})?;
|
||||
scan += pos + 1; // skip past the null terminator
|
||||
}
|
||||
Ok(scan - offset)
|
||||
}
|
||||
}
|
||||
|
||||
fn list_extent(bytes: &[u8], offset: usize) -> CoreResult<usize> {
|
||||
// Header: 1 (type) + 1 (attribute) + 4 (length) = 6 bytes
|
||||
const HEADER: usize = 6;
|
||||
check_available(bytes, offset, HEADER)?;
|
||||
let length = read_len(bytes, offset + 2)?;
|
||||
|
||||
let mut scan = offset + HEADER;
|
||||
for _ in 0..length {
|
||||
let child_extent = value_byte_extent(bytes, scan)?;
|
||||
scan += child_extent;
|
||||
}
|
||||
Ok(scan - offset)
|
||||
}
|
||||
|
||||
fn dictionary_extent(bytes: &[u8], offset: usize) -> CoreResult<usize> {
|
||||
// 1 byte for type code (99 or 127), then keys value, then values value
|
||||
check_available(bytes, offset, 1)?;
|
||||
let keys_extent = value_byte_extent(bytes, offset + 1)?;
|
||||
let values_extent = value_byte_extent(bytes, offset + 1 + keys_extent)?;
|
||||
Ok(1 + keys_extent + values_extent)
|
||||
}
|
||||
|
||||
fn table_extent(bytes: &[u8], offset: usize) -> CoreResult<usize> {
|
||||
// 1 byte type code + 1 byte attribute + inner dictionary
|
||||
check_available(bytes, offset, 2)?;
|
||||
let dict_extent = value_byte_extent(bytes, offset + 2)?;
|
||||
Ok(2 + dict_extent)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::decode::decode_value;
    use crate::encode::encode_value;
    use crate::frame::Encoding;
    use crate::protocol::Attribute;
    use crate::value::*;

    /// Encodes `value` and checks that its computed extent equals the length
    /// of the encoded body.
    fn assert_extent_matches(value: &Value) {
        let body = encode_value(value, Encoding::LittleEndian).unwrap();
        let extent = value_byte_extent(&body, 0).unwrap();
        assert_eq!(
            extent,
            body.len(),
            "extent mismatch for {value:?}: expected {}, got {extent}",
            body.len()
        );
    }

    /// Checks the extent of a single atom.
    fn check_atom(atom: Atom) {
        assert_extent_matches(&Value::Atom(atom));
    }

    /// Wraps a static byte string as a symbol payload.
    fn sym(text: &'static [u8]) -> bytes::Bytes {
        bytes::Bytes::from_static(text)
    }

    /// Builds an attribute-free vector value.
    fn plain_vector(data: VectorData) -> Value {
        Value::Vector(Vector::new(Attribute::None, data))
    }

    /// Checks the extent of an attribute-free vector.
    fn check_vector(data: VectorData) {
        assert_extent_matches(&plain_vector(data));
    }

    // -- Atoms --

    #[test]
    fn extent_boolean_atom() {
        check_atom(Atom::Boolean(true));
    }

    #[test]
    fn extent_byte_atom() {
        check_atom(Atom::Byte(0x42));
    }

    #[test]
    fn extent_short_atom() {
        check_atom(Atom::Short(42));
    }

    #[test]
    fn extent_int_atom() {
        check_atom(Atom::Int(42));
    }

    #[test]
    fn extent_long_atom() {
        check_atom(Atom::Long(42));
    }

    #[test]
    fn extent_real_atom() {
        check_atom(Atom::Real(1.5));
    }

    #[test]
    fn extent_float_atom() {
        check_atom(Atom::Float(1.5));
    }

    #[test]
    fn extent_char_atom() {
        check_atom(Atom::Char(b'c'));
    }

    #[test]
    fn extent_symbol_atom() {
        check_atom(Atom::Symbol(sym(b"hello")));
    }

    #[test]
    fn extent_empty_symbol_atom() {
        check_atom(Atom::Symbol(sym(b"")));
    }

    #[test]
    fn extent_guid_atom() {
        check_atom(Atom::Guid([0u8; 16]));
    }

    #[test]
    fn extent_timestamp_atom() {
        check_atom(Atom::Timestamp(1));
    }

    #[test]
    fn extent_month_atom() {
        check_atom(Atom::Month(1));
    }

    #[test]
    fn extent_date_atom() {
        check_atom(Atom::Date(1));
    }

    #[test]
    fn extent_datetime_atom() {
        check_atom(Atom::Datetime(1.5));
    }

    #[test]
    fn extent_timespan_atom() {
        check_atom(Atom::Timespan(1));
    }

    #[test]
    fn extent_minute_atom() {
        check_atom(Atom::Minute(1));
    }

    #[test]
    fn extent_second_atom() {
        check_atom(Atom::Second(1));
    }

    #[test]
    fn extent_time_atom() {
        check_atom(Atom::Time(1));
    }

    // -- Vectors --

    #[test]
    fn extent_int_vector() {
        check_vector(VectorData::from_i32s(&[1, 2, 3]));
    }

    #[test]
    fn extent_empty_int_vector() {
        check_vector(VectorData::from_i32s(&[]));
    }

    #[test]
    fn extent_symbol_vector() {
        check_vector(VectorData::Symbol(vec![sym(b"alpha"), sym(b"beta")]));
    }

    #[test]
    fn extent_empty_symbol_vector() {
        check_vector(VectorData::Symbol(vec![]));
    }

    #[test]
    fn extent_boolean_vector() {
        check_vector(VectorData::Boolean(sym(&[1, 0, 1])));
    }

    #[test]
    fn extent_guid_vector() {
        check_vector(VectorData::from_guids(&[[0u8; 16], [1u8; 16]]));
    }

    #[test]
    fn extent_long_vector() {
        check_vector(VectorData::from_i64s(&[1, 2, 3]));
    }

    #[test]
    fn extent_float_vector() {
        check_vector(VectorData::from_f64s(&[1.0, 2.0]));
    }

    #[test]
    fn extent_char_vector() {
        check_vector(VectorData::Char(sym(b"hello")));
    }

    #[test]
    fn extent_byte_vector() {
        check_vector(VectorData::Byte(bytes::Bytes::from(vec![1, 2, 3])));
    }

    // -- Composites --

    #[test]
    fn extent_general_list() {
        assert_extent_matches(&Value::List(List::new(
            Attribute::None,
            vec![
                Value::Atom(Atom::Int(42)),
                Value::Atom(Atom::Symbol(sym(b"ab"))),
            ],
        )));
    }

    #[test]
    fn extent_empty_list() {
        assert_extent_matches(&Value::List(List::new(Attribute::None, vec![])));
    }

    #[test]
    fn extent_dictionary() {
        assert_extent_matches(&Value::Dictionary(Dictionary::new(
            false,
            plain_vector(VectorData::Symbol(vec![sym(b"a"), sym(b"b")])),
            plain_vector(VectorData::from_i32s(&[1, 2])),
        )));
    }

    #[test]
    fn extent_sorted_dictionary() {
        assert_extent_matches(&Value::Dictionary(Dictionary::new(
            true,
            Value::Vector(Vector::new(
                Attribute::Sorted,
                VectorData::Symbol(vec![sym(b"a"), sym(b"b")]),
            )),
            plain_vector(VectorData::from_i32s(&[1, 2])),
        )));
    }

    #[test]
    fn extent_table() {
        assert_extent_matches(&Value::Table(Table::new(
            Attribute::None,
            vec![sym(b"sym"), sym(b"px")],
            vec![
                plain_vector(VectorData::Symbol(vec![sym(b"a"), sym(b"b")])),
                plain_vector(VectorData::from_i32s(&[10, 20])),
            ],
        )));
    }

    #[test]
    fn extent_nested_list() {
        assert_extent_matches(&Value::List(List::new(
            Attribute::None,
            vec![
                plain_vector(VectorData::from_i32s(&[1, 2, 3])),
                plain_vector(VectorData::from_i32s(&[4, 5])),
            ],
        )));
    }

    #[test]
    fn extent_unary_primitive() {
        assert_extent_matches(&Value::UnaryPrimitive { opcode: 42 });
    }

    /// Verify extent matches for every value encoded in a real roundtrip body.
    #[test]
    fn extent_matches_decode_consumption() {
        // Encode a three-column table and check that the extent covers the whole body.
        let table = Value::Table(Table::new(
            Attribute::None,
            vec![sym(b"a"), sym(b"b"), sym(b"c")],
            vec![
                plain_vector(VectorData::from_i32s(&[1, 2, 3])),
                plain_vector(VectorData::Symbol(vec![sym(b"x"), sym(b"y"), sym(b"z")])),
                plain_vector(VectorData::from_f64s(&[1.0, 2.0, 3.0])),
            ],
        ));
        let body = encode_value(&table, Encoding::LittleEndian).unwrap();
        assert_eq!(value_byte_extent(&body, 0).unwrap(), body.len());

        // The same body must also decode back to the original table.
        let decoded = decode_value(bytes::Bytes::from(body), Encoding::LittleEndian).unwrap();
        assert_eq!(decoded, table);
    }
}
|
||||
826
crates/qroissant-core/src/frame.rs
Normal file
826
crates/qroissant-core/src/frame.rs
Normal file
|
|
@ -0,0 +1,826 @@
|
|||
use std::io::Read;
|
||||
|
||||
use crate::error::CoreError;
|
||||
use crate::error::CoreResult;
|
||||
|
||||
/// Fixed byte length of every q IPC message header.
|
||||
pub const HEADER_LEN: usize = 8;
|
||||
|
||||
/// Endianness marker stored in the first q IPC header byte.
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
|
||||
pub enum Encoding {
|
||||
BigEndian,
|
||||
#[default]
|
||||
LittleEndian,
|
||||
}
|
||||
|
||||
impl Encoding {
|
||||
fn decode_u32(self, bytes: [u8; 4]) -> u32 {
|
||||
match self {
|
||||
Self::BigEndian => u32::from_be_bytes(bytes),
|
||||
Self::LittleEndian => u32::from_le_bytes(bytes),
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_u32(self, value: u32) -> [u8; 4] {
|
||||
match self {
|
||||
Self::BigEndian => value.to_be_bytes(),
|
||||
Self::LittleEndian => value.to_le_bytes(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Encoding> for u8 {
|
||||
fn from(value: Encoding) -> Self {
|
||||
match value {
|
||||
Encoding::BigEndian => 0,
|
||||
Encoding::LittleEndian => 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for Encoding {
|
||||
type Error = CoreError;
|
||||
|
||||
fn try_from(value: u8) -> CoreResult<Self> {
|
||||
match value {
|
||||
0 => Ok(Self::BigEndian),
|
||||
1 => Ok(Self::LittleEndian),
|
||||
_ => Err(CoreError::InvalidEncoding(value)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// q IPC message kind stored in the second q IPC header byte.
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
|
||||
pub enum MessageType {
|
||||
#[default]
|
||||
Asynchronous,
|
||||
Synchronous,
|
||||
Response,
|
||||
}
|
||||
|
||||
impl From<MessageType> for u8 {
|
||||
fn from(value: MessageType) -> Self {
|
||||
match value {
|
||||
MessageType::Asynchronous => 0,
|
||||
MessageType::Synchronous => 1,
|
||||
MessageType::Response => 2,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for MessageType {
|
||||
type Error = CoreError;
|
||||
|
||||
fn try_from(value: u8) -> CoreResult<Self> {
|
||||
match value {
|
||||
0 => Ok(Self::Asynchronous),
|
||||
1 => Ok(Self::Synchronous),
|
||||
2 => Ok(Self::Response),
|
||||
_ => Err(CoreError::InvalidMessageType(value)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// q IPC compression marker stored in the third q IPC header byte.
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
|
||||
pub enum Compression {
|
||||
#[default]
|
||||
Uncompressed,
|
||||
Compressed,
|
||||
CompressedLarge,
|
||||
}
|
||||
|
||||
impl From<Compression> for u8 {
|
||||
fn from(value: Compression) -> Self {
|
||||
match value {
|
||||
Compression::Uncompressed => 0,
|
||||
Compression::Compressed => 1,
|
||||
Compression::CompressedLarge => 2,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for Compression {
|
||||
type Error = CoreError;
|
||||
|
||||
fn try_from(value: u8) -> CoreResult<Self> {
|
||||
match value {
|
||||
0 => Ok(Self::Uncompressed),
|
||||
1 => Ok(Self::Compressed),
|
||||
2 => Ok(Self::CompressedLarge),
|
||||
_ => Err(CoreError::InvalidCompression(value)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Decoded q IPC message header.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub struct MessageHeader {
|
||||
encoding: Encoding,
|
||||
message_type: MessageType,
|
||||
compression: Compression,
|
||||
size: usize,
|
||||
}
|
||||
|
||||
impl MessageHeader {
|
||||
/// Builds a validated message header.
|
||||
pub fn new(
|
||||
encoding: Encoding,
|
||||
message_type: MessageType,
|
||||
compression: Compression,
|
||||
size: usize,
|
||||
) -> CoreResult<Self> {
|
||||
if size < HEADER_LEN {
|
||||
return Err(CoreError::InvalidMessageLength(size));
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
encoding,
|
||||
message_type,
|
||||
compression,
|
||||
size,
|
||||
})
|
||||
}
|
||||
|
||||
/// Parses a message header from an exact 8-byte array.
|
||||
pub fn from_bytes(bytes: [u8; HEADER_LEN]) -> CoreResult<Self> {
|
||||
let encoding = Encoding::try_from(bytes[0])?;
|
||||
let message_type = MessageType::try_from(bytes[1])?;
|
||||
let compression = Compression::try_from(bytes[2])?;
|
||||
let size = encoding.decode_u32(bytes[4..8].try_into().expect("fixed-size slice")) as usize;
|
||||
Self::new(encoding, message_type, compression, size)
|
||||
}
|
||||
|
||||
/// Parses a message header from a byte slice.
|
||||
pub fn parse(bytes: &[u8]) -> CoreResult<Self> {
|
||||
let header: [u8; HEADER_LEN] = bytes
|
||||
.get(..HEADER_LEN)
|
||||
.ok_or(CoreError::TruncatedHeader {
|
||||
actual: bytes.len(),
|
||||
})?
|
||||
.try_into()
|
||||
.expect("header slice length already checked");
|
||||
Self::from_bytes(header)
|
||||
}
|
||||
|
||||
/// Serializes the header back to its q IPC byte representation.
|
||||
pub fn to_bytes(self) -> CoreResult<[u8; HEADER_LEN]> {
|
||||
let size = u32::try_from(self.size).map_err(|_| CoreError::LengthOverflow(self.size))?;
|
||||
let mut bytes = [0_u8; HEADER_LEN];
|
||||
bytes[0] = self.encoding.into();
|
||||
bytes[1] = self.message_type.into();
|
||||
bytes[2] = self.compression.into();
|
||||
bytes[4..8].copy_from_slice(&self.encoding.encode_u32(size));
|
||||
Ok(bytes)
|
||||
}
|
||||
|
||||
pub fn encoding(self) -> Encoding {
|
||||
self.encoding
|
||||
}
|
||||
|
||||
pub fn message_type(self) -> MessageType {
|
||||
self.message_type
|
||||
}
|
||||
|
||||
pub fn compression(self) -> Compression {
|
||||
self.compression
|
||||
}
|
||||
|
||||
pub fn size(self) -> usize {
|
||||
self.size
|
||||
}
|
||||
|
||||
pub fn body_len(self) -> usize {
|
||||
self.size - HEADER_LEN
|
||||
}
|
||||
}
|
||||
|
||||
/// Borrowed validated q IPC frame.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub struct Frame<'a> {
|
||||
header: MessageHeader,
|
||||
body: &'a [u8],
|
||||
}
|
||||
|
||||
impl<'a> Frame<'a> {
|
||||
/// Validates a full q IPC frame and returns borrowed header/body views.
|
||||
pub fn parse(bytes: &'a [u8]) -> CoreResult<Self> {
|
||||
let header = MessageHeader::parse(bytes)?;
|
||||
if bytes.len() != header.size() {
|
||||
return Err(CoreError::FrameLengthMismatch {
|
||||
declared: header.size(),
|
||||
actual: bytes.len(),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
header,
|
||||
body: &bytes[HEADER_LEN..],
|
||||
})
|
||||
}
|
||||
|
||||
pub fn header(self) -> MessageHeader {
|
||||
self.header
|
||||
}
|
||||
|
||||
pub fn body(self) -> &'a [u8] {
|
||||
self.body
|
||||
}
|
||||
}
|
||||
|
||||
/// Decompresses a q IPC compressed body (follows the 8-byte header).
|
||||
///
|
||||
/// The first 4 bytes of the compressed body are a size prefix encoding the
|
||||
/// total decompressed message length including the 8-byte header. The
|
||||
/// remaining bytes are the compressed payload using q's LZW-style algorithm:
|
||||
/// a flag byte drives 8 decisions — bit clear emits a literal byte, bit set
|
||||
/// emits a back-reference (2 fixed bytes + n extra bytes) via a 256-entry
|
||||
/// XOR-keyed lookup table.
|
||||
pub fn decompress_ipc_body(compressed: &[u8], encoding: Encoding) -> CoreResult<Vec<u8>> {
|
||||
if compressed.len() < 4 {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"compressed body must be at least 4 bytes for size prefix, got {}",
|
||||
compressed.len()
|
||||
)));
|
||||
}
|
||||
|
||||
let size_with_header = match encoding {
|
||||
Encoding::LittleEndian => {
|
||||
i32::from_le_bytes(compressed[..4].try_into().expect("validated length"))
|
||||
}
|
||||
Encoding::BigEndian => {
|
||||
i32::from_be_bytes(compressed[..4].try_into().expect("validated length"))
|
||||
}
|
||||
};
|
||||
if size_with_header < 8 {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"compressed size prefix {size_with_header} is less than minimum header size 8"
|
||||
)));
|
||||
}
|
||||
let size = (size_with_header - 8) as usize;
|
||||
|
||||
let mut decompressed = vec![0_u8; size];
|
||||
let mut aa = [0_i32; 256];
|
||||
let mut n = 0_usize;
|
||||
let mut f = 0_usize;
|
||||
let mut s = 0_usize;
|
||||
let mut p = 0_usize;
|
||||
let mut i = 0_usize;
|
||||
let mut d = 4_usize; // skip the 4-byte size prefix
|
||||
|
||||
while s < size {
|
||||
if i == 0 {
|
||||
if d >= compressed.len() {
|
||||
return Err(CoreError::InvalidStructure(
|
||||
"unexpected end of compressed data while reading flag byte".to_string(),
|
||||
));
|
||||
}
|
||||
f = compressed[d] as usize;
|
||||
d += 1;
|
||||
i = 1;
|
||||
}
|
||||
|
||||
if (f & i) != 0 {
|
||||
// Back-reference: lookup key byte + extra count byte
|
||||
if d + 2 > compressed.len() {
|
||||
return Err(CoreError::InvalidStructure(
|
||||
"insufficient data for back-reference (need 2 bytes)".to_string(),
|
||||
));
|
||||
}
|
||||
let mut r = aa[compressed[d] as usize] as usize;
|
||||
d += 1;
|
||||
|
||||
if r >= size {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"back-reference start {r} exceeds decompressed buffer size {size}"
|
||||
)));
|
||||
}
|
||||
if s >= size {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"write index {s} exceeds decompressed buffer size {size}"
|
||||
)));
|
||||
}
|
||||
decompressed[s] = decompressed[r];
|
||||
s += 1;
|
||||
r += 1;
|
||||
|
||||
if r >= size {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"back-reference position {r} exceeds decompressed buffer size {size}"
|
||||
)));
|
||||
}
|
||||
if s >= size {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"write index {s} exceeds decompressed buffer size {size}"
|
||||
)));
|
||||
}
|
||||
decompressed[s] = decompressed[r];
|
||||
s += 1;
|
||||
r += 1;
|
||||
|
||||
n = compressed[d] as usize;
|
||||
d += 1;
|
||||
|
||||
if r + n > size {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"back-reference range {r}..{} exceeds decompressed buffer size {size}",
|
||||
r + n
|
||||
)));
|
||||
}
|
||||
if s + n > size {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"write range {s}..{} exceeds decompressed buffer size {size}",
|
||||
s + n
|
||||
)));
|
||||
}
|
||||
for m in 0..n {
|
||||
decompressed[s + m] = decompressed[r + m];
|
||||
}
|
||||
} else {
|
||||
// Literal byte
|
||||
if d >= compressed.len() {
|
||||
return Err(CoreError::InvalidStructure(
|
||||
"unexpected end of compressed data while reading literal byte".to_string(),
|
||||
));
|
||||
}
|
||||
decompressed[s] = compressed[d];
|
||||
s += 1;
|
||||
d += 1;
|
||||
}
|
||||
|
||||
// Update the XOR lookup table with newly emitted bytes
|
||||
while p < s.saturating_sub(1) {
|
||||
aa[(decompressed[p] ^ decompressed[p + 1]) as usize] = p as i32;
|
||||
p += 1;
|
||||
}
|
||||
|
||||
if (f & i) != 0 {
|
||||
s += n;
|
||||
p = s;
|
||||
}
|
||||
|
||||
i *= 2;
|
||||
if i == 256 {
|
||||
i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(decompressed)
|
||||
}
|
||||
|
||||
/// Serializes a q-encoded body as a complete q IPC message.
|
||||
///
|
||||
/// This mirrors the current rewrite contract: qroissant only emits
|
||||
/// little-endian, uncompressed frames for now.
|
||||
pub fn serialize_body_as_message(
|
||||
body: &[u8],
|
||||
encoding: Encoding,
|
||||
message_type: MessageType,
|
||||
compression: Compression,
|
||||
) -> CoreResult<Vec<u8>> {
|
||||
if encoding != Encoding::LittleEndian {
|
||||
return Err(CoreError::UnsupportedEndianness(encoding));
|
||||
}
|
||||
if compression != Compression::Uncompressed {
|
||||
return Err(CoreError::UnsupportedCompression(compression));
|
||||
}
|
||||
|
||||
let size = HEADER_LEN
|
||||
.checked_add(body.len())
|
||||
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
||||
let header = MessageHeader::new(encoding, message_type, compression, size)?;
|
||||
let mut payload = Vec::with_capacity(size);
|
||||
payload.extend_from_slice(&header.to_bytes()?);
|
||||
payload.extend_from_slice(body);
|
||||
Ok(payload)
|
||||
}
|
||||
|
||||
/// Reads the total q IPC frame length from an 8-byte header.
|
||||
pub fn read_message_length(header: &[u8; HEADER_LEN]) -> CoreResult<usize> {
|
||||
Ok(MessageHeader::from_bytes(*header)?.size())
|
||||
}
|
||||
|
||||
/// Reads one complete q IPC frame from an IO stream.
|
||||
pub fn read_frame<R: Read>(reader: &mut R) -> CoreResult<Vec<u8>> {
|
||||
let mut header = [0_u8; HEADER_LEN];
|
||||
reader.read_exact(&mut header)?;
|
||||
let frame_len = read_message_length(&header)?;
|
||||
let mut frame = vec![0_u8; frame_len];
|
||||
frame[..HEADER_LEN].copy_from_slice(&header);
|
||||
reader.read_exact(&mut frame[HEADER_LEN..])?;
|
||||
Ok(frame)
|
||||
}
|
||||
|
||||
/// Incremental q IPC decompressor that can be fed compressed bytes as they
|
||||
/// arrive from the network, overlapping I/O with decompression work.
|
||||
///
|
||||
/// The q LZW algorithm reads compressed input forward-only — back-references
|
||||
/// target the *output* buffer, not the input. This means we can process
|
||||
/// compressed bytes as soon as they arrive without buffering the entire
|
||||
/// compressed payload first.
|
||||
///
|
||||
/// # Usage
|
||||
///
|
||||
/// ```ignore
|
||||
/// let mut dec = StreamingDecompressor::new(size_prefix, Encoding::LittleEndian)?;
|
||||
/// while !dec.is_complete() {
|
||||
/// let chunk = read_from_network()?;
|
||||
/// dec.feed(&chunk)?;
|
||||
/// }
|
||||
/// let body = dec.finish()?;
|
||||
/// ```
|
||||
pub struct StreamingDecompressor {
|
||||
decompressed: Vec<u8>,
|
||||
aa: [i32; 256],
|
||||
compressed_buf: Vec<u8>,
|
||||
d: usize,
|
||||
s: usize,
|
||||
p: usize,
|
||||
f: usize,
|
||||
i: usize,
|
||||
size: usize,
|
||||
read_ptr: usize,
|
||||
}
|
||||
|
||||
impl StreamingDecompressor {
|
||||
/// Creates a new streaming decompressor from the 4-byte size prefix
|
||||
/// (the first 4 bytes of the compressed body after the 8-byte header).
|
||||
pub fn new(size_prefix: [u8; 4], encoding: Encoding) -> CoreResult<Self> {
|
||||
let size_with_header = match encoding {
|
||||
Encoding::LittleEndian => i32::from_le_bytes(size_prefix),
|
||||
Encoding::BigEndian => i32::from_be_bytes(size_prefix),
|
||||
};
|
||||
if size_with_header < 8 {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"compressed size prefix {size_with_header} is less than minimum header size 8"
|
||||
)));
|
||||
}
|
||||
let size = (size_with_header - 8) as usize;
|
||||
|
||||
Ok(Self {
|
||||
decompressed: vec![0_u8; size],
|
||||
aa: [0_i32; 256],
|
||||
compressed_buf: Vec::new(),
|
||||
d: 0,
|
||||
s: 0,
|
||||
p: 0,
|
||||
f: 0,
|
||||
i: 0,
|
||||
size,
|
||||
read_ptr: 0,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn feed(&mut self, chunk: &[u8]) -> CoreResult<usize> {
|
||||
self.compressed_buf.extend_from_slice(chunk);
|
||||
let prev_s = self.s;
|
||||
|
||||
while self.s < self.size {
|
||||
if self.i == 0 {
|
||||
if self.d >= self.compressed_buf.len() {
|
||||
break;
|
||||
}
|
||||
self.f = self.compressed_buf[self.d] as usize;
|
||||
self.d += 1;
|
||||
self.i = 1;
|
||||
}
|
||||
|
||||
let is_backref = (self.f & self.i) != 0;
|
||||
let mut n = 0;
|
||||
|
||||
if is_backref {
|
||||
if self.d + 2 > self.compressed_buf.len() {
|
||||
break;
|
||||
}
|
||||
let mut r = self.aa[self.compressed_buf[self.d] as usize] as usize;
|
||||
self.d += 1;
|
||||
if r >= self.size || self.s + 2 > self.size {
|
||||
return Err(CoreError::InvalidStructure(
|
||||
"backref out of bounds".to_string(),
|
||||
));
|
||||
}
|
||||
self.decompressed[self.s] = self.decompressed[r];
|
||||
self.s += 1;
|
||||
r += 1;
|
||||
|
||||
if r >= self.size || self.s + 1 > self.size {
|
||||
return Err(CoreError::InvalidStructure(
|
||||
"backref out of bounds".to_string(),
|
||||
));
|
||||
}
|
||||
self.decompressed[self.s] = self.decompressed[r];
|
||||
self.s += 1;
|
||||
r += 1;
|
||||
|
||||
n = self.compressed_buf[self.d] as usize;
|
||||
self.d += 1;
|
||||
if r + n > self.size || self.s + n > self.size {
|
||||
return Err(CoreError::InvalidStructure(
|
||||
"backref out of bounds".to_string(),
|
||||
));
|
||||
}
|
||||
for m in 0..n {
|
||||
self.decompressed[self.s + m] = self.decompressed[r + m];
|
||||
}
|
||||
} else {
|
||||
if self.d >= self.compressed_buf.len() {
|
||||
break;
|
||||
}
|
||||
self.decompressed[self.s] = self.compressed_buf[self.d];
|
||||
self.s += 1;
|
||||
self.d += 1;
|
||||
}
|
||||
|
||||
// Sync lookup table
|
||||
while self.p < self.s.saturating_sub(1) {
|
||||
self.aa[(self.decompressed[self.p] ^ self.decompressed[self.p + 1]) as usize] =
|
||||
self.p as i32;
|
||||
self.p += 1;
|
||||
}
|
||||
|
||||
if is_backref {
|
||||
self.s += n;
|
||||
self.p = self.s;
|
||||
}
|
||||
|
||||
self.i *= 2;
|
||||
if self.i == 256 {
|
||||
self.i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Keep memory usage in check by draining processed bytes
|
||||
if self.d > 0 {
|
||||
self.compressed_buf.drain(0..self.d);
|
||||
self.d = 0;
|
||||
}
|
||||
|
||||
Ok(self.s - prev_s)
|
||||
}
|
||||
|
||||
/// Returns `true` when decompression is complete.
|
||||
pub fn is_complete(&self) -> bool {
|
||||
self.s >= self.size
|
||||
}
|
||||
|
||||
/// Current number of decompressed bytes available.
|
||||
pub fn decompressed_len(&self) -> usize {
|
||||
self.s
|
||||
}
|
||||
|
||||
/// Number of decompressed bytes that have not yet been read.
|
||||
pub fn unread_len(&self) -> usize {
|
||||
self.s - self.read_ptr
|
||||
}
|
||||
|
||||
/// Returns a slice of the next available decompressed bytes.
|
||||
pub fn next_chunk(&self) -> &[u8] {
|
||||
&self.decompressed[self.read_ptr..self.s]
|
||||
}
|
||||
|
||||
/// Advances the read pointer by `len` bytes.
|
||||
pub fn consume(&mut self, len: usize) {
|
||||
self.read_ptr = (self.read_ptr + len).min(self.s);
|
||||
}
|
||||
|
||||
/// Total expected decompressed size.
|
||||
pub fn total_size(&self) -> usize {
|
||||
self.size
|
||||
}
|
||||
|
||||
/// Borrows the decompressed output produced so far.
|
||||
pub fn decompressed(&self) -> &[u8] {
|
||||
&self.decompressed[..self.s]
|
||||
}
|
||||
|
||||
/// Consumes the decompressor and returns the completed output buffer.
|
||||
///
|
||||
/// Returns an error if decompression is not yet complete.
|
||||
pub fn finish(self) -> CoreResult<Vec<u8>> {
|
||||
if !self.is_complete() {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"streaming decompress: incomplete — {}/{} bytes decompressed",
|
||||
self.s, self.size
|
||||
)));
|
||||
}
|
||||
Ok(self.decompressed)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;

    #[test]
    fn encoding_round_trips_from_u8() {
        let big = Encoding::try_from(0).unwrap();
        let little = Encoding::try_from(1).unwrap();

        assert_eq!(big, Encoding::BigEndian);
        assert_eq!(little, Encoding::LittleEndian);
        assert!(matches!(
            Encoding::try_from(9),
            Err(CoreError::InvalidEncoding(9))
        ));
    }

    #[test]
    fn compression_supports_compressed_large() {
        let uncompressed = Compression::try_from(0).unwrap();
        let compressed = Compression::try_from(1).unwrap();
        let compressed_large = Compression::try_from(2).unwrap();

        assert_eq!(uncompressed, Compression::Uncompressed);
        assert_eq!(compressed, Compression::Compressed);
        assert_eq!(compressed_large, Compression::CompressedLarge);
    }

    #[test]
    fn header_parses_little_endian_payloads() {
        let raw = [1, 2, 2, 0, 24, 0, 0, 0];
        let header = MessageHeader::from_bytes(raw).unwrap();

        assert_eq!(header.encoding(), Encoding::LittleEndian);
        assert_eq!(header.message_type(), MessageType::Response);
        assert_eq!(header.compression(), Compression::CompressedLarge);
        assert_eq!(header.size(), 24);
        assert_eq!(header.body_len(), 16);
    }

    #[test]
    fn header_parses_big_endian_lengths() {
        let raw = [0, 1, 0, 0, 0, 0, 0, 16];
        let header = MessageHeader::from_bytes(raw).unwrap();

        assert_eq!(header.encoding(), Encoding::BigEndian);
        assert_eq!(header.message_type(), MessageType::Synchronous);
        assert_eq!(header.size(), 16);
    }

    #[test]
    fn header_rejects_lengths_smaller_than_header() {
        // Declared size 7 is one byte short of the header itself.
        let outcome = MessageHeader::from_bytes([1, 2, 0, 0, 7, 0, 0, 0]);

        assert!(matches!(outcome, Err(CoreError::InvalidMessageLength(7))));
    }

    #[test]
    fn header_to_bytes_round_trips() {
        let original = MessageHeader::new(
            Encoding::LittleEndian,
            MessageType::Response,
            Compression::Compressed,
            64,
        )
        .unwrap();

        let reparsed = MessageHeader::from_bytes(original.to_bytes().unwrap()).unwrap();
        assert_eq!(reparsed, original);
    }

    #[test]
    fn frame_parse_validates_declared_length() {
        let raw = [1, 2, 0, 0, 10, 0, 0, 0, 42, 43];
        let frame = Frame::parse(&raw).unwrap();

        assert_eq!(frame.header().size(), 10);
        assert_eq!(frame.body(), &[42, 43]);
    }

    #[test]
    fn frame_parse_rejects_length_mismatch() {
        // Header declares 11 bytes but only 10 are supplied.
        let raw = [1, 2, 0, 0, 11, 0, 0, 0, 42, 43];
        let outcome = Frame::parse(&raw);

        assert!(matches!(
            outcome,
            Err(CoreError::FrameLengthMismatch {
                declared: 11,
                actual: 10
            })
        ));
    }

    #[test]
    fn serialize_body_wraps_uncompressed_little_endian_body() {
        let message = serialize_body_as_message(
            &[10, 20, 30],
            Encoding::LittleEndian,
            MessageType::Synchronous,
            Compression::Uncompressed,
        )
        .unwrap();

        let expected = vec![1, 1, 0, 0, 11, 0, 0, 0, 10, 20, 30];
        assert_eq!(message, expected);
    }

    #[test]
    fn serialize_body_rejects_big_endian_for_now() {
        let outcome = serialize_body_as_message(
            &[1],
            Encoding::BigEndian,
            MessageType::Asynchronous,
            Compression::Uncompressed,
        );

        assert!(matches!(
            outcome,
            Err(CoreError::UnsupportedEndianness(Encoding::BigEndian))
        ));
    }

    #[test]
    fn serialize_body_rejects_compressed_frames_for_now() {
        let outcome = serialize_body_as_message(
            &[1],
            Encoding::LittleEndian,
            MessageType::Asynchronous,
            Compression::CompressedLarge,
        );

        assert!(matches!(
            outcome,
            Err(CoreError::UnsupportedCompression(
                Compression::CompressedLarge
            ))
        ));
    }

    #[test]
    fn read_frame_reads_complete_payload() {
        let wire = vec![1, 2, 0, 0, 10, 0, 0, 0, 42, 43];
        let mut source = Cursor::new(wire.clone());

        let frame = read_frame(&mut source).unwrap();
        assert_eq!(frame, wire);
    }

    // -----------------------------------------------------------------------
    // StreamingDecompressor tests
    // -----------------------------------------------------------------------

    /// Helper: decompresses `compressed_body` (size prefix included) with the
    /// batch `decompress_ipc_body`, then checks that the streaming
    /// decompressor yields the same bytes both when fed the whole payload at
    /// once and when fed one byte at a time.
    ///
    /// There is no compressor in this module, so callers hand-build the
    /// compressed input and the batch function serves as the reference.
    fn assert_streaming_matches_batch(compressed_body: &[u8]) {
        let expected = decompress_ipc_body(compressed_body, Encoding::LittleEndian).unwrap();

        let (prefix, payload) = compressed_body.split_at(4);
        let size_prefix: [u8; 4] = prefix.try_into().unwrap();
        let fresh = || StreamingDecompressor::new(size_prefix, Encoding::LittleEndian).unwrap();

        // Feed all at once
        let mut whole = fresh();
        whole.feed(payload).unwrap();
        assert!(whole.is_complete());
        assert_eq!(whole.finish().unwrap(), expected, "all-at-once mismatch");

        // Feed byte-by-byte
        let mut trickle = fresh();
        for byte in payload {
            trickle.feed(std::slice::from_ref(byte)).unwrap();
        }
        assert!(trickle.is_complete());
        assert_eq!(trickle.finish().unwrap(), expected, "byte-by-byte mismatch");
    }

    #[test]
    fn streaming_decompressor_empty_body() {
        // A declared total of 8 bytes is the header alone: zero body bytes.
        let dec =
            StreamingDecompressor::new(8_i32.to_le_bytes(), Encoding::LittleEndian).unwrap();

        // Complete without being fed anything.
        assert!(dec.is_complete());
        assert_eq!(dec.decompressed_len(), 0);
        assert!(dec.finish().unwrap().is_empty());
    }

    #[test]
    fn streaming_decompressor_rejects_small_size() {
        let outcome = StreamingDecompressor::new(4_i32.to_le_bytes(), Encoding::LittleEndian);

        assert!(outcome.is_err());
    }

    #[test]
    fn streaming_decompressor_finish_before_complete() {
        // 24 total bytes minus the 8-byte header leaves 16 bytes still to produce.
        let dec =
            StreamingDecompressor::new(24_i32.to_le_bytes(), Encoding::LittleEndian).unwrap();

        assert!(!dec.is_complete());
        assert!(dec.finish().is_err());
    }

    #[test]
    fn streaming_decompressor_literal_only() {
        // Three literal bytes and no back-references:
        //   size prefix = 8 + 3 = 11
        //   payload     = [flag 0x00 (all literals)] 'A' 'B' 'C'
        let mut compressed = 11_i32.to_le_bytes().to_vec();
        compressed.extend_from_slice(&[0x00, 0x41, 0x42, 0x43]);

        assert_streaming_matches_batch(&compressed);
    }
}
|
||||
61
crates/qroissant-core/src/lib.rs
Normal file
61
crates/qroissant-core/src/lib.rs
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
//! q IPC protocol and value semantics for qroissant.
|
||||
//!
|
||||
//! This crate provides the core building blocks for encoding, decoding, and
|
||||
//! representing q/kdb+ IPC messages:
|
||||
//!
|
||||
//! - **`protocol`** — type codes, primitives, shapes, and attributes that
|
||||
//! define the q wire format.
|
||||
//! - **`value`** — the `Value` enum and its variants (`Atom`, `Vector`,
|
||||
//! `List`, `Dictionary`, `Table`) that model q data in Rust.
|
||||
//! - **`frame`** — message framing, header parsing, compression, and the
|
||||
//! `StreamingDecompressor` for incremental decompression of compressed bodies.
|
||||
//! - **`decode`** — synchronous message and value decoding with optional
|
||||
//! parallel column decode via rayon.
|
||||
//! - **`encode`** — serialisation of `Value` trees into q IPC byte frames.
|
||||
//! - **`pipelined`** — asynchronous (`tokio::io::AsyncRead`) value decoder
|
||||
//! for streaming use cases.
|
||||
//! - **`extent`** — zero-allocation byte extent scanning used to locate
|
||||
//! column boundaries for parallel decode.
|
||||
|
||||
pub mod decode;
|
||||
pub mod encode;
|
||||
pub mod error;
|
||||
pub mod extent;
|
||||
pub mod frame;
|
||||
pub mod pipelined;
|
||||
pub mod protocol;
|
||||
pub mod value;
|
||||
|
||||
pub use decode::DecodeOptions;
|
||||
pub use decode::DecodedMessage;
|
||||
pub use decode::decode_message;
|
||||
pub use decode::decode_message_with_options;
|
||||
pub use decode::decode_value;
|
||||
pub use decode::decode_value_with_options;
|
||||
pub use encode::encode_message;
|
||||
pub use encode::encode_value;
|
||||
pub use error::CoreError;
|
||||
pub use error::CoreResult;
|
||||
pub use extent::value_byte_extent;
|
||||
pub use frame::Compression;
|
||||
pub use frame::Encoding;
|
||||
pub use frame::Frame;
|
||||
pub use frame::HEADER_LEN;
|
||||
pub use frame::MessageHeader;
|
||||
pub use frame::MessageType;
|
||||
pub use frame::StreamingDecompressor;
|
||||
pub use frame::read_frame;
|
||||
pub use frame::read_message_length;
|
||||
pub use frame::serialize_body_as_message;
|
||||
pub use protocol::Attribute;
|
||||
pub use protocol::Primitive;
|
||||
pub use protocol::Shape;
|
||||
pub use protocol::TypeCode;
|
||||
pub use protocol::ValueType;
|
||||
pub use value::Atom;
|
||||
pub use value::Dictionary;
|
||||
pub use value::List;
|
||||
pub use value::Table;
|
||||
pub use value::Value;
|
||||
pub use value::Vector;
|
||||
pub use value::VectorData;
|
||||
390
crates/qroissant-core/src/pipelined.rs
Normal file
390
crates/qroissant-core/src/pipelined.rs
Normal file
|
|
@ -0,0 +1,390 @@
|
|||
use futures::future::BoxFuture;
|
||||
use futures::future::FutureExt;
|
||||
use tokio::io::AsyncRead;
|
||||
use tokio::io::AsyncReadExt;
|
||||
|
||||
use crate::decode::extract_columns;
|
||||
use crate::decode::extract_symbol_names;
|
||||
use crate::error::CoreError;
|
||||
use crate::error::CoreResult;
|
||||
use crate::frame::Encoding;
|
||||
use crate::protocol::Attribute;
|
||||
use crate::protocol::Primitive;
|
||||
use crate::protocol::TypeCode;
|
||||
use crate::value::Atom;
|
||||
use crate::value::Dictionary;
|
||||
use crate::value::List;
|
||||
use crate::value::Table;
|
||||
use crate::value::Value;
|
||||
use crate::value::Vector;
|
||||
use crate::value::VectorData;
|
||||
|
||||
/// Asynchronous reader for q value components.
|
||||
///
|
||||
/// Wraps an `AsyncRead` source and provides async methods to read
|
||||
/// primitive types and byte chunks, allowing the decoder to wait
|
||||
/// for data without blocking.
|
||||
///
|
||||
/// Only little-endian payloads are supported (matching the rest of qroissant).
|
||||
pub struct PipelinedReader<R> {
|
||||
reader: R,
|
||||
}
|
||||
|
||||
impl<R: AsyncRead + Unpin> PipelinedReader<R> {
|
||||
/// Creates a new pipelined reader.
|
||||
///
|
||||
/// Returns `UnsupportedEndianness` for big-endian payloads, matching
|
||||
/// the behaviour of `decode_value()` and `decode_message()`.
|
||||
pub fn new(reader: R, encoding: Encoding) -> CoreResult<Self> {
|
||||
if encoding != Encoding::LittleEndian {
|
||||
return Err(CoreError::UnsupportedEndianness(encoding));
|
||||
}
|
||||
Ok(Self { reader })
|
||||
}
|
||||
|
||||
pub async fn read_u8(&mut self) -> CoreResult<u8> {
|
||||
let mut buf = [0_u8; 1];
|
||||
self.reader.read_exact(&mut buf).await?;
|
||||
Ok(buf[0])
|
||||
}
|
||||
|
||||
pub async fn read_i8(&mut self) -> CoreResult<i8> {
|
||||
Ok(self.read_u8().await? as i8)
|
||||
}
|
||||
|
||||
pub async fn read_i16(&mut self) -> CoreResult<i16> {
|
||||
let mut buf = [0_u8; 2];
|
||||
self.reader.read_exact(&mut buf).await?;
|
||||
Ok(i16::from_le_bytes(buf))
|
||||
}
|
||||
|
||||
pub async fn read_i32(&mut self) -> CoreResult<i32> {
|
||||
let mut buf = [0_u8; 4];
|
||||
self.reader.read_exact(&mut buf).await?;
|
||||
Ok(i32::from_le_bytes(buf))
|
||||
}
|
||||
|
||||
pub async fn read_i64(&mut self) -> CoreResult<i64> {
|
||||
let mut buf = [0_u8; 8];
|
||||
self.reader.read_exact(&mut buf).await?;
|
||||
Ok(i64::from_le_bytes(buf))
|
||||
}
|
||||
|
||||
pub async fn read_f32(&mut self) -> CoreResult<f32> {
|
||||
let mut buf = [0_u8; 4];
|
||||
self.reader.read_exact(&mut buf).await?;
|
||||
Ok(f32::from_le_bytes(buf))
|
||||
}
|
||||
|
||||
pub async fn read_f64(&mut self) -> CoreResult<f64> {
|
||||
let mut buf = [0_u8; 8];
|
||||
self.reader.read_exact(&mut buf).await?;
|
||||
Ok(f64::from_le_bytes(buf))
|
||||
}
|
||||
|
||||
pub async fn read_guid(&mut self) -> CoreResult<[u8; 16]> {
|
||||
let mut buf = [0_u8; 16];
|
||||
self.reader.read_exact(&mut buf).await?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
pub async fn read_length(&mut self) -> CoreResult<usize> {
|
||||
let length = self.read_i32().await?;
|
||||
usize::try_from(length).map_err(|_| CoreError::InvalidCollectionLength(length))
|
||||
}
|
||||
|
||||
pub async fn read_bytes(&mut self, len: usize) -> CoreResult<bytes::Bytes> {
|
||||
let mut buf = vec![0_u8; len];
|
||||
self.reader.read_exact(&mut buf).await?;
|
||||
Ok(bytes::Bytes::from(buf))
|
||||
}
|
||||
|
||||
/// Reads a null-terminated symbol.
|
||||
///
|
||||
/// Reads one byte at a time until a null terminator is found.
|
||||
/// In practice the underlying reader is buffered (e.g. `BufReader`
|
||||
/// or `DecompressingReader` with an 8 KB buffer), so single-byte
|
||||
/// `read_exact` calls are cheap — they copy from the user-space buffer
|
||||
/// without issuing a syscall.
|
||||
pub async fn read_symbol(&mut self) -> CoreResult<bytes::Bytes> {
|
||||
let mut buf = Vec::new();
|
||||
loop {
|
||||
let b = self.read_u8().await?;
|
||||
if b == 0 {
|
||||
return Ok(bytes::Bytes::from(buf));
|
||||
}
|
||||
buf.push(b);
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn read_vec<T: bytemuck::Pod + bytemuck::AnyBitPattern>(
|
||||
&mut self,
|
||||
count: usize,
|
||||
) -> CoreResult<Vec<T>> {
|
||||
let _byte_len = count
|
||||
.checked_mul(std::mem::size_of::<T>())
|
||||
.ok_or(CoreError::LengthOverflow(count))?;
|
||||
let mut values = vec![T::zeroed(); count];
|
||||
let dst: &mut [u8] = bytemuck::cast_slice_mut(&mut values);
|
||||
self.reader.read_exact(dst).await?;
|
||||
Ok(values)
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn decode_value_async<R: AsyncRead + Unpin + Send>(
|
||||
reader: &mut PipelinedReader<R>,
|
||||
) -> CoreResult<Value> {
|
||||
decode_inner_async(reader).await
|
||||
}
|
||||
|
||||
fn decode_inner_async<'a, R: AsyncRead + Unpin + Send>(
|
||||
reader: &'a mut PipelinedReader<R>,
|
||||
) -> BoxFuture<'a, CoreResult<Value>> {
|
||||
async move {
|
||||
let type_code_byte = reader.read_i8().await?;
|
||||
let type_code = TypeCode::try_from(type_code_byte)?;
|
||||
match type_code.shape() {
|
||||
crate::protocol::Shape::Atom => {
|
||||
let primitive = type_code
|
||||
.primitive()
|
||||
.ok_or(CoreError::InvalidTypeCode(type_code.into()))?;
|
||||
Ok(Value::Atom(decode_atom_async(reader, primitive).await?))
|
||||
}
|
||||
crate::protocol::Shape::Vector => {
|
||||
let primitive = type_code
|
||||
.primitive()
|
||||
.ok_or(CoreError::InvalidTypeCode(type_code.into()))?;
|
||||
let attribute = Attribute::try_from(reader.read_i8().await?)?;
|
||||
let length = reader.read_length().await?;
|
||||
Ok(Value::Vector(
|
||||
decode_vector_async(reader, primitive, attribute, length).await?,
|
||||
))
|
||||
}
|
||||
crate::protocol::Shape::List => {
|
||||
let attribute = Attribute::try_from(reader.read_i8().await?)?;
|
||||
let length = reader.read_length().await?;
|
||||
let mut values = Vec::with_capacity(length);
|
||||
for _ in 0..length {
|
||||
values.push(decode_inner_async(reader).await?);
|
||||
}
|
||||
Ok(Value::List(List::new(attribute, values)))
|
||||
}
|
||||
crate::protocol::Shape::Dictionary => {
|
||||
let sorted = type_code == TypeCode::SortedDictionary;
|
||||
let keys = decode_inner_async(reader).await?;
|
||||
let values = decode_inner_async(reader).await?;
|
||||
let dict = Dictionary::new(sorted, keys, values);
|
||||
dict.validate()?;
|
||||
Ok(Value::Dictionary(dict))
|
||||
}
|
||||
crate::protocol::Shape::Table => {
|
||||
let attribute = Attribute::try_from(reader.read_i8().await?)?;
|
||||
let dict_value = decode_inner_async(reader).await?;
|
||||
match dict_value {
|
||||
Value::Dictionary(dict) => {
|
||||
let names = extract_symbol_names(dict.keys())?;
|
||||
let columns = extract_columns(dict.values())?;
|
||||
let table = Table::new(attribute, names, columns);
|
||||
table.validate()?;
|
||||
Ok(Value::Table(table))
|
||||
}
|
||||
_ => Err(CoreError::InvalidStructure(
|
||||
"q table payload must contain a dictionary body".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
crate::protocol::Shape::UnaryPrimitive => Ok(Value::UnaryPrimitive {
|
||||
opcode: reader.read_i8().await?,
|
||||
}),
|
||||
crate::protocol::Shape::Error => {
|
||||
let error_msg = reader.read_symbol().await?;
|
||||
Err(CoreError::QRuntime(
|
||||
String::from_utf8_lossy(&error_msg).into(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
.boxed()
|
||||
}
|
||||
|
||||
async fn decode_atom_async<R: AsyncRead + Unpin + Send>(
|
||||
reader: &mut PipelinedReader<R>,
|
||||
primitive: Primitive,
|
||||
) -> CoreResult<Atom> {
|
||||
Ok(match primitive {
|
||||
Primitive::Boolean => Atom::Boolean(reader.read_u8().await? != 0),
|
||||
Primitive::Guid => Atom::Guid(reader.read_guid().await?),
|
||||
Primitive::Byte => Atom::Byte(reader.read_u8().await?),
|
||||
Primitive::Short => Atom::Short(reader.read_i16().await?),
|
||||
Primitive::Int => Atom::Int(reader.read_i32().await?),
|
||||
Primitive::Long => Atom::Long(reader.read_i64().await?),
|
||||
Primitive::Real => Atom::Real(reader.read_f32().await?),
|
||||
Primitive::Float => Atom::Float(reader.read_f64().await?),
|
||||
Primitive::Char => Atom::Char(reader.read_u8().await?),
|
||||
Primitive::Symbol => Atom::Symbol(reader.read_symbol().await?),
|
||||
Primitive::Timestamp => Atom::Timestamp(reader.read_i64().await?),
|
||||
Primitive::Month => Atom::Month(reader.read_i32().await?),
|
||||
Primitive::Date => Atom::Date(reader.read_i32().await?),
|
||||
Primitive::Datetime => Atom::Datetime(reader.read_f64().await?),
|
||||
Primitive::Timespan => Atom::Timespan(reader.read_i64().await?),
|
||||
Primitive::Minute => Atom::Minute(reader.read_i32().await?),
|
||||
Primitive::Second => Atom::Second(reader.read_i32().await?),
|
||||
Primitive::Time => Atom::Time(reader.read_i32().await?),
|
||||
Primitive::Mixed => unreachable!("mixed values are not encoded as atoms"),
|
||||
})
|
||||
}
|
||||
|
||||
async fn decode_vector_async<R: AsyncRead + Unpin + Send>(
|
||||
reader: &mut PipelinedReader<R>,
|
||||
primitive: Primitive,
|
||||
attribute: Attribute,
|
||||
length: usize,
|
||||
) -> CoreResult<Vector> {
|
||||
let data = match primitive {
|
||||
Primitive::Boolean => VectorData::Boolean(reader.read_bytes(length).await?),
|
||||
Primitive::Guid => {
|
||||
let byte_len = length
|
||||
.checked_mul(16)
|
||||
.ok_or(CoreError::LengthOverflow(length))?;
|
||||
VectorData::Guid(reader.read_bytes(byte_len).await?)
|
||||
}
|
||||
Primitive::Byte => VectorData::Byte(reader.read_bytes(length).await?),
|
||||
Primitive::Short => VectorData::Short(reader.read_bytes(length * 2).await?),
|
||||
Primitive::Int => VectorData::Int(reader.read_bytes(length * 4).await?),
|
||||
Primitive::Long => VectorData::Long(reader.read_bytes(length * 8).await?),
|
||||
Primitive::Real => VectorData::Real(reader.read_bytes(length * 4).await?),
|
||||
Primitive::Float => VectorData::Float(reader.read_bytes(length * 8).await?),
|
||||
Primitive::Char => VectorData::Char(reader.read_bytes(length).await?),
|
||||
Primitive::Symbol => {
|
||||
let mut values = Vec::with_capacity(length);
|
||||
for _ in 0..length {
|
||||
values.push(reader.read_symbol().await?);
|
||||
}
|
||||
VectorData::Symbol(values)
|
||||
}
|
||||
Primitive::Timestamp => VectorData::Timestamp(reader.read_bytes(length * 8).await?),
|
||||
Primitive::Month => VectorData::Month(reader.read_bytes(length * 4).await?),
|
||||
Primitive::Date => VectorData::Date(reader.read_bytes(length * 4).await?),
|
||||
Primitive::Datetime => VectorData::Datetime(reader.read_bytes(length * 8).await?),
|
||||
Primitive::Timespan => VectorData::Timespan(reader.read_bytes(length * 8).await?),
|
||||
Primitive::Minute => VectorData::Minute(reader.read_bytes(length * 4).await?),
|
||||
Primitive::Second => VectorData::Second(reader.read_bytes(length * 4).await?),
|
||||
Primitive::Time => VectorData::Time(reader.read_bytes(length * 4).await?),
|
||||
Primitive::Mixed => unreachable!("mixed values are not encoded as vectors"),
|
||||
};
|
||||
|
||||
Ok(Vector::new(attribute, data))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;

    /// Wraps raw payload bytes in a little-endian `PipelinedReader`.
    fn little_endian_reader(payload: Vec<u8>) -> PipelinedReader<Cursor<Vec<u8>>> {
        PipelinedReader::new(Cursor::new(payload), Encoding::LittleEndian).unwrap()
    }

    /// Appends a collection header: type code, "no attribute" byte, `i32` length.
    fn push_header(payload: &mut Vec<u8>, type_code: TypeCode, length: i32) {
        payload.push(type_code as u8);
        payload.push(0_u8);
        payload.extend_from_slice(&length.to_le_bytes());
    }

    /// Appends each value as a little-endian `i32`.
    fn push_i32s(payload: &mut Vec<u8>, values: &[i32]) {
        for value in values {
            payload.extend_from_slice(&value.to_le_bytes());
        }
    }

    #[tokio::test]
    async fn test_decode_atom_async() -> CoreResult<()> {
        let mut payload = vec![TypeCode::IntAtom as u8];
        push_i32s(&mut payload, &[42]);

        let mut reader = little_endian_reader(payload);
        let decoded = decode_value_async(&mut reader).await?;

        assert_eq!(decoded, Value::Atom(Atom::Int(42)));
        Ok(())
    }

    #[tokio::test]
    async fn test_decode_vector_async() -> CoreResult<()> {
        let mut payload = Vec::new();
        push_header(&mut payload, TypeCode::IntVector, 2);
        push_i32s(&mut payload, &[10, 20]);

        let mut reader = little_endian_reader(payload);
        let value = decode_value_async(&mut reader).await?;

        if let Value::Vector(vector) = &value {
            assert_eq!(vector.data().as_i32_slice(), &[10, 20]);
        } else {
            panic!("Expected Vector, got {:?}", value);
        }
        Ok(())
    }

    #[tokio::test]
    async fn test_decode_table_async() -> CoreResult<()> {
        let mut payload = Vec::new();

        // Table prefix: type code followed by the "no attribute" byte.
        payload.push(TypeCode::Table as u8);
        payload.push(0_u8);

        // The table body is a dictionary.
        payload.push(TypeCode::Dictionary as u8);

        // Dictionary keys: one column name.
        push_header(&mut payload, TypeCode::SymbolVector, 1);
        payload.extend_from_slice(b"col1\0");

        // Dictionary values: a general list holding one column.
        push_header(&mut payload, TypeCode::GeneralList, 1);

        // Column 1: Int vector [100, 200].
        push_header(&mut payload, TypeCode::IntVector, 2);
        push_i32s(&mut payload, &[100, 200]);

        let mut reader = little_endian_reader(payload);
        let value = decode_value_async(&mut reader).await?;

        if let Value::Table(table) = &value {
            assert_eq!(table.num_columns(), 1);
            assert_eq!(&table.column_names()[0][..], b"col1");
            if let Value::Vector(column) = &table.columns()[0] {
                assert_eq!(column.data().as_i32_slice(), &[100, 200]);
            } else {
                panic!("Expected Vector");
            }
        } else {
            panic!("Expected Table, got {:?}", value);
        }
        Ok(())
    }

    #[tokio::test]
    async fn test_rejects_big_endian() {
        let outcome = PipelinedReader::new(Cursor::new(Vec::<u8>::new()), Encoding::BigEndian);
        assert!(matches!(
            outcome,
            Err(CoreError::UnsupportedEndianness(Encoding::BigEndian))
        ));
    }

    #[tokio::test]
    async fn test_negative_length_gives_proper_error() -> CoreResult<()> {
        let mut payload = Vec::new();
        // A vector header announcing a negative element count.
        push_header(&mut payload, TypeCode::IntVector, -1);

        let mut reader = little_endian_reader(payload);
        let err = decode_value_async(&mut reader).await.unwrap_err();
        assert!(
            matches!(err, CoreError::InvalidCollectionLength(-1)),
            "expected InvalidCollectionLength(-1), got {:?}",
            err
        );
        Ok(())
    }
}
|
||||
373
crates/qroissant-core/src/protocol.rs
Normal file
373
crates/qroissant-core/src/protocol.rs
Normal file
|
|
@ -0,0 +1,373 @@
|
|||
use crate::error::CoreError;
|
||||
use crate::error::CoreResult;
|
||||
|
||||
/// q attribute attached to vectors, lists, and tables.
///
/// Each variant corresponds to one `i8` code on the wire; see the
/// `From<Attribute> for i8` and `TryFrom<i8>` conversions.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum Attribute {
    /// No attribute (wire code `0`); this is the default.
    #[default]
    None,
    /// Sorted attribute (wire code `1`).
    Sorted,
    /// Unique attribute (wire code `2`).
    Unique,
    /// Parted attribute (wire code `3`).
    Parted,
    /// Grouped attribute (wire code `4`).
    Grouped,
}
|
||||
|
||||
impl From<Attribute> for i8 {
|
||||
fn from(value: Attribute) -> Self {
|
||||
match value {
|
||||
Attribute::None => 0,
|
||||
Attribute::Sorted => 1,
|
||||
Attribute::Unique => 2,
|
||||
Attribute::Parted => 3,
|
||||
Attribute::Grouped => 4,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<i8> for Attribute {
|
||||
type Error = CoreError;
|
||||
|
||||
fn try_from(value: i8) -> CoreResult<Self> {
|
||||
match value {
|
||||
0 => Ok(Self::None),
|
||||
1 => Ok(Self::Sorted),
|
||||
2 => Ok(Self::Unique),
|
||||
3 => Ok(Self::Parted),
|
||||
4 => Ok(Self::Grouped),
|
||||
_ => Err(CoreError::InvalidAttribute(value)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// q primitive domain shared by atoms and homogeneous vectors.
///
/// `Primitive::width` reports the fixed on-wire size of each variant, where
/// one exists.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Primitive {
    /// Boolean; 1 byte on the wire.
    Boolean,
    /// GUID; 16 bytes on the wire.
    Guid,
    /// Byte; 1 byte on the wire.
    Byte,
    /// Short; 2 bytes on the wire.
    Short,
    /// Int; 4 bytes on the wire.
    Int,
    /// Long; 8 bytes on the wire.
    Long,
    /// Real; 4 bytes on the wire.
    Real,
    /// Float; 8 bytes on the wire.
    Float,
    /// Char; 1 byte on the wire.
    Char,
    /// Symbol; no fixed width.
    Symbol,
    /// Timestamp; 8 bytes on the wire.
    Timestamp,
    /// Month; 4 bytes on the wire.
    Month,
    /// Date; 4 bytes on the wire.
    Date,
    /// Datetime; 8 bytes on the wire.
    Datetime,
    /// Timespan; 8 bytes on the wire.
    Timespan,
    /// Minute; 4 bytes on the wire.
    Minute,
    /// Second; 4 bytes on the wire.
    Second,
    /// Time; 4 bytes on the wire.
    Time,
    /// Heterogeneous content (used for general lists); no fixed width.
    Mixed,
}
|
||||
|
||||
impl Primitive {
|
||||
/// Fixed-width byte width for primitives that have one on the wire.
|
||||
pub fn width(self) -> Option<usize> {
|
||||
match self {
|
||||
Self::Boolean | Self::Byte | Self::Char => Some(1),
|
||||
Self::Short => Some(2),
|
||||
Self::Int
|
||||
| Self::Real
|
||||
| Self::Month
|
||||
| Self::Date
|
||||
| Self::Minute
|
||||
| Self::Second
|
||||
| Self::Time => Some(4),
|
||||
Self::Long | Self::Float | Self::Timestamp | Self::Datetime | Self::Timespan => Some(8),
|
||||
Self::Guid => Some(16),
|
||||
Self::Symbol | Self::Mixed => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Top-level q structural shape.
///
/// Every `TypeCode` maps to exactly one of these via `TypeCode::shape`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Shape {
    /// A single scalar value.
    Atom,
    /// A homogeneous vector of one primitive type.
    Vector,
    /// A general list.
    List,
    /// A dictionary (plain or sorted).
    Dictionary,
    /// A table.
    Table,
    /// A unary primitive.
    UnaryPrimitive,
    /// An error payload.
    Error,
}
|
||||
|
||||
/// Complete q type descriptor for a decoded value.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub struct ValueType {
|
||||
pub primitive: Option<Primitive>,
|
||||
pub shape: Shape,
|
||||
pub attribute: Option<Attribute>,
|
||||
pub sorted: Option<bool>,
|
||||
}
|
||||
|
||||
impl ValueType {
|
||||
pub fn atom(primitive: Primitive) -> Self {
|
||||
Self {
|
||||
primitive: Some(primitive),
|
||||
shape: Shape::Atom,
|
||||
attribute: None,
|
||||
sorted: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn vector(primitive: Primitive, attribute: Attribute) -> Self {
|
||||
Self {
|
||||
primitive: Some(primitive),
|
||||
shape: Shape::Vector,
|
||||
attribute: Some(attribute),
|
||||
sorted: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list(attribute: Attribute) -> Self {
|
||||
Self {
|
||||
primitive: Some(Primitive::Mixed),
|
||||
shape: Shape::List,
|
||||
attribute: Some(attribute),
|
||||
sorted: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dictionary(sorted: bool) -> Self {
|
||||
Self {
|
||||
primitive: None,
|
||||
shape: Shape::Dictionary,
|
||||
attribute: None,
|
||||
sorted: Some(sorted),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn table(attribute: Attribute) -> Self {
|
||||
Self {
|
||||
primitive: None,
|
||||
shape: Shape::Table,
|
||||
attribute: Some(attribute),
|
||||
sorted: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unary_primitive() -> Self {
|
||||
Self {
|
||||
primitive: None,
|
||||
shape: Shape::UnaryPrimitive,
|
||||
attribute: None,
|
||||
sorted: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Raw q IPC type code.
///
/// The discriminant of each variant is the signed byte used on the wire.
/// Atoms use the negated code of the vector of the same primitive.
#[repr(i8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TypeCode {
    // General list and homogeneous vectors (non-negative codes).
    GeneralList = 0,
    BooleanVector = 1,
    GuidVector = 2,
    ByteVector = 4,
    ShortVector = 5,
    IntVector = 6,
    LongVector = 7,
    RealVector = 8,
    FloatVector = 9,
    CharVector = 10,
    SymbolVector = 11,
    TimestampVector = 12,
    MonthVector = 13,
    DateVector = 14,
    DatetimeVector = 15,
    TimespanVector = 16,
    MinuteVector = 17,
    SecondVector = 18,
    TimeVector = 19,

    // Structural types.
    Table = 98,
    Dictionary = 99,
    UnaryPrimitive = 101,
    SortedDictionary = 127,

    // Atoms (negative codes).
    BooleanAtom = -1,
    GuidAtom = -2,
    ByteAtom = -4,
    ShortAtom = -5,
    IntAtom = -6,
    LongAtom = -7,
    RealAtom = -8,
    FloatAtom = -9,
    CharAtom = -10,
    SymbolAtom = -11,
    TimestampAtom = -12,
    MonthAtom = -13,
    DateAtom = -14,
    DatetimeAtom = -15,
    TimespanAtom = -16,
    MinuteAtom = -17,
    SecondAtom = -18,
    TimeAtom = -19,

    // Error payload marker.
    ErrorCode = -128,
}
|
||||
|
||||
impl TypeCode {
|
||||
pub fn primitive(self) -> Option<Primitive> {
|
||||
match self {
|
||||
Self::BooleanAtom | Self::BooleanVector => Some(Primitive::Boolean),
|
||||
Self::GuidAtom | Self::GuidVector => Some(Primitive::Guid),
|
||||
Self::ByteAtom | Self::ByteVector => Some(Primitive::Byte),
|
||||
Self::ShortAtom | Self::ShortVector => Some(Primitive::Short),
|
||||
Self::IntAtom | Self::IntVector => Some(Primitive::Int),
|
||||
Self::LongAtom | Self::LongVector => Some(Primitive::Long),
|
||||
Self::RealAtom | Self::RealVector => Some(Primitive::Real),
|
||||
Self::FloatAtom | Self::FloatVector => Some(Primitive::Float),
|
||||
Self::CharAtom | Self::CharVector => Some(Primitive::Char),
|
||||
Self::SymbolAtom | Self::SymbolVector => Some(Primitive::Symbol),
|
||||
Self::TimestampAtom | Self::TimestampVector => Some(Primitive::Timestamp),
|
||||
Self::MonthAtom | Self::MonthVector => Some(Primitive::Month),
|
||||
Self::DateAtom | Self::DateVector => Some(Primitive::Date),
|
||||
Self::DatetimeAtom | Self::DatetimeVector => Some(Primitive::Datetime),
|
||||
Self::TimespanAtom | Self::TimespanVector => Some(Primitive::Timespan),
|
||||
Self::MinuteAtom | Self::MinuteVector => Some(Primitive::Minute),
|
||||
Self::SecondAtom | Self::SecondVector => Some(Primitive::Second),
|
||||
Self::TimeAtom | Self::TimeVector => Some(Primitive::Time),
|
||||
Self::GeneralList
|
||||
| Self::Table
|
||||
| Self::Dictionary
|
||||
| Self::UnaryPrimitive
|
||||
| Self::SortedDictionary
|
||||
| Self::ErrorCode => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn shape(self) -> Shape {
|
||||
match self {
|
||||
Self::BooleanAtom
|
||||
| Self::GuidAtom
|
||||
| Self::ByteAtom
|
||||
| Self::ShortAtom
|
||||
| Self::IntAtom
|
||||
| Self::LongAtom
|
||||
| Self::RealAtom
|
||||
| Self::FloatAtom
|
||||
| Self::CharAtom
|
||||
| Self::SymbolAtom
|
||||
| Self::TimestampAtom
|
||||
| Self::MonthAtom
|
||||
| Self::DateAtom
|
||||
| Self::DatetimeAtom
|
||||
| Self::TimespanAtom
|
||||
| Self::MinuteAtom
|
||||
| Self::SecondAtom
|
||||
| Self::TimeAtom => Shape::Atom,
|
||||
Self::BooleanVector
|
||||
| Self::GuidVector
|
||||
| Self::ByteVector
|
||||
| Self::ShortVector
|
||||
| Self::IntVector
|
||||
| Self::LongVector
|
||||
| Self::RealVector
|
||||
| Self::FloatVector
|
||||
| Self::CharVector
|
||||
| Self::SymbolVector
|
||||
| Self::TimestampVector
|
||||
| Self::MonthVector
|
||||
| Self::DateVector
|
||||
| Self::DatetimeVector
|
||||
| Self::TimespanVector
|
||||
| Self::MinuteVector
|
||||
| Self::SecondVector
|
||||
| Self::TimeVector => Shape::Vector,
|
||||
Self::GeneralList => Shape::List,
|
||||
Self::Dictionary | Self::SortedDictionary => Shape::Dictionary,
|
||||
Self::Table => Shape::Table,
|
||||
Self::UnaryPrimitive => Shape::UnaryPrimitive,
|
||||
Self::ErrorCode => Shape::Error,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TypeCode> for i8 {
    /// Returns the raw wire byte, i.e. the enum's `repr(i8)` discriminant.
    fn from(code: TypeCode) -> Self {
        code as i8
    }
}
|
||||
|
||||
impl TryFrom<i8> for TypeCode {
|
||||
type Error = CoreError;
|
||||
|
||||
fn try_from(value: i8) -> CoreResult<Self> {
|
||||
match value {
|
||||
0 => Ok(Self::GeneralList),
|
||||
1 => Ok(Self::BooleanVector),
|
||||
2 => Ok(Self::GuidVector),
|
||||
4 => Ok(Self::ByteVector),
|
||||
5 => Ok(Self::ShortVector),
|
||||
6 => Ok(Self::IntVector),
|
||||
7 => Ok(Self::LongVector),
|
||||
8 => Ok(Self::RealVector),
|
||||
9 => Ok(Self::FloatVector),
|
||||
10 => Ok(Self::CharVector),
|
||||
11 => Ok(Self::SymbolVector),
|
||||
12 => Ok(Self::TimestampVector),
|
||||
13 => Ok(Self::MonthVector),
|
||||
14 => Ok(Self::DateVector),
|
||||
15 => Ok(Self::DatetimeVector),
|
||||
16 => Ok(Self::TimespanVector),
|
||||
17 => Ok(Self::MinuteVector),
|
||||
18 => Ok(Self::SecondVector),
|
||||
19 => Ok(Self::TimeVector),
|
||||
98 => Ok(Self::Table),
|
||||
99 => Ok(Self::Dictionary),
|
||||
101 => Ok(Self::UnaryPrimitive),
|
||||
127 => Ok(Self::SortedDictionary),
|
||||
-1 => Ok(Self::BooleanAtom),
|
||||
-2 => Ok(Self::GuidAtom),
|
||||
-4 => Ok(Self::ByteAtom),
|
||||
-5 => Ok(Self::ShortAtom),
|
||||
-6 => Ok(Self::IntAtom),
|
||||
-7 => Ok(Self::LongAtom),
|
||||
-8 => Ok(Self::RealAtom),
|
||||
-9 => Ok(Self::FloatAtom),
|
||||
-10 => Ok(Self::CharAtom),
|
||||
-11 => Ok(Self::SymbolAtom),
|
||||
-12 => Ok(Self::TimestampAtom),
|
||||
-13 => Ok(Self::MonthAtom),
|
||||
-14 => Ok(Self::DateAtom),
|
||||
-15 => Ok(Self::DatetimeAtom),
|
||||
-16 => Ok(Self::TimespanAtom),
|
||||
-17 => Ok(Self::MinuteAtom),
|
||||
-18 => Ok(Self::SecondAtom),
|
||||
-19 => Ok(Self::TimeAtom),
|
||||
-128 => Ok(Self::ErrorCode),
|
||||
_ => Err(CoreError::InvalidTypeCode(value)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Valid attribute codes parse; an out-of-range code is rejected.
    #[test]
    fn attribute_round_trips() {
        let parsed_none = Attribute::try_from(0).unwrap();
        let parsed_grouped = Attribute::try_from(4).unwrap();
        let rejected = Attribute::try_from(9);

        assert_eq!(parsed_none, Attribute::None);
        assert_eq!(parsed_grouped, Attribute::Grouped);
        assert!(matches!(rejected, Err(CoreError::InvalidAttribute(9))));
    }

    /// Spot-checks the shape/primitive mapping for one atom, vector and list.
    #[test]
    fn type_code_maps_to_expected_shape_and_primitive() {
        let atom = TypeCode::IntAtom;
        assert_eq!(atom.shape(), Shape::Atom);
        assert_eq!(atom.primitive(), Some(Primitive::Int));

        let vector = TypeCode::SymbolVector;
        assert_eq!(vector.shape(), Shape::Vector);
        assert_eq!(vector.primitive(), Some(Primitive::Symbol));

        let list = TypeCode::GeneralList;
        assert_eq!(list.shape(), Shape::List);
        assert_eq!(list.primitive(), None);
    }
}
|
||||
479
crates/qroissant-core/src/value.rs
Normal file
479
crates/qroissant-core/src/value.rs
Normal file
|
|
@ -0,0 +1,479 @@
|
|||
use bytes::Bytes;
|
||||
|
||||
use crate::error::CoreError;
|
||||
use crate::error::CoreResult;
|
||||
use crate::protocol::Attribute;
|
||||
use crate::protocol::Primitive;
|
||||
use crate::protocol::ValueType;
|
||||
|
||||
/// q atom payload.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Atom {
|
||||
Boolean(bool),
|
||||
Guid([u8; 16]),
|
||||
Byte(u8),
|
||||
Short(i16),
|
||||
Int(i32),
|
||||
Long(i64),
|
||||
Real(f32),
|
||||
Float(f64),
|
||||
Char(u8),
|
||||
Symbol(Bytes),
|
||||
Timestamp(i64),
|
||||
Month(i32),
|
||||
Date(i32),
|
||||
Datetime(f64),
|
||||
Timespan(i64),
|
||||
Minute(i32),
|
||||
Second(i32),
|
||||
Time(i32),
|
||||
}
|
||||
|
||||
impl Atom {
|
||||
pub fn primitive(&self) -> Primitive {
|
||||
match self {
|
||||
Self::Boolean(_) => Primitive::Boolean,
|
||||
Self::Guid(_) => Primitive::Guid,
|
||||
Self::Byte(_) => Primitive::Byte,
|
||||
Self::Short(_) => Primitive::Short,
|
||||
Self::Int(_) => Primitive::Int,
|
||||
Self::Long(_) => Primitive::Long,
|
||||
Self::Real(_) => Primitive::Real,
|
||||
Self::Float(_) => Primitive::Float,
|
||||
Self::Char(_) => Primitive::Char,
|
||||
Self::Symbol(_) => Primitive::Symbol,
|
||||
Self::Timestamp(_) => Primitive::Timestamp,
|
||||
Self::Month(_) => Primitive::Month,
|
||||
Self::Date(_) => Primitive::Date,
|
||||
Self::Datetime(_) => Primitive::Datetime,
|
||||
Self::Timespan(_) => Primitive::Timespan,
|
||||
Self::Minute(_) => Primitive::Minute,
|
||||
Self::Second(_) => Primitive::Second,
|
||||
Self::Time(_) => Primitive::Time,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// q homogeneous vector payload.
|
||||
///
|
||||
/// All fixed-width numeric types store their data as raw [`Bytes`], enabling
|
||||
/// zero-copy slicing from the IPC frame buffer during decode. Typed access
|
||||
/// is provided via `as_*_slice()` methods using `bytemuck::cast_slice`.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum VectorData {
|
||||
Boolean(Bytes),
|
||||
Guid(Bytes),
|
||||
Byte(Bytes),
|
||||
Short(Bytes),
|
||||
Int(Bytes),
|
||||
Long(Bytes),
|
||||
Real(Bytes),
|
||||
Float(Bytes),
|
||||
Char(Bytes),
|
||||
Symbol(Vec<Bytes>),
|
||||
Timestamp(Bytes),
|
||||
Month(Bytes),
|
||||
Date(Bytes),
|
||||
Datetime(Bytes),
|
||||
Timespan(Bytes),
|
||||
Minute(Bytes),
|
||||
Second(Bytes),
|
||||
Time(Bytes),
|
||||
}
|
||||
|
||||
impl VectorData {
|
||||
pub fn primitive(&self) -> Primitive {
|
||||
match self {
|
||||
Self::Boolean(_) => Primitive::Boolean,
|
||||
Self::Guid(_) => Primitive::Guid,
|
||||
Self::Byte(_) => Primitive::Byte,
|
||||
Self::Short(_) => Primitive::Short,
|
||||
Self::Int(_) => Primitive::Int,
|
||||
Self::Long(_) => Primitive::Long,
|
||||
Self::Real(_) => Primitive::Real,
|
||||
Self::Float(_) => Primitive::Float,
|
||||
Self::Char(_) => Primitive::Char,
|
||||
Self::Symbol(_) => Primitive::Symbol,
|
||||
Self::Timestamp(_) => Primitive::Timestamp,
|
||||
Self::Month(_) => Primitive::Month,
|
||||
Self::Date(_) => Primitive::Date,
|
||||
Self::Datetime(_) => Primitive::Datetime,
|
||||
Self::Timespan(_) => Primitive::Timespan,
|
||||
Self::Minute(_) => Primitive::Minute,
|
||||
Self::Second(_) => Primitive::Second,
|
||||
Self::Time(_) => Primitive::Time,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
Self::Boolean(b) | Self::Byte(b) | Self::Char(b) => b.len(),
|
||||
Self::Guid(b) => b.len() / 16,
|
||||
Self::Short(b) => b.len() / 2,
|
||||
Self::Int(b)
|
||||
| Self::Month(b)
|
||||
| Self::Date(b)
|
||||
| Self::Minute(b)
|
||||
| Self::Second(b)
|
||||
| Self::Time(b)
|
||||
| Self::Real(b) => b.len() / 4,
|
||||
Self::Long(b)
|
||||
| Self::Timestamp(b)
|
||||
| Self::Timespan(b)
|
||||
| Self::Float(b)
|
||||
| Self::Datetime(b) => b.len() / 8,
|
||||
Self::Symbol(v) => v.len(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
/// Returns the underlying raw bytes for non-Symbol variants.
|
||||
pub fn raw_bytes(&self) -> Option<&Bytes> {
|
||||
match self {
|
||||
Self::Symbol(_) => None,
|
||||
Self::Boolean(b)
|
||||
| Self::Guid(b)
|
||||
| Self::Byte(b)
|
||||
| Self::Short(b)
|
||||
| Self::Int(b)
|
||||
| Self::Long(b)
|
||||
| Self::Real(b)
|
||||
| Self::Float(b)
|
||||
| Self::Char(b)
|
||||
| Self::Timestamp(b)
|
||||
| Self::Month(b)
|
||||
| Self::Date(b)
|
||||
| Self::Datetime(b)
|
||||
| Self::Timespan(b)
|
||||
| Self::Minute(b)
|
||||
| Self::Second(b)
|
||||
| Self::Time(b) => Some(b),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_i16_slice(&self) -> &[i16] {
|
||||
match self {
|
||||
Self::Short(b) => bytemuck::cast_slice(b),
|
||||
_ => panic!("as_i16_slice called on {:?}", self.primitive()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_i32_slice(&self) -> &[i32] {
|
||||
match self {
|
||||
Self::Int(b)
|
||||
| Self::Month(b)
|
||||
| Self::Date(b)
|
||||
| Self::Minute(b)
|
||||
| Self::Second(b)
|
||||
| Self::Time(b) => bytemuck::cast_slice(b),
|
||||
_ => panic!("as_i32_slice called on {:?}", self.primitive()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_i64_slice(&self) -> &[i64] {
|
||||
match self {
|
||||
Self::Long(b) | Self::Timestamp(b) | Self::Timespan(b) => bytemuck::cast_slice(b),
|
||||
_ => panic!("as_i64_slice called on {:?}", self.primitive()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_f32_slice(&self) -> &[f32] {
|
||||
match self {
|
||||
Self::Real(b) => bytemuck::cast_slice(b),
|
||||
_ => panic!("as_f32_slice called on {:?}", self.primitive()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_f64_slice(&self) -> &[f64] {
|
||||
match self {
|
||||
Self::Float(b) | Self::Datetime(b) => bytemuck::cast_slice(b),
|
||||
_ => panic!("as_f64_slice called on {:?}", self.primitive()),
|
||||
}
|
||||
}
|
||||
|
||||
// Construction helpers for tests and ingestion paths.
|
||||
|
||||
pub fn from_i16s(values: &[i16]) -> Self {
|
||||
Self::Short(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||
}
|
||||
|
||||
pub fn from_i32s(values: &[i32]) -> Self {
|
||||
Self::Int(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||
}
|
||||
|
||||
pub fn from_i64s(values: &[i64]) -> Self {
|
||||
Self::Long(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||
}
|
||||
|
||||
pub fn from_f32s(values: &[f32]) -> Self {
|
||||
Self::Real(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||
}
|
||||
|
||||
pub fn from_f64s(values: &[f64]) -> Self {
|
||||
Self::Float(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||
}
|
||||
|
||||
pub fn from_guids(values: &[[u8; 16]]) -> Self {
|
||||
let mut buf = Vec::with_capacity(values.len() * 16);
|
||||
for guid in values {
|
||||
buf.extend_from_slice(guid);
|
||||
}
|
||||
Self::Guid(Bytes::from(buf))
|
||||
}
|
||||
|
||||
pub fn from_timestamps(values: &[i64]) -> Self {
|
||||
Self::Timestamp(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||
}
|
||||
|
||||
pub fn from_months(values: &[i32]) -> Self {
|
||||
Self::Month(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||
}
|
||||
|
||||
pub fn from_dates(values: &[i32]) -> Self {
|
||||
Self::Date(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||
}
|
||||
|
||||
pub fn from_datetimes(values: &[f64]) -> Self {
|
||||
Self::Datetime(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||
}
|
||||
|
||||
pub fn from_timespans(values: &[i64]) -> Self {
|
||||
Self::Timespan(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||
}
|
||||
|
||||
pub fn from_minutes(values: &[i32]) -> Self {
|
||||
Self::Minute(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||
}
|
||||
|
||||
pub fn from_seconds(values: &[i32]) -> Self {
|
||||
Self::Second(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||
}
|
||||
|
||||
pub fn from_times(values: &[i32]) -> Self {
|
||||
Self::Time(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||
}
|
||||
}
|
||||
|
||||
/// q homogeneous vector with an attached q attribute.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Vector {
|
||||
attribute: Attribute,
|
||||
data: VectorData,
|
||||
}
|
||||
|
||||
impl Vector {
|
||||
pub fn new(attribute: Attribute, data: VectorData) -> Self {
|
||||
Self { attribute, data }
|
||||
}
|
||||
|
||||
pub fn attribute(&self) -> Attribute {
|
||||
self.attribute
|
||||
}
|
||||
|
||||
pub fn primitive(&self) -> Primitive {
|
||||
self.data.primitive()
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.data.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.data.is_empty()
|
||||
}
|
||||
|
||||
pub fn data(&self) -> &VectorData {
|
||||
&self.data
|
||||
}
|
||||
}
|
||||
|
||||
/// q general list.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct List {
|
||||
attribute: Attribute,
|
||||
values: Vec<Value>,
|
||||
}
|
||||
|
||||
impl List {
|
||||
pub fn new(attribute: Attribute, values: Vec<Value>) -> Self {
|
||||
Self { attribute, values }
|
||||
}
|
||||
|
||||
pub fn attribute(&self) -> Attribute {
|
||||
self.attribute
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.values.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.values.is_empty()
|
||||
}
|
||||
|
||||
pub fn values(&self) -> &[Value] {
|
||||
&self.values
|
||||
}
|
||||
}
|
||||
|
||||
/// q dictionary.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Dictionary {
|
||||
sorted: bool,
|
||||
keys: Box<Value>,
|
||||
values: Box<Value>,
|
||||
}
|
||||
|
||||
impl Dictionary {
|
||||
pub fn new(sorted: bool, keys: Value, values: Value) -> Self {
|
||||
Self {
|
||||
sorted,
|
||||
keys: Box::new(keys),
|
||||
values: Box::new(values),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn sorted(&self) -> bool {
|
||||
self.sorted
|
||||
}
|
||||
|
||||
pub fn keys(&self) -> &Value {
|
||||
&self.keys
|
||||
}
|
||||
|
||||
pub fn values(&self) -> &Value {
|
||||
&self.values
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.keys.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
pub fn validate(&self) -> CoreResult<()> {
|
||||
if self.keys.len() != self.values.len() {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"q dictionary key/value lengths differ: {} != {}",
|
||||
self.keys.len(),
|
||||
self.values.len()
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// q table.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Table {
|
||||
attribute: Attribute,
|
||||
column_names: Vec<Bytes>,
|
||||
columns: Vec<Value>,
|
||||
}
|
||||
|
||||
impl Table {
|
||||
pub fn new(attribute: Attribute, column_names: Vec<Bytes>, columns: Vec<Value>) -> Self {
|
||||
Self {
|
||||
attribute,
|
||||
column_names,
|
||||
columns,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn attribute(&self) -> Attribute {
|
||||
self.attribute
|
||||
}
|
||||
|
||||
pub fn column_names(&self) -> &[Bytes] {
|
||||
&self.column_names
|
||||
}
|
||||
|
||||
pub fn columns(&self) -> &[Value] {
|
||||
&self.columns
|
||||
}
|
||||
|
||||
pub fn num_columns(&self) -> usize {
|
||||
self.columns.len()
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.columns.first().map_or(0, Value::len)
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
pub fn validate(&self) -> CoreResult<()> {
|
||||
if self.column_names.len() != self.columns.len() {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"q table column name count {} does not match column count {}",
|
||||
self.column_names.len(),
|
||||
self.columns.len()
|
||||
)));
|
||||
}
|
||||
|
||||
if let Some(expected_rows) = self.columns.first().map(Value::len) {
|
||||
for column in self.columns.iter().skip(1) {
|
||||
if column.len() != expected_rows {
|
||||
return Err(CoreError::InvalidStructure(format!(
|
||||
"q table column lengths differ: expected {expected_rows}, found {}",
|
||||
column.len()
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Decoded q value subset currently supported by the rewrite.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum Value {
|
||||
Atom(Atom),
|
||||
Vector(Vector),
|
||||
List(List),
|
||||
Dictionary(Dictionary),
|
||||
Table(Table),
|
||||
UnaryPrimitive { opcode: i8 },
|
||||
}
|
||||
|
||||
impl Value {
|
||||
pub fn qtype(&self) -> ValueType {
|
||||
match self {
|
||||
Self::Atom(atom) => ValueType::atom(atom.primitive()),
|
||||
Self::Vector(vector) => ValueType::vector(vector.primitive(), vector.attribute()),
|
||||
Self::List(list) => ValueType::list(list.attribute()),
|
||||
Self::Dictionary(dictionary) => ValueType::dictionary(dictionary.sorted()),
|
||||
Self::Table(table) => ValueType::table(table.attribute()),
|
||||
Self::UnaryPrimitive { .. } => ValueType::unary_primitive(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
Self::Atom(_) | Self::UnaryPrimitive { .. } => 1,
|
||||
Self::Vector(vector) => vector.len(),
|
||||
Self::List(list) => list.len(),
|
||||
Self::Dictionary(dictionary) => dictionary.len(),
|
||||
Self::Table(table) => table.len(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
match self {
|
||||
Self::Atom(_) | Self::UnaryPrimitive { .. } => false,
|
||||
Self::Vector(vector) => vector.is_empty(),
|
||||
Self::List(list) => list.is_empty(),
|
||||
Self::Dictionary(dictionary) => dictionary.is_empty(),
|
||||
Self::Table(table) => table.is_empty(),
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue