//! Arrow ingestion: converts Arrow arrays and record batches into q `Value` trees.
//!
//! This is the reverse direction of [`crate::projection`]. Arrow field
//! metadata produced by the projection layer (`qroissant.shape`,
//! `qroissant.primitive`, etc.) is consumed here so that round-trips through
//! Arrow preserve exact q semantics.
//!
//! No PyO3 or Python dependencies are allowed in this crate; PyCapsule
//! handling lives in `qroissant-python`.
|
||
use arrow_array::Array;
|
||
use arrow_array::ArrayRef;
|
||
use arrow_array::BinaryArray;
|
||
use arrow_array::BinaryViewArray;
|
||
use arrow_array::BooleanArray;
|
||
use arrow_array::Date32Array;
|
||
use arrow_array::DurationMicrosecondArray;
|
||
use arrow_array::DurationMillisecondArray;
|
||
use arrow_array::DurationNanosecondArray;
|
||
use arrow_array::DurationSecondArray;
|
||
use arrow_array::FixedSizeBinaryArray;
|
||
use arrow_array::Float32Array;
|
||
use arrow_array::Float64Array;
|
||
use arrow_array::Int16Array;
|
||
use arrow_array::Int32Array;
|
||
use arrow_array::Int64Array;
|
||
use arrow_array::LargeBinaryArray;
|
||
use arrow_array::LargeListArray;
|
||
use arrow_array::LargeStringArray;
|
||
use arrow_array::ListArray;
|
||
use arrow_array::MapArray;
|
||
use arrow_array::RecordBatch;
|
||
use arrow_array::StringArray;
|
||
use arrow_array::StringViewArray;
|
||
use arrow_array::StructArray;
|
||
use arrow_array::Time32MillisecondArray;
|
||
use arrow_array::Time32SecondArray;
|
||
use arrow_array::Time64MicrosecondArray;
|
||
use arrow_array::Time64NanosecondArray;
|
||
use arrow_array::TimestampMicrosecondArray;
|
||
use arrow_array::TimestampMillisecondArray;
|
||
use arrow_array::TimestampNanosecondArray;
|
||
use arrow_array::TimestampSecondArray;
|
||
use arrow_array::UInt8Array;
|
||
use arrow_schema::DataType;
|
||
use arrow_schema::Field as ArrowField;
|
||
use arrow_schema::SchemaRef;
|
||
use arrow_schema::TimeUnit;
|
||
use qroissant_core::Atom;
|
||
use qroissant_core::Attribute;
|
||
use qroissant_core::Dictionary;
|
||
use qroissant_core::List;
|
||
use qroissant_core::Table;
|
||
use qroissant_core::Value;
|
||
use qroissant_core::Vector;
|
||
use qroissant_core::VectorData;
|
||
use qroissant_kernels::nulls::Q_NULL_DATE;
|
||
use qroissant_kernels::nulls::Q_NULL_MINUTE;
|
||
use qroissant_kernels::nulls::Q_NULL_SECOND;
|
||
use qroissant_kernels::nulls::Q_NULL_SHORT;
|
||
use qroissant_kernels::nulls::Q_NULL_TIME;
|
||
use qroissant_kernels::nulls::Q_NULL_TIMESPAN;
|
||
use qroissant_kernels::nulls::Q_NULL_TIMESTAMP;
|
||
use qroissant_kernels::temporal::DATE_OFFSET_DAYS;
|
||
use qroissant_kernels::temporal::TIMESTAMP_OFFSET_NS;
|
||
|
||
use crate::error::IngestionError;
|
||
use crate::error::IngestionResult;
|
||
|
||
/// Converts a `Vec<T>` to `bytes::Bytes` via zero-copy reinterpretation.
|
||
fn vec_to_bytes<T: bytemuck::NoUninit>(values: Vec<T>) -> bytes::Bytes {
|
||
// Safety: bytemuck::cast_vec requires NoUninit, which guarantees no padding.
|
||
let byte_vec: Vec<u8> = bytemuck::allocation::cast_vec(values);
|
||
bytes::Bytes::from(byte_vec)
|
||
}
|
||
use crate::metadata::ATTRIBUTE_KEY;
|
||
use crate::metadata::PRIMITIVE_KEY;
|
||
use crate::metadata::SHAPE_KEY;
|
||
use crate::metadata::SORTED_KEY;
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Metadata hint extraction
|
||
// ---------------------------------------------------------------------------
|
||
|
||
#[derive(Clone, Copy, Default, Debug)]
|
||
struct IngestHint {
|
||
shape: Option<IngestShape>,
|
||
primitive: Option<IngestPrimitive>,
|
||
attribute: Option<Attribute>,
|
||
sorted: Option<bool>,
|
||
}
|
||
|
||
/// Top-level q shape an Arrow array should be ingested as.
///
/// The variants mirror the strings accepted for the `SHAPE_KEY` metadata
/// value (`"atom"`, `"vector"`, `"list"`, `"dictionary"`, `"table"`,
/// `"unary_primitive"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IngestShape {
    /// A single value; the source array must have length 1.
    Atom,
    /// A typed vector.
    Vector,
    /// A general list.
    List,
    /// A dictionary, ingested from an Arrow map array.
    Dictionary,
    /// A table, ingested from an Arrow struct array.
    Table,
    /// A unary primitive; the source array must have length 1.
    UnaryPrimitive,
}
|
||
|
||
/// q primitive kind requested through the `PRIMITIVE_KEY` metadata value.
///
/// Several Arrow physical types can carry more than one q primitive (for
/// example `Int32` may hold int, month, date, minute, second or time values),
/// so this hint selects which q type the ingested values are tagged with.
/// Each variant corresponds to the lower-case metadata string of the same
/// name.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IngestPrimitive {
    Boolean,
    Guid,
    Byte,
    Short,
    Int,
    Long,
    Real,
    Float,
    Char,
    Symbol,
    Timestamp,
    Month,
    Date,
    Datetime,
    Timespan,
    Minute,
    Second,
    Time,
}
|
||
|
||
fn hint_from_field(field: &ArrowField) -> IngestHint {
|
||
let meta = field.metadata();
|
||
IngestHint {
|
||
shape: meta.get(SHAPE_KEY).and_then(|s| parse_shape(s)),
|
||
primitive: meta.get(PRIMITIVE_KEY).and_then(|s| parse_primitive(s)),
|
||
attribute: meta.get(ATTRIBUTE_KEY).and_then(|s| parse_attribute(s)),
|
||
sorted: meta.get(SORTED_KEY).and_then(|s| s.parse::<bool>().ok()),
|
||
}
|
||
}
|
||
|
||
fn parse_shape(s: &str) -> Option<IngestShape> {
|
||
match s {
|
||
"atom" => Some(IngestShape::Atom),
|
||
"vector" => Some(IngestShape::Vector),
|
||
"list" => Some(IngestShape::List),
|
||
"dictionary" => Some(IngestShape::Dictionary),
|
||
"table" => Some(IngestShape::Table),
|
||
"unary_primitive" => Some(IngestShape::UnaryPrimitive),
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
fn parse_primitive(s: &str) -> Option<IngestPrimitive> {
|
||
match s {
|
||
"boolean" => Some(IngestPrimitive::Boolean),
|
||
"guid" => Some(IngestPrimitive::Guid),
|
||
"byte" => Some(IngestPrimitive::Byte),
|
||
"short" => Some(IngestPrimitive::Short),
|
||
"int" => Some(IngestPrimitive::Int),
|
||
"long" => Some(IngestPrimitive::Long),
|
||
"real" => Some(IngestPrimitive::Real),
|
||
"float" => Some(IngestPrimitive::Float),
|
||
"char" => Some(IngestPrimitive::Char),
|
||
"symbol" => Some(IngestPrimitive::Symbol),
|
||
"timestamp" => Some(IngestPrimitive::Timestamp),
|
||
"month" => Some(IngestPrimitive::Month),
|
||
"date" => Some(IngestPrimitive::Date),
|
||
"datetime" => Some(IngestPrimitive::Datetime),
|
||
"timespan" => Some(IngestPrimitive::Timespan),
|
||
"minute" => Some(IngestPrimitive::Minute),
|
||
"second" => Some(IngestPrimitive::Second),
|
||
"time" => Some(IngestPrimitive::Time),
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
fn parse_attribute(s: &str) -> Option<Attribute> {
|
||
match s {
|
||
"none" => Some(Attribute::None),
|
||
"sorted" => Some(Attribute::Sorted),
|
||
"unique" => Some(Attribute::Unique),
|
||
"parted" => Some(Attribute::Parted),
|
||
"grouped" => Some(Attribute::Grouped),
|
||
_ => None,
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Public API
|
||
// ---------------------------------------------------------------------------
|
||
|
||
/// Convert an Arrow array + field descriptor into a q `Value`.
|
||
pub fn ingest_array(array: ArrayRef, field: &ArrowField) -> IngestionResult<Value> {
|
||
let hint = hint_from_field(field);
|
||
ingest_with_hint(array, hint)
|
||
}
|
||
|
||
/// Convert an Arrow record batch into a q table `Value`.
|
||
pub fn ingest_record_batch(batch: RecordBatch) -> IngestionResult<Value> {
|
||
let schema = batch.schema();
|
||
let mut column_names = Vec::with_capacity(batch.num_columns());
|
||
let mut columns = Vec::with_capacity(batch.num_columns());
|
||
for (index, field) in schema.fields().iter().enumerate() {
|
||
column_names.push(bytes::Bytes::copy_from_slice(field.name().as_bytes()));
|
||
columns.push(ingest_array(batch.column(index).clone(), field.as_ref())?);
|
||
}
|
||
let table = Table::new(Attribute::None, column_names, columns);
|
||
table
|
||
.validate()
|
||
.map_err(|e| IngestionError::Unsupported(e.to_string()))?;
|
||
Ok(Value::Table(table))
|
||
}
|
||
|
||
/// Convert a sequence of record batches (a stream) into a q table `Value`.
|
||
///
|
||
/// All batches must share the same schema. The batches are concatenated using
|
||
/// `arrow_select::concat::concat_batches` before ingestion.
|
||
pub fn ingest_record_batch_reader(
|
||
schema: SchemaRef,
|
||
batches: impl IntoIterator<Item = Result<RecordBatch, arrow_schema::ArrowError>>,
|
||
) -> IngestionResult<Value> {
|
||
let batches: Vec<RecordBatch> = batches.into_iter().collect::<Result<_, _>>()?;
|
||
if batches.is_empty() {
|
||
// Produce an empty table with the correct schema.
|
||
let column_names: Vec<bytes::Bytes> = schema
|
||
.fields()
|
||
.iter()
|
||
.map(|f| bytes::Bytes::copy_from_slice(f.name().as_bytes()))
|
||
.collect();
|
||
let columns: Vec<Value> = schema
|
||
.fields()
|
||
.iter()
|
||
.map(|f| ingest_array(arrow_array::new_empty_array(f.data_type()), f.as_ref()))
|
||
.collect::<Result<_, _>>()?;
|
||
let table = Table::new(Attribute::None, column_names, columns);
|
||
return Ok(Value::Table(table));
|
||
}
|
||
let merged = arrow_select::concat::concat_batches(&schema, &batches)?;
|
||
ingest_record_batch(merged)
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Main dispatch
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_with_hint(array: ArrayRef, hint: IngestHint) -> IngestionResult<Value> {
|
||
let shape = hint
|
||
.shape
|
||
.unwrap_or_else(|| default_shape(array.data_type()));
|
||
|
||
match shape {
|
||
IngestShape::UnaryPrimitive => {
|
||
if array.len() != 1 {
|
||
return Err(IngestionError::Unsupported(format!(
|
||
"unary_primitive shape requires length 1, got {}",
|
||
array.len()
|
||
)));
|
||
}
|
||
Ok(Value::UnaryPrimitive { opcode: -128 })
|
||
}
|
||
IngestShape::Table => ingest_table(array, hint),
|
||
IngestShape::Dictionary => ingest_dictionary(array, hint),
|
||
IngestShape::List => ingest_list(array, hint),
|
||
IngestShape::Atom | IngestShape::Vector => ingest_scalar_or_vector(array, shape, hint),
|
||
}
|
||
}
|
||
|
||
fn default_shape(dt: &DataType) -> IngestShape {
|
||
match dt {
|
||
DataType::Null => IngestShape::List,
|
||
DataType::List(_) | DataType::LargeList(_) => IngestShape::List,
|
||
// Multiple binary blobs default to a list of char vectors.
|
||
// Use explicit metadata (qroissant.shape=vector) for char vector.
|
||
DataType::Binary | DataType::LargeBinary | DataType::BinaryView => IngestShape::List,
|
||
DataType::Map(_, _) => IngestShape::Dictionary,
|
||
DataType::Struct(_) => IngestShape::Table,
|
||
_ => IngestShape::Vector,
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Table ingestion (Struct array)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_table(array: ArrayRef, hint: IngestHint) -> IngestionResult<Value> {
|
||
let attribute = hint.attribute.unwrap_or(Attribute::None);
|
||
|
||
let struct_array = array
|
||
.as_any()
|
||
.downcast_ref::<StructArray>()
|
||
.ok_or_else(|| {
|
||
IngestionError::Unsupported(format!(
|
||
"q table ingestion requires a StructArray, found {}",
|
||
array.data_type()
|
||
))
|
||
})?;
|
||
|
||
let fields = match array.data_type() {
|
||
DataType::Struct(fields) => fields.clone(),
|
||
other => {
|
||
return Err(IngestionError::Unsupported(format!(
|
||
"q table ingestion requires a struct field, found {other}"
|
||
)));
|
||
}
|
||
};
|
||
|
||
let mut column_names = Vec::with_capacity(fields.len());
|
||
let mut columns = Vec::with_capacity(fields.len());
|
||
for (i, child_field) in fields.iter().enumerate() {
|
||
column_names.push(bytes::Bytes::copy_from_slice(child_field.name().as_bytes()));
|
||
columns.push(ingest_array(
|
||
struct_array.column(i).clone(),
|
||
child_field.as_ref(),
|
||
)?);
|
||
}
|
||
|
||
let table = Table::new(attribute, column_names, columns);
|
||
table
|
||
.validate()
|
||
.map_err(|e| IngestionError::Unsupported(e.to_string()))?;
|
||
Ok(Value::Table(table))
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Dictionary ingestion (Map array)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_dictionary(array: ArrayRef, hint: IngestHint) -> IngestionResult<Value> {
|
||
let map_array = array.as_any().downcast_ref::<MapArray>().ok_or_else(|| {
|
||
IngestionError::Unsupported(format!(
|
||
"q dictionary ingestion requires a MapArray, found {}",
|
||
array.data_type()
|
||
))
|
||
})?;
|
||
|
||
if map_array.len() != 1 || map_array.is_null(0) {
|
||
return Err(IngestionError::Unsupported(
|
||
"q dictionary ingestion requires a non-null length-1 Arrow map".to_string(),
|
||
));
|
||
}
|
||
|
||
let entries = map_array.value(0);
|
||
let sorted = hint.sorted.unwrap_or(false);
|
||
let entry_fields = entries.fields().clone();
|
||
let keys = ingest_array(entries.column(0).clone(), entry_fields[0].as_ref())?;
|
||
let values = ingest_array(entries.column(1).clone(), entry_fields[1].as_ref())?;
|
||
let dict = Dictionary::new(sorted, keys, values);
|
||
dict.validate()
|
||
.map_err(|e| IngestionError::Unsupported(e.to_string()))?;
|
||
Ok(Value::Dictionary(dict))
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// List ingestion (List / LargeList / Binary / BinaryView arrays)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_list(array: ArrayRef, hint: IngestHint) -> IngestionResult<Value> {
|
||
let attribute = hint.attribute.unwrap_or(Attribute::None);
|
||
|
||
match array.data_type() {
|
||
DataType::Null => {
|
||
let values = (0..array.len())
|
||
.map(|_| Value::UnaryPrimitive { opcode: -128 })
|
||
.collect();
|
||
Ok(Value::List(List::new(attribute, values)))
|
||
}
|
||
DataType::List(child_field) => {
|
||
let child_field = child_field.clone();
|
||
let list_array = array
|
||
.as_any()
|
||
.downcast_ref::<ListArray>()
|
||
.expect("List datatype must match ListArray");
|
||
let mut values = Vec::with_capacity(list_array.len());
|
||
for i in 0..list_array.len() {
|
||
let child = list_array.value(i);
|
||
values.push(ingest_array(child, child_field.as_ref())?);
|
||
}
|
||
Ok(Value::List(List::new(attribute, values)))
|
||
}
|
||
DataType::LargeList(child_field) => {
|
||
let child_field = child_field.clone();
|
||
let list_array = array
|
||
.as_any()
|
||
.downcast_ref::<LargeListArray>()
|
||
.expect("LargeList datatype must match LargeListArray");
|
||
let mut values = Vec::with_capacity(list_array.len());
|
||
for i in 0..list_array.len() {
|
||
let child = list_array.value(i);
|
||
values.push(ingest_array(child, child_field.as_ref())?);
|
||
}
|
||
Ok(Value::List(List::new(attribute, values)))
|
||
}
|
||
DataType::Binary => {
|
||
let binary = array
|
||
.as_any()
|
||
.downcast_ref::<BinaryArray>()
|
||
.expect("Binary datatype must match BinaryArray");
|
||
let values = (0..binary.len())
|
||
.map(|i| {
|
||
Value::Vector(Vector::new(
|
||
Attribute::None,
|
||
VectorData::Char(bytes::Bytes::copy_from_slice(binary.value(i))),
|
||
))
|
||
})
|
||
.collect();
|
||
Ok(Value::List(List::new(attribute, values)))
|
||
}
|
||
DataType::LargeBinary => {
|
||
let binary = array
|
||
.as_any()
|
||
.downcast_ref::<LargeBinaryArray>()
|
||
.expect("LargeBinary datatype must match LargeBinaryArray");
|
||
let values = (0..binary.len())
|
||
.map(|i| {
|
||
Value::Vector(Vector::new(
|
||
Attribute::None,
|
||
VectorData::Char(bytes::Bytes::copy_from_slice(binary.value(i))),
|
||
))
|
||
})
|
||
.collect();
|
||
Ok(Value::List(List::new(attribute, values)))
|
||
}
|
||
DataType::BinaryView => {
|
||
let binary = array
|
||
.as_any()
|
||
.downcast_ref::<BinaryViewArray>()
|
||
.expect("BinaryView datatype must match BinaryViewArray");
|
||
let values = (0..binary.len())
|
||
.map(|i| {
|
||
Value::Vector(Vector::new(
|
||
Attribute::None,
|
||
VectorData::Char(bytes::Bytes::copy_from_slice(binary.value(i))),
|
||
))
|
||
})
|
||
.collect();
|
||
Ok(Value::List(List::new(attribute, values)))
|
||
}
|
||
other => Err(IngestionError::Unsupported(format!(
|
||
"q list ingestion from Arrow data type {other} is not supported"
|
||
))),
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Scalar / vector ingestion
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_scalar_or_vector(
|
||
array: ArrayRef,
|
||
shape: IngestShape,
|
||
hint: IngestHint,
|
||
) -> IngestionResult<Value> {
|
||
let attribute = hint.attribute.unwrap_or(Attribute::None);
|
||
let is_atom = shape == IngestShape::Atom;
|
||
|
||
if is_atom && array.len() != 1 {
|
||
return Err(IngestionError::Unsupported(format!(
|
||
"q atom shape requested but Arrow array has length {}",
|
||
array.len()
|
||
)));
|
||
}
|
||
|
||
match array.data_type() {
|
||
DataType::Boolean => ingest_boolean(&array, is_atom, attribute),
|
||
DataType::UInt8 => {
|
||
let prim = hint.primitive.unwrap_or(IngestPrimitive::Byte);
|
||
ingest_u8(&array, prim, is_atom, attribute)
|
||
}
|
||
DataType::Int16 => ingest_i16(&array, is_atom, attribute),
|
||
DataType::Int32 => {
|
||
let prim = hint.primitive.unwrap_or(IngestPrimitive::Int);
|
||
ingest_i32(&array, prim, is_atom, attribute)
|
||
}
|
||
DataType::Int64 => ingest_i64(&array, is_atom, attribute),
|
||
DataType::Float32 => ingest_f32(&array, is_atom, attribute),
|
||
DataType::Float64 => {
|
||
let prim = hint.primitive.unwrap_or(IngestPrimitive::Float);
|
||
ingest_f64(&array, prim, is_atom, attribute)
|
||
}
|
||
DataType::FixedSizeBinary(1) => {
|
||
let prim = hint.primitive.unwrap_or(IngestPrimitive::Char);
|
||
ingest_fixed_binary_1(&array, prim, is_atom, attribute)
|
||
}
|
||
DataType::FixedSizeBinary(16) => ingest_fixed_binary_16(&array, is_atom, attribute),
|
||
DataType::Utf8 => ingest_symbols_utf8(&array, is_atom, attribute),
|
||
DataType::LargeUtf8 => ingest_symbols_large_utf8(&array, is_atom, attribute),
|
||
DataType::Utf8View => ingest_symbols_utf8_view(&array, is_atom, attribute),
|
||
DataType::Dictionary(_, _) => ingest_symbols_dictionary(&array, is_atom, attribute),
|
||
DataType::Binary => ingest_binary_as_char(&array, is_atom, attribute),
|
||
DataType::LargeBinary => ingest_large_binary_as_char(&array, is_atom, attribute),
|
||
DataType::BinaryView => ingest_binary_view_as_char(&array, is_atom, attribute),
|
||
DataType::Date32 => ingest_date32(&array, is_atom, attribute),
|
||
DataType::Timestamp(TimeUnit::Nanosecond, tz) => {
|
||
if tz.is_some() {
|
||
return Err(IngestionError::Unsupported(
|
||
"Arrow timestamps with timezone cannot be ingested into q".to_string(),
|
||
));
|
||
}
|
||
ingest_timestamp_ns(&array, is_atom, attribute)
|
||
}
|
||
DataType::Timestamp(TimeUnit::Microsecond, tz) => {
|
||
if tz.is_some() {
|
||
return Err(IngestionError::Unsupported(
|
||
"Arrow timestamps with timezone cannot be ingested into q".to_string(),
|
||
));
|
||
}
|
||
ingest_timestamp_us(&array, is_atom, attribute)
|
||
}
|
||
DataType::Timestamp(TimeUnit::Millisecond, tz) => {
|
||
if tz.is_some() {
|
||
return Err(IngestionError::Unsupported(
|
||
"Arrow timestamps with timezone cannot be ingested into q".to_string(),
|
||
));
|
||
}
|
||
ingest_timestamp_ms(&array, is_atom, attribute)
|
||
}
|
||
DataType::Timestamp(TimeUnit::Second, tz) => {
|
||
if tz.is_some() {
|
||
return Err(IngestionError::Unsupported(
|
||
"Arrow timestamps with timezone cannot be ingested into q".to_string(),
|
||
));
|
||
}
|
||
ingest_timestamp_s(&array, is_atom, attribute)
|
||
}
|
||
DataType::Duration(TimeUnit::Nanosecond) => ingest_duration_ns(&array, is_atom, attribute),
|
||
DataType::Duration(TimeUnit::Microsecond) => ingest_duration_us(&array, is_atom, attribute),
|
||
DataType::Duration(TimeUnit::Millisecond) => ingest_duration_ms(&array, is_atom, attribute),
|
||
DataType::Duration(TimeUnit::Second) => ingest_duration_s(&array, is_atom, attribute),
|
||
DataType::Time32(TimeUnit::Second) => {
|
||
let prim = hint.primitive.unwrap_or(IngestPrimitive::Second);
|
||
ingest_time32_second(&array, prim, is_atom, attribute)
|
||
}
|
||
DataType::Time32(TimeUnit::Millisecond) => ingest_time32_ms(&array, is_atom, attribute),
|
||
DataType::Time64(TimeUnit::Microsecond) => ingest_time64_us(&array, is_atom, attribute),
|
||
DataType::Time64(TimeUnit::Nanosecond) => ingest_time64_ns(&array, is_atom, attribute),
|
||
other => Err(IngestionError::Unsupported(format!(
|
||
"q ingestion from Arrow data type {other} is not supported"
|
||
))),
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Boolean
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_boolean(array: &ArrayRef, is_atom: bool, attribute: Attribute) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<BooleanArray>()
|
||
.expect("Boolean datatype must match BooleanArray");
|
||
|
||
if arr.null_count() != 0 {
|
||
return Err(IngestionError::Unsupported(
|
||
"Arrow boolean arrays with nulls cannot be ingested as q boolean vectors; \
|
||
use a general list shape instead"
|
||
.to_string(),
|
||
));
|
||
}
|
||
|
||
let values: Vec<u8> = (0..arr.len())
|
||
.map(|i| if arr.value(i) { 1 } else { 0 })
|
||
.collect();
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Boolean(values[0] != 0)))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Boolean(bytes::Bytes::from(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// UInt8 (Byte or Char)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_u8(
|
||
array: &ArrayRef,
|
||
prim: IngestPrimitive,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<UInt8Array>()
|
||
.expect("UInt8 datatype must match UInt8Array");
|
||
|
||
if arr.null_count() != 0 {
|
||
return Err(IngestionError::Unsupported(
|
||
"Arrow UInt8 arrays with nulls cannot be ingested as q byte/char".to_string(),
|
||
));
|
||
}
|
||
|
||
let values: Vec<u8> = arr.values().to_vec();
|
||
match prim {
|
||
IngestPrimitive::Char => {
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Char(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Char(bytes::Bytes::from(values)),
|
||
)))
|
||
}
|
||
}
|
||
_ => {
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Byte(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Byte(bytes::Bytes::from(values)),
|
||
)))
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Int16 (Short)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_i16(array: &ArrayRef, is_atom: bool, attribute: Attribute) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<Int16Array>()
|
||
.expect("Int16 datatype must match Int16Array");
|
||
|
||
let mut values: Vec<i16> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = Q_NULL_SHORT;
|
||
}
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Short(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Short(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Int32 (Int, Month, Date, Minute, Second, Time)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_i32(
|
||
array: &ArrayRef,
|
||
prim: IngestPrimitive,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<Int32Array>()
|
||
.expect("Int32 datatype must match Int32Array");
|
||
|
||
let mut values: Vec<i32> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
let null_sentinel = i32::MIN; // all i32 q types share i32::MIN as null
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = null_sentinel;
|
||
}
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
let v = values[0];
|
||
let atom = match prim {
|
||
IngestPrimitive::Month => Atom::Month(v),
|
||
IngestPrimitive::Date => Atom::Date(v),
|
||
IngestPrimitive::Minute => Atom::Minute(v),
|
||
IngestPrimitive::Second => Atom::Second(v),
|
||
IngestPrimitive::Time => Atom::Time(v),
|
||
_ => Atom::Int(v),
|
||
};
|
||
Ok(Value::Atom(atom))
|
||
} else {
|
||
let bytes = vec_to_bytes(values);
|
||
let data = match prim {
|
||
IngestPrimitive::Month => VectorData::Month(bytes),
|
||
IngestPrimitive::Date => VectorData::Date(bytes),
|
||
IngestPrimitive::Minute => VectorData::Minute(bytes),
|
||
IngestPrimitive::Second => VectorData::Second(bytes),
|
||
IngestPrimitive::Time => VectorData::Time(bytes),
|
||
_ => VectorData::Int(bytes),
|
||
};
|
||
Ok(Value::Vector(Vector::new(attribute, data)))
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Int64 (Long)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_i64(array: &ArrayRef, is_atom: bool, attribute: Attribute) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<Int64Array>()
|
||
.expect("Int64 datatype must match Int64Array");
|
||
|
||
let mut values: Vec<i64> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = i64::MIN;
|
||
}
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Long(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Long(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Float32 (Real)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_f32(array: &ArrayRef, is_atom: bool, attribute: Attribute) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<Float32Array>()
|
||
.expect("Float32 datatype must match Float32Array");
|
||
|
||
let mut values: Vec<f32> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = f32::NAN;
|
||
}
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Real(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Real(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Float64 (Float, Datetime)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_f64(
|
||
array: &ArrayRef,
|
||
prim: IngestPrimitive,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<Float64Array>()
|
||
.expect("Float64 datatype must match Float64Array");
|
||
|
||
let mut values: Vec<f64> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = f64::NAN;
|
||
}
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
let v = values[0];
|
||
let atom = match prim {
|
||
IngestPrimitive::Datetime => Atom::Datetime(v),
|
||
_ => Atom::Float(v),
|
||
};
|
||
Ok(Value::Atom(atom))
|
||
} else {
|
||
let bytes = vec_to_bytes(values);
|
||
let data = match prim {
|
||
IngestPrimitive::Datetime => VectorData::Datetime(bytes),
|
||
_ => VectorData::Float(bytes),
|
||
};
|
||
Ok(Value::Vector(Vector::new(attribute, data)))
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// FixedSizeBinary(1) – Char or Byte
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_fixed_binary_1(
|
||
array: &ArrayRef,
|
||
prim: IngestPrimitive,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<FixedSizeBinaryArray>()
|
||
.expect("FixedSizeBinary(1) datatype must match FixedSizeBinaryArray");
|
||
|
||
if arr.null_count() != 0 {
|
||
return Err(IngestionError::Unsupported(
|
||
"Arrow FixedSizeBinary(1) arrays with nulls cannot be ingested as q char/byte"
|
||
.to_string(),
|
||
));
|
||
}
|
||
|
||
let values: Vec<u8> = (0..arr.len()).map(|i| arr.value(i)[0]).collect();
|
||
match prim {
|
||
IngestPrimitive::Byte => {
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Byte(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Byte(bytes::Bytes::from(values)),
|
||
)))
|
||
}
|
||
}
|
||
_ => {
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Char(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Char(bytes::Bytes::from(values)),
|
||
)))
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// FixedSizeBinary(16) – Guid
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_fixed_binary_16(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<FixedSizeBinaryArray>()
|
||
.expect("FixedSizeBinary(16) datatype must match FixedSizeBinaryArray");
|
||
|
||
if arr.null_count() != 0 {
|
||
return Err(IngestionError::Unsupported(
|
||
"Arrow FixedSizeBinary(16) arrays with nulls cannot be ingested as q guid".to_string(),
|
||
));
|
||
}
|
||
|
||
let values: Vec<[u8; 16]> = (0..arr.len())
|
||
.map(|i| {
|
||
let mut buf = [0u8; 16];
|
||
buf.copy_from_slice(arr.value(i));
|
||
buf
|
||
})
|
||
.collect();
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Guid(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::from_guids(&values),
|
||
)))
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Symbol (various string types)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn strings_to_symbol_value(strings: Vec<Vec<u8>>, is_atom: bool, attribute: Attribute) -> Value {
|
||
if is_atom {
|
||
Value::Atom(Atom::Symbol(bytes::Bytes::from(
|
||
strings.into_iter().next().unwrap_or_default(),
|
||
)))
|
||
} else {
|
||
Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Symbol(strings.into_iter().map(bytes::Bytes::from).collect()),
|
||
))
|
||
}
|
||
}
|
||
|
||
fn ingest_symbols_utf8(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<StringArray>()
|
||
.expect("Utf8 datatype must match StringArray");
|
||
let values: Vec<Vec<u8>> = (0..arr.len())
|
||
.map(|i| {
|
||
if arr.is_null(i) {
|
||
vec![]
|
||
} else {
|
||
arr.value(i).as_bytes().to_vec()
|
||
}
|
||
})
|
||
.collect();
|
||
Ok(strings_to_symbol_value(values, is_atom, attribute))
|
||
}
|
||
|
||
fn ingest_symbols_large_utf8(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<LargeStringArray>()
|
||
.expect("LargeUtf8 datatype must match LargeStringArray");
|
||
let values: Vec<Vec<u8>> = (0..arr.len())
|
||
.map(|i| {
|
||
if arr.is_null(i) {
|
||
vec![]
|
||
} else {
|
||
arr.value(i).as_bytes().to_vec()
|
||
}
|
||
})
|
||
.collect();
|
||
Ok(strings_to_symbol_value(values, is_atom, attribute))
|
||
}
|
||
|
||
fn ingest_symbols_utf8_view(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<StringViewArray>()
|
||
.expect("Utf8View datatype must match StringViewArray");
|
||
let values: Vec<Vec<u8>> = (0..arr.len())
|
||
.map(|i| {
|
||
if arr.is_null(i) {
|
||
vec![]
|
||
} else {
|
||
arr.value(i).as_bytes().to_vec()
|
||
}
|
||
})
|
||
.collect();
|
||
Ok(strings_to_symbol_value(values, is_atom, attribute))
|
||
}
|
||
|
||
fn ingest_symbols_dictionary(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
macro_rules! try_dict_type {
|
||
($key_type:ty) => {{
|
||
if let Some(dict) = array
|
||
.as_any()
|
||
.downcast_ref::<arrow_array::DictionaryArray<$key_type>>()
|
||
{
|
||
let values_arr = dict.values();
|
||
let strings: Vec<Vec<u8>> = (0..dict.len())
|
||
.map(|i| {
|
||
if dict.is_null(i) {
|
||
return vec![];
|
||
}
|
||
let key_idx = dict.key(i).expect("non-null key must have value") as usize;
|
||
if let Some(s) = values_arr.as_any().downcast_ref::<StringArray>() {
|
||
s.value(key_idx).as_bytes().to_vec()
|
||
} else if let Some(s) =
|
||
values_arr.as_any().downcast_ref::<LargeStringArray>()
|
||
{
|
||
s.value(key_idx).as_bytes().to_vec()
|
||
} else if let Some(s) =
|
||
values_arr.as_any().downcast_ref::<StringViewArray>()
|
||
{
|
||
s.value(key_idx).as_bytes().to_vec()
|
||
} else {
|
||
vec![]
|
||
}
|
||
})
|
||
.collect();
|
||
return Ok(strings_to_symbol_value(strings, is_atom, attribute));
|
||
}
|
||
}};
|
||
}
|
||
try_dict_type!(arrow_array::types::Int8Type);
|
||
try_dict_type!(arrow_array::types::Int16Type);
|
||
try_dict_type!(arrow_array::types::Int32Type);
|
||
try_dict_type!(arrow_array::types::Int64Type);
|
||
try_dict_type!(arrow_array::types::UInt8Type);
|
||
try_dict_type!(arrow_array::types::UInt16Type);
|
||
try_dict_type!(arrow_array::types::UInt32Type);
|
||
try_dict_type!(arrow_array::types::UInt64Type);
|
||
Err(IngestionError::Unsupported(
|
||
"Unsupported dictionary key type for symbol ingestion".to_string(),
|
||
))
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Binary → Char vector (single-element binary → char vector)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_binary_as_char(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<BinaryArray>()
|
||
.expect("Binary datatype must match BinaryArray");
|
||
|
||
if arr.null_count() != 0 {
|
||
return Err(IngestionError::Unsupported(
|
||
"Arrow Binary arrays with nulls cannot be ingested as q char vectors".to_string(),
|
||
));
|
||
}
|
||
if arr.len() != 1 {
|
||
return Err(IngestionError::Unsupported(
|
||
"Multi-element Binary arrays should use List shape for q ingestion".to_string(),
|
||
));
|
||
}
|
||
|
||
let bytes = arr.value(0).to_vec();
|
||
if is_atom && bytes.len() == 1 {
|
||
Ok(Value::Atom(Atom::Char(bytes[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Char(bytes::Bytes::from(bytes)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
fn ingest_large_binary_as_char(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<LargeBinaryArray>()
|
||
.expect("LargeBinary datatype must match LargeBinaryArray");
|
||
|
||
if arr.null_count() != 0 {
|
||
return Err(IngestionError::Unsupported(
|
||
"Arrow LargeBinary arrays with nulls cannot be ingested as q char vectors".to_string(),
|
||
));
|
||
}
|
||
if arr.len() != 1 {
|
||
return Err(IngestionError::Unsupported(
|
||
"Multi-element LargeBinary arrays should use List shape for q ingestion".to_string(),
|
||
));
|
||
}
|
||
|
||
let bytes = arr.value(0).to_vec();
|
||
if is_atom && bytes.len() == 1 {
|
||
Ok(Value::Atom(Atom::Char(bytes[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Char(bytes::Bytes::from(bytes)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
fn ingest_binary_view_as_char(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<BinaryViewArray>()
|
||
.expect("BinaryView datatype must match BinaryViewArray");
|
||
|
||
if arr.null_count() != 0 {
|
||
return Err(IngestionError::Unsupported(
|
||
"Arrow BinaryView arrays with nulls cannot be ingested as q char vectors".to_string(),
|
||
));
|
||
}
|
||
if arr.len() != 1 {
|
||
return Err(IngestionError::Unsupported(
|
||
"Multi-element BinaryView arrays should use List shape for q ingestion".to_string(),
|
||
));
|
||
}
|
||
|
||
let bytes = arr.value(0).to_vec();
|
||
if is_atom && bytes.len() == 1 {
|
||
Ok(Value::Atom(Atom::Char(bytes[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Char(bytes::Bytes::from(bytes)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Date32 → q Date (days since 2000-01-01)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_date32(array: &ArrayRef, is_atom: bool, attribute: Attribute) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<Date32Array>()
|
||
.expect("Date32 datatype must match Date32Array");
|
||
|
||
let mut values: Vec<i32> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = Q_NULL_DATE;
|
||
}
|
||
}
|
||
}
|
||
for v in &mut values {
|
||
if *v != Q_NULL_DATE {
|
||
*v = v.saturating_sub(DATE_OFFSET_DAYS);
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Date(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Date(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Timestamp → q Timestamp (ns since 2000-01-01)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_timestamp_ns(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<TimestampNanosecondArray>()
|
||
.expect("Timestamp(Nanosecond) must match TimestampNanosecondArray");
|
||
|
||
let mut values: Vec<i64> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = Q_NULL_TIMESTAMP;
|
||
}
|
||
}
|
||
}
|
||
for v in &mut values {
|
||
if *v != Q_NULL_TIMESTAMP {
|
||
*v = v.saturating_sub(TIMESTAMP_OFFSET_NS);
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Timestamp(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Timestamp(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
fn ingest_timestamp_us(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<TimestampMicrosecondArray>()
|
||
.expect("Timestamp(Microsecond) must match TimestampMicrosecondArray");
|
||
|
||
let mut values: Vec<i64> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = Q_NULL_TIMESTAMP;
|
||
}
|
||
}
|
||
}
|
||
for v in &mut values {
|
||
if *v != Q_NULL_TIMESTAMP {
|
||
*v = v.saturating_mul(1_000).saturating_sub(TIMESTAMP_OFFSET_NS);
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Timestamp(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Timestamp(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
fn ingest_timestamp_ms(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<TimestampMillisecondArray>()
|
||
.expect("Timestamp(Millisecond) must match TimestampMillisecondArray");
|
||
|
||
let mut values: Vec<i64> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = Q_NULL_TIMESTAMP;
|
||
}
|
||
}
|
||
}
|
||
for v in &mut values {
|
||
if *v != Q_NULL_TIMESTAMP {
|
||
*v = v
|
||
.saturating_mul(1_000_000)
|
||
.saturating_sub(TIMESTAMP_OFFSET_NS);
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Timestamp(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Timestamp(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
fn ingest_timestamp_s(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<TimestampSecondArray>()
|
||
.expect("Timestamp(Second) must match TimestampSecondArray");
|
||
|
||
let mut values: Vec<i64> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = Q_NULL_TIMESTAMP;
|
||
}
|
||
}
|
||
}
|
||
for v in &mut values {
|
||
if *v != Q_NULL_TIMESTAMP {
|
||
*v = v
|
||
.saturating_mul(1_000_000_000)
|
||
.saturating_sub(TIMESTAMP_OFFSET_NS);
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Timestamp(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Timestamp(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Duration → q Timespan (ns)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_duration_ns(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<DurationNanosecondArray>()
|
||
.expect("Duration(Nanosecond) must match DurationNanosecondArray");
|
||
|
||
let mut values: Vec<i64> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = Q_NULL_TIMESPAN;
|
||
}
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Timespan(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Timespan(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
fn ingest_duration_us(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<DurationMicrosecondArray>()
|
||
.expect("Duration(Microsecond) must match DurationMicrosecondArray");
|
||
|
||
let mut values: Vec<i64> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = Q_NULL_TIMESPAN;
|
||
}
|
||
}
|
||
}
|
||
for v in &mut values {
|
||
if *v != Q_NULL_TIMESPAN {
|
||
*v = v.saturating_mul(1_000);
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Timespan(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Timespan(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
fn ingest_duration_ms(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<DurationMillisecondArray>()
|
||
.expect("Duration(Millisecond) must match DurationMillisecondArray");
|
||
|
||
let mut values: Vec<i64> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = Q_NULL_TIMESPAN;
|
||
}
|
||
}
|
||
}
|
||
for v in &mut values {
|
||
if *v != Q_NULL_TIMESPAN {
|
||
*v = v.saturating_mul(1_000_000);
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Timespan(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Timespan(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
fn ingest_duration_s(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<DurationSecondArray>()
|
||
.expect("Duration(Second) must match DurationSecondArray");
|
||
|
||
let mut values: Vec<i64> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = Q_NULL_TIMESPAN;
|
||
}
|
||
}
|
||
}
|
||
for v in &mut values {
|
||
if *v != Q_NULL_TIMESPAN {
|
||
*v = v.saturating_mul(1_000_000_000);
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Timespan(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Timespan(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Time32(Second) → q Second or Minute
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_time32_second(
|
||
array: &ArrayRef,
|
||
prim: IngestPrimitive,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<Time32SecondArray>()
|
||
.expect("Time32(Second) must match Time32SecondArray");
|
||
|
||
let mut values: Vec<i32> = arr.values().to_vec();
|
||
if prim == IngestPrimitive::Minute {
|
||
let null = Q_NULL_MINUTE;
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = null;
|
||
}
|
||
}
|
||
}
|
||
for v in &mut values {
|
||
if *v != null {
|
||
*v /= 60;
|
||
}
|
||
}
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Minute(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Minute(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
} else {
|
||
let null = Q_NULL_SECOND;
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = null;
|
||
}
|
||
}
|
||
}
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Second(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Second(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Time32(Millisecond) → q Time (ms)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_time32_ms(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<Time32MillisecondArray>()
|
||
.expect("Time32(Millisecond) must match Time32MillisecondArray");
|
||
|
||
let mut values: Vec<i32> = arr.values().to_vec();
|
||
if arr.null_count() != 0 {
|
||
for i in 0..arr.len() {
|
||
if arr.is_null(i) {
|
||
values[i] = Q_NULL_TIME;
|
||
}
|
||
}
|
||
}
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Time(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Time(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Time64(Microsecond) → q Time (ms, truncating)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_time64_us(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<Time64MicrosecondArray>()
|
||
.expect("Time64(Microsecond) must match Time64MicrosecondArray");
|
||
|
||
let values: Vec<i32> = (0..arr.len())
|
||
.map(|i| {
|
||
if arr.is_null(i) {
|
||
Q_NULL_TIME
|
||
} else {
|
||
(arr.value(i) / 1_000).clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32
|
||
}
|
||
})
|
||
.collect();
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Time(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Time(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Time64(Nanosecond) → q Time (ms, truncating)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
fn ingest_time64_ns(
|
||
array: &ArrayRef,
|
||
is_atom: bool,
|
||
attribute: Attribute,
|
||
) -> IngestionResult<Value> {
|
||
let arr = array
|
||
.as_any()
|
||
.downcast_ref::<Time64NanosecondArray>()
|
||
.expect("Time64(Nanosecond) must match Time64NanosecondArray");
|
||
|
||
let values: Vec<i32> = (0..arr.len())
|
||
.map(|i| {
|
||
if arr.is_null(i) {
|
||
Q_NULL_TIME
|
||
} else {
|
||
(arr.value(i) / 1_000_000).clamp(i64::from(i32::MIN), i64::from(i32::MAX)) as i32
|
||
}
|
||
})
|
||
.collect();
|
||
|
||
if is_atom {
|
||
Ok(Value::Atom(Atom::Time(values[0])))
|
||
} else {
|
||
Ok(Value::Vector(Vector::new(
|
||
attribute,
|
||
VectorData::Time(vec_to_bytes(values)),
|
||
)))
|
||
}
|
||
}
|