Vendor qroissant 0.3.0 baseline
This commit is contained in:
commit
53ac90fe84
56 changed files with 18309 additions and 0 deletions
11
crates/qroissant-kernels/Cargo.toml
Normal file
11
crates/qroissant-kernels/Cargo.toml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
[package]
|
||||
name = "qroissant-kernels"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[lib]
|
||||
name = "qroissant_kernels"
|
||||
path = "src/lib.rs"
|
||||
|
||||
121
crates/qroissant-kernels/src/boolean.rs
Normal file
121
crates/qroissant-kernels/src/boolean.rs
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
//! SIMD boolean bit-packing for q → Arrow projection.
|
||||
//!
|
||||
//! q stores boolean vectors as one byte per element (`0` = false, `1` = true,
|
||||
//! `2` = null on the wire — see [`crate::nulls::Q_NULL_BOOLEAN_WIRE`]).
|
||||
//! Arrow `BooleanArray` uses a compact bitmap: one bit per element, LSB-first
|
||||
//! within each byte.
|
||||
//!
|
||||
//! [`pack_bool_bytes`] converts a q boolean byte slice into an Arrow-compatible
|
||||
//! packed bitmap using SIMD comparisons, processing `N` bytes per iteration.
|
||||
|
||||
use std::simd::prelude::*;
|
||||
|
||||
/// Packs a slice of q boolean bytes into an Arrow-compatible LSB-first bitmap.
|
||||
///
|
||||
/// Each source byte is treated as non-zero → `1` bit, zero → `0` bit.
|
||||
/// Null bytes (`2`) are treated as truthy here — callers that need a separate
|
||||
/// null buffer should pass in a pre-filtered slice or handle nulls separately.
|
||||
///
|
||||
/// Returns `(bitmap_bytes, element_count)` where `bitmap_bytes` is the packed
|
||||
/// bitmap (length `ceil(src.len() / 8)`) and `element_count == src.len()`.
|
||||
///
|
||||
/// The returned `Vec<u8>` is suitable for wrapping directly into an Arrow
|
||||
/// `arrow_buffer::Buffer` → `arrow_array::types::BooleanBuffer`.
|
||||
#[inline]
|
||||
pub fn pack_bool_bytes(src: &[u8]) -> (Vec<u8>, usize) {
|
||||
let len = src.len();
|
||||
let out_len = len.div_ceil(8);
|
||||
let mut out = vec![0u8; out_len];
|
||||
|
||||
const N: usize = 8;
|
||||
let zero_v = Simd::<u8, N>::splat(0u8);
|
||||
// Number of full 8-byte chunks we can process with SIMD.
|
||||
let n_aligned = (len / N) * N;
|
||||
|
||||
for (i, chunk) in src[..n_aligned].chunks_exact(N).enumerate() {
|
||||
let v = Simd::<u8, N>::from_slice(chunk);
|
||||
// Compare each byte to zero: non-zero → true (1-bit), zero → false (0-bit).
|
||||
let mask: std::simd::Mask<i8, N> = v.simd_ne(zero_v);
|
||||
// `to_bitmask()` produces a u8 with one bit per lane, LSB = lane 0.
|
||||
out[i] = mask.to_bitmask() as u8;
|
||||
}
|
||||
|
||||
// Scalar tail (fewer than N elements remain).
|
||||
if n_aligned < len {
|
||||
let mut tail_byte = 0u8;
|
||||
for (bit, &b) in src[n_aligned..].iter().enumerate() {
|
||||
if b != 0 {
|
||||
tail_byte |= 1u8 << bit;
|
||||
}
|
||||
}
|
||||
out[n_aligned / N] = tail_byte;
|
||||
}
|
||||
|
||||
(out, len)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn pack_empty() {
        let (bm, n) = pack_bool_bytes(&[]);
        assert_eq!(n, 0);
        assert!(bm.is_empty());
    }

    #[test]
    fn pack_all_false() {
        let src = [0u8; 16];
        let (bm, n) = pack_bool_bytes(&src);
        assert_eq!(n, 16);
        assert_eq!(bm, [0u8, 0u8]);
    }

    #[test]
    fn pack_all_true() {
        let src = [1u8; 16];
        let (bm, n) = pack_bool_bytes(&src);
        assert_eq!(n, 16);
        assert_eq!(bm, [0xFF, 0xFF]);
    }

    #[test]
    fn pack_lsb_first() {
        // Only the first element is true → bit 0 of byte 0 should be set.
        let mut src = [0u8; 8];
        src[0] = 1;
        let (bm, _) = pack_bool_bytes(&src);
        assert_eq!(bm[0], 0b00000001);
    }

    #[test]
    fn pack_last_element_in_first_chunk() {
        // Only the 8th element (index 7) is true → bit 7 of byte 0.
        let mut src = [0u8; 8];
        src[7] = 1;
        let (bm, _) = pack_bool_bytes(&src);
        assert_eq!(bm[0], 0b10000000);
    }

    #[test]
    fn pack_tail_single() {
        // 9 elements: first 8 all false, 9th is true → bit 0 of byte 1.
        let mut src = [0u8; 9];
        src[8] = 1;
        let (bm, n) = pack_bool_bytes(&src);
        assert_eq!(n, 9);
        assert_eq!(bm.len(), 2);
        assert_eq!(bm[0], 0x00);
        assert_eq!(bm[1], 0b00000001);
    }

    #[test]
    fn pack_non_zero_is_true() {
        // Any non-zero value should count as true.
        let src = [0u8, 2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
        let (bm, _) = pack_bool_bytes(&src);
        assert_eq!(bm[0], 0b00000010);
    }

    #[test]
    fn pack_mixed_chunk_and_multi_bit_tail() {
        // Interior lanes of the SIMD chunk and several tail bits at once:
        // elements 0, 2, 3, 7 → byte 0; elements 9, 10 → bits 1 and 2 of byte 1.
        let src = [1u8, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1];
        let (bm, n) = pack_bool_bytes(&src);
        assert_eq!(n, 11);
        assert_eq!(bm, [0b1000_1101, 0b0000_0110]);
    }
}
|
||||
25
crates/qroissant-kernels/src/lib.rs
Normal file
25
crates/qroissant-kernels/src/lib.rs
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
#![feature(portable_simd)]
|
||||
//! SIMD and hot kernels for qroissant.
|
||||
//!
|
||||
//! This crate provides two categories of primitives:
//!
//! 1. **Constants** – null and infinity sentinels and epoch-offset values used
//!    throughout the workspace to interpret q IPC wire bytes.
//!
//! 2. **Slice transforms** – functions that operate on typed Rust slices
//!    (boolean bit-packing, null detection, temporal conversion). They use
//!    `portable_simd` for whole chunks and a scalar loop for the remainder,
//!    behind a plain-slice public API consumed by `qroissant-arrow`.
|
||||
//!
|
||||
//! # Architecture rule
|
||||
//! All nightly-sensitive code (`portable_simd`, intrinsics, etc.) must live
|
||||
//! in this crate so that the rest of the workspace can remain on stable if
|
||||
//! needed and so that performance-sensitive code has a single home.
|
||||
|
||||
pub mod boolean;
|
||||
pub mod nulls;
|
||||
pub mod temporal;
|
||||
|
||||
pub use boolean::*;
|
||||
pub use nulls::*;
|
||||
pub use temporal::*;
|
||||
371
crates/qroissant-kernels/src/nulls.rs
Normal file
371
crates/qroissant-kernels/src/nulls.rs
Normal file
|
|
@ -0,0 +1,371 @@
|
|||
//! Null sentinel constants and SIMD-accelerated null-detection helpers for q IPC types.
|
||||
//!
|
||||
//! In q's IPC protocol each fixed-width primitive has a dedicated sentinel value
|
||||
//! that represents a missing (null) element. These constants are consumed by
|
||||
//! both the Arrow projection layer and any serialisation code that needs to
|
||||
//! round-trip q nullability semantics.
|
||||
//!
|
||||
//! Each `validity_*` function returns `None` when the slice contains no nulls
|
||||
//! (the fast path: callers can skip building a null buffer entirely) or
|
||||
//! `Some(Vec<bool>)` where `true` means the element is valid. The null check
|
||||
//! uses `portable_simd` for throughput; the validity-vector build falls back to
|
||||
//! a scalar loop because nulls are the uncommon case.
|
||||
|
||||
use std::simd::prelude::*;
|
||||
|
||||
// Primitive integer sentinels: q reserves the minimum value of each width.

/// Null sentinel for q short (i16).
pub const Q_NULL_SHORT: i16 = i16::MIN;

/// Null sentinel for q int (i32).
pub const Q_NULL_INT: i32 = i32::MIN;

/// Null sentinel for q long (i64).
pub const Q_NULL_LONG: i64 = i64::MIN;

// Temporal sentinels share the sentinel of their underlying integer width.

/// Null sentinel for q timestamp (i64 nanoseconds since 2000.01.01).
pub const Q_NULL_TIMESTAMP: i64 = Q_NULL_LONG;

/// Null sentinel for q month (i32 months since 2000.01).
pub const Q_NULL_MONTH: i32 = Q_NULL_INT;

/// Null sentinel for q date (i32 days since 2000.01.01).
pub const Q_NULL_DATE: i32 = Q_NULL_INT;

/// Null sentinel for q timespan (i64 nanoseconds).
pub const Q_NULL_TIMESPAN: i64 = Q_NULL_LONG;

/// Null sentinel for q minute (i32 minutes).
pub const Q_NULL_MINUTE: i32 = Q_NULL_INT;

/// Null sentinel for q second (i32 seconds).
pub const Q_NULL_SECOND: i32 = Q_NULL_INT;

/// Null sentinel for q time (i32 milliseconds).
pub const Q_NULL_TIME: i32 = Q_NULL_INT;

/// Byte value that encodes a null boolean in the raw q IPC wire format
/// (`0` = false, `1` = true, `2` = null).
pub const Q_NULL_BOOLEAN_WIRE: u8 = 2;
|
||||
|
||||
// ---------------------------------------------------------------------------
// Infinity sentinel constants
// ---------------------------------------------------------------------------
//
// Integer widths use `MAX` for +infinity and `-MAX` (i.e. `MIN + 1`) for
// -infinity. Temporal types reuse the sentinel of their underlying integer
// width.

/// Positive infinity sentinel for q short (i16).
pub const Q_INF_SHORT: i16 = i16::MAX;
/// Negative infinity sentinel for q short (i16).
pub const Q_NINF_SHORT: i16 = -Q_INF_SHORT;

/// Positive infinity sentinel for q int (i32).
pub const Q_INF_INT: i32 = i32::MAX;
/// Negative infinity sentinel for q int (i32).
pub const Q_NINF_INT: i32 = -Q_INF_INT;

/// Positive infinity sentinel for q long (i64).
pub const Q_INF_LONG: i64 = i64::MAX;
/// Negative infinity sentinel for q long (i64).
pub const Q_NINF_LONG: i64 = -Q_INF_LONG;

/// Positive infinity sentinel for q real (f32).
pub const Q_INF_REAL: f32 = f32::INFINITY;
/// Negative infinity sentinel for q real (f32).
pub const Q_NINF_REAL: f32 = f32::NEG_INFINITY;

/// Positive infinity sentinel for q float (f64).
pub const Q_INF_FLOAT: f64 = f64::INFINITY;
/// Negative infinity sentinel for q float (f64).
pub const Q_NINF_FLOAT: f64 = f64::NEG_INFINITY;

/// Positive infinity sentinel for q timestamp (i64 nanoseconds).
pub const Q_INF_TIMESTAMP: i64 = Q_INF_LONG;
/// Negative infinity sentinel for q timestamp (i64 nanoseconds).
pub const Q_NINF_TIMESTAMP: i64 = Q_NINF_LONG;

/// Positive infinity sentinel for q timespan (i64 nanoseconds).
pub const Q_INF_TIMESPAN: i64 = Q_INF_LONG;
/// Negative infinity sentinel for q timespan (i64 nanoseconds).
pub const Q_NINF_TIMESPAN: i64 = Q_NINF_LONG;

/// Positive infinity sentinel for q date (i32 days).
pub const Q_INF_DATE: i32 = Q_INF_INT;
/// Negative infinity sentinel for q date (i32 days).
pub const Q_NINF_DATE: i32 = Q_NINF_INT;

/// Positive infinity sentinel for q month (i32 months).
pub const Q_INF_MONTH: i32 = Q_INF_INT;
/// Negative infinity sentinel for q month (i32 months).
pub const Q_NINF_MONTH: i32 = Q_NINF_INT;

/// Positive infinity sentinel for q minute (i32 minutes).
pub const Q_INF_MINUTE: i32 = Q_INF_INT;
/// Negative infinity sentinel for q minute (i32 minutes).
pub const Q_NINF_MINUTE: i32 = Q_NINF_INT;

/// Positive infinity sentinel for q second (i32 seconds).
pub const Q_INF_SECOND: i32 = Q_INF_INT;
/// Negative infinity sentinel for q second (i32 seconds).
pub const Q_NINF_SECOND: i32 = Q_NINF_INT;

/// Positive infinity sentinel for q time (i32 milliseconds).
pub const Q_INF_TIME: i32 = Q_INF_INT;
/// Negative infinity sentinel for q time (i32 milliseconds).
pub const Q_NINF_TIME: i32 = Q_NINF_INT;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// SIMD null-detection helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Returns a validity vector for a `&[i16]` slice using [`Q_NULL_SHORT`] as
|
||||
/// the sentinel. Returns `None` when no nulls are present.
|
||||
#[inline]
|
||||
pub fn validity_i16(values: &[i16]) -> Option<Vec<bool>> {
|
||||
const N: usize = 32;
|
||||
let sentinel = Simd::<i16, N>::splat(Q_NULL_SHORT);
|
||||
let n_aligned = (values.len() / N) * N;
|
||||
|
||||
let has_null = values[..n_aligned]
|
||||
.chunks_exact(N)
|
||||
.any(|c| Simd::<i16, N>::from_slice(c).simd_eq(sentinel).any())
|
||||
|| values[n_aligned..].iter().any(|&v| v == Q_NULL_SHORT);
|
||||
|
||||
if !has_null {
|
||||
return None;
|
||||
}
|
||||
Some(values.iter().map(|&v| v != Q_NULL_SHORT).collect())
|
||||
}
|
||||
|
||||
/// Returns a validity vector for a `&[i32]` slice using the supplied sentinel.
|
||||
/// Returns `None` when no nulls are present.
|
||||
#[inline]
|
||||
pub fn validity_i32(values: &[i32], sentinel: i32) -> Option<Vec<bool>> {
|
||||
const N: usize = 16;
|
||||
let sentinel_v = Simd::<i32, N>::splat(sentinel);
|
||||
let n_aligned = (values.len() / N) * N;
|
||||
|
||||
let has_null = values[..n_aligned]
|
||||
.chunks_exact(N)
|
||||
.any(|c| Simd::<i32, N>::from_slice(c).simd_eq(sentinel_v).any())
|
||||
|| values[n_aligned..].iter().any(|&v| v == sentinel);
|
||||
|
||||
if !has_null {
|
||||
return None;
|
||||
}
|
||||
Some(values.iter().map(|&v| v != sentinel).collect())
|
||||
}
|
||||
|
||||
/// Returns a validity vector for a `&[i64]` slice using the supplied sentinel.
|
||||
/// Returns `None` when no nulls are present.
|
||||
#[inline]
|
||||
pub fn validity_i64(values: &[i64], sentinel: i64) -> Option<Vec<bool>> {
|
||||
const N: usize = 8;
|
||||
let sentinel_v = Simd::<i64, N>::splat(sentinel);
|
||||
let n_aligned = (values.len() / N) * N;
|
||||
|
||||
let has_null = values[..n_aligned]
|
||||
.chunks_exact(N)
|
||||
.any(|c| Simd::<i64, N>::from_slice(c).simd_eq(sentinel_v).any())
|
||||
|| values[n_aligned..].iter().any(|&v| v == sentinel);
|
||||
|
||||
if !has_null {
|
||||
return None;
|
||||
}
|
||||
Some(values.iter().map(|&v| v != sentinel).collect())
|
||||
}
|
||||
|
||||
/// Returns a validity vector for a `&[f32]` slice where `NaN` encodes null.
|
||||
/// Returns `None` when no nulls are present.
|
||||
#[inline]
|
||||
pub fn validity_f32(values: &[f32]) -> Option<Vec<bool>> {
|
||||
const N: usize = 16;
|
||||
let n_aligned = (values.len() / N) * N;
|
||||
|
||||
// NaN is the only value not equal to itself.
|
||||
let has_null = values[..n_aligned].chunks_exact(N).any(|c| {
|
||||
let v = Simd::<f32, N>::from_slice(c);
|
||||
v.simd_ne(v).any()
|
||||
}) || values[n_aligned..].iter().any(|v| v.is_nan());
|
||||
|
||||
if !has_null {
|
||||
return None;
|
||||
}
|
||||
Some(values.iter().map(|v| !v.is_nan()).collect())
|
||||
}
|
||||
|
||||
/// Returns a validity vector for a `&[f64]` slice where `NaN` encodes null.
|
||||
/// Returns `None` when no nulls are present.
|
||||
#[inline]
|
||||
pub fn validity_f64(values: &[f64]) -> Option<Vec<bool>> {
|
||||
const N: usize = 8;
|
||||
let n_aligned = (values.len() / N) * N;
|
||||
|
||||
let has_null = values[..n_aligned].chunks_exact(N).any(|c| {
|
||||
let v = Simd::<f64, N>::from_slice(c);
|
||||
v.simd_ne(v).any()
|
||||
}) || values[n_aligned..].iter().any(|v| v.is_nan());
|
||||
|
||||
if !has_null {
|
||||
return None;
|
||||
}
|
||||
Some(values.iter().map(|v| !v.is_nan()).collect())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // validity_i16
    #[test]
    fn i16_no_nulls() {
        assert_eq!(validity_i16(&[1, 2, 3, 4, 5]), None);
    }

    #[test]
    fn i16_with_null() {
        assert_eq!(
            validity_i16(&[1, Q_NULL_SHORT, 3]),
            Some(vec![true, false, true])
        );
    }

    #[test]
    fn i16_all_nulls() {
        assert_eq!(validity_i16(&[Q_NULL_SHORT; 4]), Some(vec![false; 4]));
    }

    #[test]
    fn i16_empty() {
        assert_eq!(validity_i16(&[]), None);
    }

    #[test]
    fn i16_single_null() {
        assert_eq!(validity_i16(&[Q_NULL_SHORT]), Some(vec![false]));
    }

    #[test]
    fn i16_null_in_remainder() {
        let mut data: Vec<i16> = (1..=9).collect();
        data[8] = Q_NULL_SHORT;
        let v = validity_i16(&data).unwrap();
        assert!(!v[8]);
        assert!(v[0]);
    }

    #[test]
    fn i16_no_nulls_full_chunk() {
        // 40 elements: one full 32-lane chunk plus a tail, none null.
        let data: Vec<i16> = (1..=40).collect();
        assert_eq!(validity_i16(&data), None);
    }

    #[test]
    fn i16_null_in_full_chunk() {
        // The only null sits inside the 32-lane chunk, so just the SIMD
        // comparison can detect it.
        let mut data: Vec<i16> = (1..=40).collect();
        data[5] = Q_NULL_SHORT;
        let mut expected = vec![true; 40];
        expected[5] = false;
        assert_eq!(validity_i16(&data), Some(expected));
    }

    // validity_i32
    #[test]
    fn i32_no_nulls() {
        assert_eq!(validity_i32(&[1, 2, 3], Q_NULL_INT), None);
    }

    #[test]
    fn i32_with_null() {
        assert_eq!(
            validity_i32(&[1, Q_NULL_INT, 3], Q_NULL_INT),
            Some(vec![true, false, true])
        );
    }

    #[test]
    fn i32_empty() {
        assert_eq!(validity_i32(&[], Q_NULL_INT), None);
    }

    #[test]
    fn i32_all_nulls() {
        assert_eq!(
            validity_i32(&[Q_NULL_INT; 3], Q_NULL_INT),
            Some(vec![false; 3])
        );
    }

    #[test]
    fn i32_null_in_remainder() {
        let mut data: Vec<i32> = (1..=10).collect();
        data[9] = Q_NULL_INT;
        assert!(!validity_i32(&data, Q_NULL_INT).unwrap()[9]);
    }

    #[test]
    fn i32_null_in_full_chunk() {
        // 20 elements: the null at index 3 lies inside the 16-lane chunk.
        let mut data: Vec<i32> = (1..=20).collect();
        data[3] = Q_NULL_INT;
        let mut expected = vec![true; 20];
        expected[3] = false;
        assert_eq!(validity_i32(&data, Q_NULL_INT), Some(expected));
    }

    // validity_i64
    #[test]
    fn i64_no_nulls() {
        assert_eq!(validity_i64(&[1, 2, 3], Q_NULL_LONG), None);
    }

    #[test]
    fn i64_with_null() {
        assert_eq!(
            validity_i64(&[1, Q_NULL_LONG, 3], Q_NULL_LONG),
            Some(vec![true, false, true])
        );
    }

    #[test]
    fn i64_empty() {
        assert_eq!(validity_i64(&[], Q_NULL_LONG), None);
    }

    #[test]
    fn i64_timestamp_sentinel() {
        assert_eq!(
            validity_i64(&[100, Q_NULL_TIMESTAMP, 300], Q_NULL_TIMESTAMP),
            Some(vec![true, false, true])
        );
    }

    #[test]
    fn i64_null_in_full_chunk() {
        // 10 elements: the null at index 2 lies inside the 8-lane chunk.
        let mut data: Vec<i64> = (1..=10).collect();
        data[2] = Q_NULL_LONG;
        let mut expected = vec![true; 10];
        expected[2] = false;
        assert_eq!(validity_i64(&data, Q_NULL_LONG), Some(expected));
    }

    // validity_f32
    #[test]
    fn f32_no_nulls() {
        assert_eq!(validity_f32(&[1.0, 2.0, 3.0]), None);
    }

    #[test]
    fn f32_with_nan() {
        assert_eq!(
            validity_f32(&[1.0, f32::NAN, 3.0]),
            Some(vec![true, false, true])
        );
    }

    #[test]
    fn f32_empty() {
        assert_eq!(validity_f32(&[]), None);
    }

    #[test]
    fn f32_infinity_is_not_null() {
        assert_eq!(validity_f32(&[f32::INFINITY, f32::NEG_INFINITY, 1.0]), None);
    }

    #[test]
    fn f32_nan_in_full_chunk() {
        // 20 elements: the NaN at index 4 lies inside the 16-lane chunk.
        let mut data = vec![1.0f32; 20];
        data[4] = f32::NAN;
        let mut expected = vec![true; 20];
        expected[4] = false;
        assert_eq!(validity_f32(&data), Some(expected));
    }

    // validity_f64
    #[test]
    fn f64_no_nulls() {
        assert_eq!(validity_f64(&[1.0, 2.0, 3.0]), None);
    }

    #[test]
    fn f64_with_nan() {
        assert_eq!(
            validity_f64(&[1.0, f64::NAN, 3.0]),
            Some(vec![true, false, true])
        );
    }

    #[test]
    fn f64_empty() {
        assert_eq!(validity_f64(&[]), None);
    }

    #[test]
    fn f64_infinity_is_not_null() {
        assert_eq!(validity_f64(&[f64::INFINITY, f64::NEG_INFINITY, 1.0]), None);
    }

    #[test]
    fn f64_large_aligned_with_null() {
        let mut data = vec![1.0; 8];
        data[7] = f64::NAN;
        let v = validity_f64(&data).unwrap();
        assert!(v[0]);
        assert!(!v[7]);
    }
}
|
||||
317
crates/qroissant-kernels/src/temporal.rs
Normal file
317
crates/qroissant-kernels/src/temporal.rs
Normal file
|
|
@ -0,0 +1,317 @@
|
|||
//! Temporal conversion constants and SIMD transforms for q ↔ Arrow mapping.
|
||||
//!
|
||||
//! q encodes temporal values relative to the millennium epoch (2000-01-01)
|
||||
//! while Arrow uses the Unix epoch (1970-01-01). The helpers here translate
|
||||
//! between the two without touching Arrow types so that this crate stays free
|
||||
//! of Arrow dependencies.
|
||||
//!
|
||||
//! Each transform function uses `portable_simd` for the leading run of whole
//! `N`-element chunks and falls back to a scalar loop for the remaining tail.
|
||||
|
||||
use std::simd::Select;
|
||||
use std::simd::prelude::*;
|
||||
|
||||
use crate::nulls::Q_NULL_DATE;
|
||||
use crate::nulls::Q_NULL_MINUTE;
|
||||
use crate::nulls::Q_NULL_TIMESTAMP;
|
||||
|
||||
/// Days between 1970-01-01 and 2000-01-01.
pub const DATE_OFFSET_DAYS: i32 = 10_957;

/// Nanoseconds between 1970-01-01 and 2000-01-01
/// (`DATE_OFFSET_DAYS` × 86 400 s × 10⁹ ns = 946 684 800 000 000 000).
pub const TIMESTAMP_OFFSET_NS: i64 = DATE_OFFSET_DAYS as i64 * 86_400 * 1_000_000_000;

/// Milliseconds in a day (used for `Datetime` float-day conversion).
pub const MILLIS_PER_DAY: f64 = (24 * 60 * 60 * 1_000) as f64;
|
||||
|
||||
/// Translates a slice of q timestamps (nanoseconds since 2000-01-01) into
|
||||
/// Arrow `TimestampNanosecond` values (nanoseconds since 1970-01-01) in place.
|
||||
///
|
||||
/// Null elements (`i64::MIN`) are left unchanged; the Arrow null buffer
|
||||
/// produced by [`crate::nulls::validity_i64`] will mask them.
|
||||
#[inline]
|
||||
pub fn offset_timestamps(values: &mut [i64]) {
|
||||
const N: usize = 8;
|
||||
let null_v = Simd::<i64, N>::splat(Q_NULL_TIMESTAMP);
|
||||
let offset_v = Simd::<i64, N>::splat(TIMESTAMP_OFFSET_NS);
|
||||
let n_aligned = (values.len() / N) * N;
|
||||
|
||||
for chunk in values[..n_aligned].chunks_exact_mut(N) {
|
||||
let v = Simd::<i64, N>::from_slice(chunk);
|
||||
let mask = v.simd_ne(null_v);
|
||||
let added = v.saturating_add(offset_v);
|
||||
let result = mask.select(added, v);
|
||||
chunk.copy_from_slice(&result.to_array());
|
||||
}
|
||||
for v in &mut values[n_aligned..] {
|
||||
if *v != Q_NULL_TIMESTAMP {
|
||||
*v = v.saturating_add(TIMESTAMP_OFFSET_NS);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Translates a slice of q dates (days since 2000-01-01) into Arrow `Date32`
|
||||
/// values (days since 1970-01-01) in place.
|
||||
///
|
||||
/// Null elements (`i32::MIN`) are left unchanged.
|
||||
#[inline]
|
||||
pub fn offset_dates(values: &mut [i32]) {
|
||||
const N: usize = 16;
|
||||
let null_v = Simd::<i32, N>::splat(Q_NULL_DATE);
|
||||
let offset_v = Simd::<i32, N>::splat(DATE_OFFSET_DAYS);
|
||||
let n_aligned = (values.len() / N) * N;
|
||||
|
||||
for chunk in values[..n_aligned].chunks_exact_mut(N) {
|
||||
let v = Simd::<i32, N>::from_slice(chunk);
|
||||
let mask = v.simd_ne(null_v);
|
||||
let added = v.saturating_add(offset_v);
|
||||
let result = mask.select(added, v);
|
||||
chunk.copy_from_slice(&result.to_array());
|
||||
}
|
||||
for v in &mut values[n_aligned..] {
|
||||
if *v != Q_NULL_DATE {
|
||||
*v = v.saturating_add(DATE_OFFSET_DAYS);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Translates a slice of q minute values (minutes) into Arrow `Time32Second`
|
||||
/// values (seconds) in place.
|
||||
///
|
||||
/// Null elements (`i32::MIN`) are left unchanged.
|
||||
#[inline]
|
||||
pub fn minutes_to_seconds(values: &mut [i32]) {
|
||||
const N: usize = 16;
|
||||
let null_v = Simd::<i32, N>::splat(Q_NULL_MINUTE);
|
||||
let sixty_v = Simd::<i32, N>::splat(60_i32);
|
||||
let n_aligned = (values.len() / N) * N;
|
||||
|
||||
for chunk in values[..n_aligned].chunks_exact_mut(N) {
|
||||
let v = Simd::<i32, N>::from_slice(chunk);
|
||||
let mask = v.simd_ne(null_v);
|
||||
// Non-null minutes multiplied by 60; null sentinels selected back in.
|
||||
// Wrapping multiply is safe here: the select restores the original
|
||||
// sentinel value for null lanes, so overflow in null lanes is harmless.
|
||||
let multiplied = v * sixty_v;
|
||||
let result = mask.select(multiplied, v);
|
||||
chunk.copy_from_slice(&result.to_array());
|
||||
}
|
||||
for v in &mut values[n_aligned..] {
|
||||
if *v != Q_NULL_MINUTE {
|
||||
*v = v.saturating_mul(60);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies q timestamps (nanoseconds since 2000-01-01) from `src` into `dst`,
|
||||
/// applying the Unix-epoch offset in a single SIMD pass.
|
||||
///
|
||||
/// Avoids the two-pass cost of `to_vec()` + `offset_timestamps()`:
|
||||
/// one read from `src`, one write to `dst`, no intermediate allocation.
|
||||
/// Null elements (`i64::MIN`) are copied unchanged.
|
||||
///
|
||||
/// `src` and `dst` must have the same length.
|
||||
#[inline]
|
||||
pub fn copy_and_offset_timestamps(src: &[i64], dst: &mut [i64]) {
|
||||
debug_assert_eq!(src.len(), dst.len());
|
||||
const N: usize = 8;
|
||||
let null_v = Simd::<i64, N>::splat(Q_NULL_TIMESTAMP);
|
||||
let offset_v = Simd::<i64, N>::splat(TIMESTAMP_OFFSET_NS);
|
||||
let n_aligned = (src.len() / N) * N;
|
||||
|
||||
for (s, d) in src[..n_aligned]
|
||||
.chunks_exact(N)
|
||||
.zip(dst[..n_aligned].chunks_exact_mut(N))
|
||||
{
|
||||
let v = Simd::<i64, N>::from_slice(s);
|
||||
let mask = v.simd_ne(null_v);
|
||||
let result = mask.select(v.saturating_add(offset_v), v);
|
||||
d.copy_from_slice(&result.to_array());
|
||||
}
|
||||
for (s, d) in src[n_aligned..].iter().zip(dst[n_aligned..].iter_mut()) {
|
||||
*d = if *s != Q_NULL_TIMESTAMP {
|
||||
s.saturating_add(TIMESTAMP_OFFSET_NS)
|
||||
} else {
|
||||
*s
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies q dates (days since 2000-01-01) from `src` into `dst`,
|
||||
/// applying the Unix-epoch offset in a single SIMD pass.
|
||||
///
|
||||
/// `src` and `dst` must have the same length.
|
||||
#[inline]
|
||||
pub fn copy_and_offset_dates(src: &[i32], dst: &mut [i32]) {
|
||||
debug_assert_eq!(src.len(), dst.len());
|
||||
const N: usize = 16;
|
||||
let null_v = Simd::<i32, N>::splat(Q_NULL_DATE);
|
||||
let offset_v = Simd::<i32, N>::splat(DATE_OFFSET_DAYS);
|
||||
let n_aligned = (src.len() / N) * N;
|
||||
|
||||
for (s, d) in src[..n_aligned]
|
||||
.chunks_exact(N)
|
||||
.zip(dst[..n_aligned].chunks_exact_mut(N))
|
||||
{
|
||||
let v = Simd::<i32, N>::from_slice(s);
|
||||
let mask = v.simd_ne(null_v);
|
||||
let result = mask.select(v.saturating_add(offset_v), v);
|
||||
d.copy_from_slice(&result.to_array());
|
||||
}
|
||||
for (s, d) in src[n_aligned..].iter().zip(dst[n_aligned..].iter_mut()) {
|
||||
*d = if *s != Q_NULL_DATE {
|
||||
s.saturating_add(DATE_OFFSET_DAYS)
|
||||
} else {
|
||||
*s
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies q minute values from `src` into `dst`, converting minutes → seconds
|
||||
/// in a single SIMD pass.
|
||||
///
|
||||
/// `src` and `dst` must have the same length.
|
||||
#[inline]
|
||||
pub fn copy_and_minutes_to_seconds(src: &[i32], dst: &mut [i32]) {
|
||||
debug_assert_eq!(src.len(), dst.len());
|
||||
const N: usize = 16;
|
||||
let null_v = Simd::<i32, N>::splat(Q_NULL_MINUTE);
|
||||
let sixty_v = Simd::<i32, N>::splat(60_i32);
|
||||
let n_aligned = (src.len() / N) * N;
|
||||
|
||||
for (s, d) in src[..n_aligned]
|
||||
.chunks_exact(N)
|
||||
.zip(dst[..n_aligned].chunks_exact_mut(N))
|
||||
{
|
||||
let v = Simd::<i32, N>::from_slice(s);
|
||||
let mask = v.simd_ne(null_v);
|
||||
let multiplied = v * sixty_v;
|
||||
let result = mask.select(multiplied, v);
|
||||
d.copy_from_slice(&result.to_array());
|
||||
}
|
||||
for (s, d) in src[n_aligned..].iter().zip(dst[n_aligned..].iter_mut()) {
|
||||
*d = if *s != Q_NULL_MINUTE {
|
||||
s.saturating_mul(60)
|
||||
} else {
|
||||
*s
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------------
    // offset_timestamps
    // -----------------------------------------------------------------------

    #[test]
    fn offset_timestamps_basic() {
        // q timestamp 1 ns since 2000 -> Unix epoch ns
        let mut values = vec![1i64];
        offset_timestamps(&mut values);
        assert_eq!(values[0], TIMESTAMP_OFFSET_NS + 1);
    }

    #[test]
    fn offset_timestamps_zero() {
        let mut values = vec![0i64];
        offset_timestamps(&mut values);
        assert_eq!(values[0], TIMESTAMP_OFFSET_NS);
    }

    #[test]
    fn offset_timestamps_preserves_null() {
        let mut values = vec![Q_NULL_TIMESTAMP];
        offset_timestamps(&mut values);
        assert_eq!(values[0], Q_NULL_TIMESTAMP);
    }

    #[test]
    fn offset_timestamps_mixed() {
        let mut values = vec![0, Q_NULL_TIMESTAMP, 1000, Q_NULL_TIMESTAMP, 2000];
        offset_timestamps(&mut values);
        assert_eq!(values[0], TIMESTAMP_OFFSET_NS);
        assert_eq!(values[1], Q_NULL_TIMESTAMP);
        assert_eq!(values[2], TIMESTAMP_OFFSET_NS + 1000);
        assert_eq!(values[3], Q_NULL_TIMESTAMP);
        assert_eq!(values[4], TIMESTAMP_OFFSET_NS + 2000);
    }

    #[test]
    fn offset_timestamps_empty() {
        let mut values: Vec<i64> = vec![];
        offset_timestamps(&mut values);
        assert!(values.is_empty());
    }

    #[test]
    fn offset_timestamps_full_chunk_and_tail() {
        // 20 elements: two full 8-lane chunks plus a 4-element scalar tail,
        // with one null in a chunk and one in the tail.
        let mut values: Vec<i64> = (0..20).collect();
        values[3] = Q_NULL_TIMESTAMP;
        values[18] = Q_NULL_TIMESTAMP;
        offset_timestamps(&mut values);
        for (i, &v) in values.iter().enumerate() {
            if i == 3 || i == 18 {
                assert_eq!(v, Q_NULL_TIMESTAMP);
            } else {
                assert_eq!(v, TIMESTAMP_OFFSET_NS + i as i64);
            }
        }
    }

    // -----------------------------------------------------------------------
    // offset_dates
    // -----------------------------------------------------------------------

    #[test]
    fn offset_dates_basic() {
        let mut values = vec![0i32]; // 2000-01-01 -> days since Unix epoch
        offset_dates(&mut values);
        assert_eq!(values[0], DATE_OFFSET_DAYS);
    }

    #[test]
    fn offset_dates_preserves_null() {
        let mut values = vec![Q_NULL_DATE];
        offset_dates(&mut values);
        assert_eq!(values[0], Q_NULL_DATE);
    }

    #[test]
    fn offset_dates_mixed() {
        let mut values = vec![0, Q_NULL_DATE, 1, Q_NULL_DATE];
        offset_dates(&mut values);
        assert_eq!(values[0], DATE_OFFSET_DAYS);
        assert_eq!(values[1], Q_NULL_DATE);
        assert_eq!(values[2], DATE_OFFSET_DAYS + 1);
        assert_eq!(values[3], Q_NULL_DATE);
    }

    #[test]
    fn offset_dates_empty() {
        let mut values: Vec<i32> = vec![];
        offset_dates(&mut values);
        assert!(values.is_empty());
    }

    #[test]
    fn offset_dates_full_chunk_and_tail() {
        // 20 elements: one full 16-lane chunk plus a 4-element scalar tail.
        let mut values: Vec<i32> = (0..20).collect();
        values[3] = Q_NULL_DATE;
        values[18] = Q_NULL_DATE;
        offset_dates(&mut values);
        for (i, &v) in values.iter().enumerate() {
            if i == 3 || i == 18 {
                assert_eq!(v, Q_NULL_DATE);
            } else {
                assert_eq!(v, DATE_OFFSET_DAYS + i as i32);
            }
        }
    }

    // -----------------------------------------------------------------------
    // minutes_to_seconds
    // -----------------------------------------------------------------------

    #[test]
    fn minutes_to_seconds_basic() {
        let mut values = vec![10i32]; // 10 minutes -> 600 seconds
        minutes_to_seconds(&mut values);
        assert_eq!(values[0], 600);
    }

    #[test]
    fn minutes_to_seconds_preserves_null() {
        let mut values = vec![Q_NULL_MINUTE];
        minutes_to_seconds(&mut values);
        assert_eq!(values[0], Q_NULL_MINUTE);
    }

    #[test]
    fn minutes_to_seconds_mixed() {
        let mut values = vec![1, Q_NULL_MINUTE, 60];
        minutes_to_seconds(&mut values);
        assert_eq!(values[0], 60);
        assert_eq!(values[1], Q_NULL_MINUTE);
        assert_eq!(values[2], 3600);
    }

    #[test]
    fn minutes_to_seconds_empty() {
        let mut values: Vec<i32> = vec![];
        minutes_to_seconds(&mut values);
        assert!(values.is_empty());
    }

    #[test]
    fn minutes_to_seconds_full_chunk_and_tail() {
        // 20 elements: one full 16-lane chunk plus a 4-element scalar tail.
        let mut values: Vec<i32> = (0..20).collect();
        values[2] = Q_NULL_MINUTE;
        values[17] = Q_NULL_MINUTE;
        minutes_to_seconds(&mut values);
        for (i, &v) in values.iter().enumerate() {
            if i == 2 || i == 17 {
                assert_eq!(v, Q_NULL_MINUTE);
            } else {
                assert_eq!(v, i as i32 * 60);
            }
        }
    }

    // -----------------------------------------------------------------------
    // copy_and_* variants: must agree with their in-place counterparts
    // -----------------------------------------------------------------------

    #[test]
    fn copy_and_offset_timestamps_matches_in_place() {
        let mut src: Vec<i64> = (0..20).collect();
        src[5] = Q_NULL_TIMESTAMP;
        src[19] = Q_NULL_TIMESTAMP;
        let mut dst = vec![0i64; src.len()];
        copy_and_offset_timestamps(&src, &mut dst);

        let mut expected = src.clone();
        offset_timestamps(&mut expected);
        assert_eq!(dst, expected);
    }

    #[test]
    fn copy_and_offset_dates_matches_in_place() {
        let mut src: Vec<i32> = (0..20).collect();
        src[5] = Q_NULL_DATE;
        src[19] = Q_NULL_DATE;
        let mut dst = vec![0i32; src.len()];
        copy_and_offset_dates(&src, &mut dst);

        let mut expected = src.clone();
        offset_dates(&mut expected);
        assert_eq!(dst, expected);
    }

    #[test]
    fn copy_and_minutes_to_seconds_matches_in_place() {
        let mut src: Vec<i32> = (0..20).collect();
        src[5] = Q_NULL_MINUTE;
        src[19] = Q_NULL_MINUTE;
        let mut dst = vec![0i32; src.len()];
        copy_and_minutes_to_seconds(&src, &mut dst);

        let mut expected = src.clone();
        minutes_to_seconds(&mut expected);
        assert_eq!(dst, expected);
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue