Vendor qroissant 0.3.0 baseline
This commit is contained in:
commit
53ac90fe84
56 changed files with 18309 additions and 0 deletions
317
crates/qroissant-kernels/src/temporal.rs
Normal file
317
crates/qroissant-kernels/src/temporal.rs
Normal file
|
|
@ -0,0 +1,317 @@
|
|||
//! Temporal conversion constants and SIMD transforms for q ↔ Arrow mapping.
|
||||
//!
|
||||
//! q encodes temporal values relative to the millennium epoch (2000-01-01)
|
||||
//! while Arrow uses the Unix epoch (1970-01-01). The helpers here translate
|
||||
//! between the two without touching Arrow types so that this crate stays free
|
||||
//! of Arrow dependencies.
|
||||
//!
|
||||
//! Each transform function uses `portable_simd` on full-width chunks taken from
//! the start of the slice and falls back to a scalar loop for the remaining tail.
|
||||
|
||||
use std::simd::Select;
|
||||
use std::simd::prelude::*;
|
||||
|
||||
use crate::nulls::Q_NULL_DATE;
|
||||
use crate::nulls::Q_NULL_MINUTE;
|
||||
use crate::nulls::Q_NULL_TIMESTAMP;
|
||||
|
||||
/// Nanoseconds between 1970-01-01 and 2000-01-01.
|
||||
pub const TIMESTAMP_OFFSET_NS: i64 = 946_684_800_000_000_000;
|
||||
|
||||
/// Days between 1970-01-01 and 2000-01-01.
|
||||
pub const DATE_OFFSET_DAYS: i32 = 10_957;
|
||||
|
||||
/// Milliseconds in a day (used for `Datetime` float-day conversion).
|
||||
pub const MILLIS_PER_DAY: f64 = 86_400_000.0;
|
||||
|
||||
/// Translates a slice of q timestamps (nanoseconds since 2000-01-01) into
|
||||
/// Arrow `TimestampNanosecond` values (nanoseconds since 1970-01-01) in place.
|
||||
///
|
||||
/// Null elements (`i64::MIN`) are left unchanged; the Arrow null buffer
|
||||
/// produced by [`crate::nulls::validity_i64`] will mask them.
|
||||
#[inline]
|
||||
pub fn offset_timestamps(values: &mut [i64]) {
|
||||
const N: usize = 8;
|
||||
let null_v = Simd::<i64, N>::splat(Q_NULL_TIMESTAMP);
|
||||
let offset_v = Simd::<i64, N>::splat(TIMESTAMP_OFFSET_NS);
|
||||
let n_aligned = (values.len() / N) * N;
|
||||
|
||||
for chunk in values[..n_aligned].chunks_exact_mut(N) {
|
||||
let v = Simd::<i64, N>::from_slice(chunk);
|
||||
let mask = v.simd_ne(null_v);
|
||||
let added = v.saturating_add(offset_v);
|
||||
let result = mask.select(added, v);
|
||||
chunk.copy_from_slice(&result.to_array());
|
||||
}
|
||||
for v in &mut values[n_aligned..] {
|
||||
if *v != Q_NULL_TIMESTAMP {
|
||||
*v = v.saturating_add(TIMESTAMP_OFFSET_NS);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Translates a slice of q dates (days since 2000-01-01) into Arrow `Date32`
|
||||
/// values (days since 1970-01-01) in place.
|
||||
///
|
||||
/// Null elements (`i32::MIN`) are left unchanged.
|
||||
#[inline]
|
||||
pub fn offset_dates(values: &mut [i32]) {
|
||||
const N: usize = 16;
|
||||
let null_v = Simd::<i32, N>::splat(Q_NULL_DATE);
|
||||
let offset_v = Simd::<i32, N>::splat(DATE_OFFSET_DAYS);
|
||||
let n_aligned = (values.len() / N) * N;
|
||||
|
||||
for chunk in values[..n_aligned].chunks_exact_mut(N) {
|
||||
let v = Simd::<i32, N>::from_slice(chunk);
|
||||
let mask = v.simd_ne(null_v);
|
||||
let added = v.saturating_add(offset_v);
|
||||
let result = mask.select(added, v);
|
||||
chunk.copy_from_slice(&result.to_array());
|
||||
}
|
||||
for v in &mut values[n_aligned..] {
|
||||
if *v != Q_NULL_DATE {
|
||||
*v = v.saturating_add(DATE_OFFSET_DAYS);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Translates a slice of q minute values (minutes) into Arrow `Time32Second`
|
||||
/// values (seconds) in place.
|
||||
///
|
||||
/// Null elements (`i32::MIN`) are left unchanged.
|
||||
#[inline]
|
||||
pub fn minutes_to_seconds(values: &mut [i32]) {
|
||||
const N: usize = 16;
|
||||
let null_v = Simd::<i32, N>::splat(Q_NULL_MINUTE);
|
||||
let sixty_v = Simd::<i32, N>::splat(60_i32);
|
||||
let n_aligned = (values.len() / N) * N;
|
||||
|
||||
for chunk in values[..n_aligned].chunks_exact_mut(N) {
|
||||
let v = Simd::<i32, N>::from_slice(chunk);
|
||||
let mask = v.simd_ne(null_v);
|
||||
// Non-null minutes multiplied by 60; null sentinels selected back in.
|
||||
// Wrapping multiply is safe here: the select restores the original
|
||||
// sentinel value for null lanes, so overflow in null lanes is harmless.
|
||||
let multiplied = v * sixty_v;
|
||||
let result = mask.select(multiplied, v);
|
||||
chunk.copy_from_slice(&result.to_array());
|
||||
}
|
||||
for v in &mut values[n_aligned..] {
|
||||
if *v != Q_NULL_MINUTE {
|
||||
*v = v.saturating_mul(60);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies q timestamps (nanoseconds since 2000-01-01) from `src` into `dst`,
|
||||
/// applying the Unix-epoch offset in a single SIMD pass.
|
||||
///
|
||||
/// Avoids the two-pass cost of `to_vec()` + `offset_timestamps()`:
|
||||
/// one read from `src`, one write to `dst`, no intermediate allocation.
|
||||
/// Null elements (`i64::MIN`) are copied unchanged.
|
||||
///
|
||||
/// `src` and `dst` must have the same length.
|
||||
#[inline]
|
||||
pub fn copy_and_offset_timestamps(src: &[i64], dst: &mut [i64]) {
|
||||
debug_assert_eq!(src.len(), dst.len());
|
||||
const N: usize = 8;
|
||||
let null_v = Simd::<i64, N>::splat(Q_NULL_TIMESTAMP);
|
||||
let offset_v = Simd::<i64, N>::splat(TIMESTAMP_OFFSET_NS);
|
||||
let n_aligned = (src.len() / N) * N;
|
||||
|
||||
for (s, d) in src[..n_aligned]
|
||||
.chunks_exact(N)
|
||||
.zip(dst[..n_aligned].chunks_exact_mut(N))
|
||||
{
|
||||
let v = Simd::<i64, N>::from_slice(s);
|
||||
let mask = v.simd_ne(null_v);
|
||||
let result = mask.select(v.saturating_add(offset_v), v);
|
||||
d.copy_from_slice(&result.to_array());
|
||||
}
|
||||
for (s, d) in src[n_aligned..].iter().zip(dst[n_aligned..].iter_mut()) {
|
||||
*d = if *s != Q_NULL_TIMESTAMP {
|
||||
s.saturating_add(TIMESTAMP_OFFSET_NS)
|
||||
} else {
|
||||
*s
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies q dates (days since 2000-01-01) from `src` into `dst`,
|
||||
/// applying the Unix-epoch offset in a single SIMD pass.
|
||||
///
|
||||
/// `src` and `dst` must have the same length.
|
||||
#[inline]
|
||||
pub fn copy_and_offset_dates(src: &[i32], dst: &mut [i32]) {
|
||||
debug_assert_eq!(src.len(), dst.len());
|
||||
const N: usize = 16;
|
||||
let null_v = Simd::<i32, N>::splat(Q_NULL_DATE);
|
||||
let offset_v = Simd::<i32, N>::splat(DATE_OFFSET_DAYS);
|
||||
let n_aligned = (src.len() / N) * N;
|
||||
|
||||
for (s, d) in src[..n_aligned]
|
||||
.chunks_exact(N)
|
||||
.zip(dst[..n_aligned].chunks_exact_mut(N))
|
||||
{
|
||||
let v = Simd::<i32, N>::from_slice(s);
|
||||
let mask = v.simd_ne(null_v);
|
||||
let result = mask.select(v.saturating_add(offset_v), v);
|
||||
d.copy_from_slice(&result.to_array());
|
||||
}
|
||||
for (s, d) in src[n_aligned..].iter().zip(dst[n_aligned..].iter_mut()) {
|
||||
*d = if *s != Q_NULL_DATE {
|
||||
s.saturating_add(DATE_OFFSET_DAYS)
|
||||
} else {
|
||||
*s
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies q minute values from `src` into `dst`, converting minutes → seconds
|
||||
/// in a single SIMD pass.
|
||||
///
|
||||
/// `src` and `dst` must have the same length.
|
||||
#[inline]
|
||||
pub fn copy_and_minutes_to_seconds(src: &[i32], dst: &mut [i32]) {
|
||||
debug_assert_eq!(src.len(), dst.len());
|
||||
const N: usize = 16;
|
||||
let null_v = Simd::<i32, N>::splat(Q_NULL_MINUTE);
|
||||
let sixty_v = Simd::<i32, N>::splat(60_i32);
|
||||
let n_aligned = (src.len() / N) * N;
|
||||
|
||||
for (s, d) in src[..n_aligned]
|
||||
.chunks_exact(N)
|
||||
.zip(dst[..n_aligned].chunks_exact_mut(N))
|
||||
{
|
||||
let v = Simd::<i32, N>::from_slice(s);
|
||||
let mask = v.simd_ne(null_v);
|
||||
let multiplied = v * sixty_v;
|
||||
let result = mask.select(multiplied, v);
|
||||
d.copy_from_slice(&result.to_array());
|
||||
}
|
||||
for (s, d) in src[n_aligned..].iter().zip(dst[n_aligned..].iter_mut()) {
|
||||
*d = if *s != Q_NULL_MINUTE {
|
||||
s.saturating_mul(60)
|
||||
} else {
|
||||
*s
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// offset_timestamps
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
#[test]
|
||||
fn offset_timestamps_basic() {
|
||||
// q timestamp 1 ns since 2000 -> Unix epoch ns
|
||||
let mut values = vec![1i64];
|
||||
offset_timestamps(&mut values);
|
||||
assert_eq!(values[0], TIMESTAMP_OFFSET_NS + 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn offset_timestamps_zero() {
|
||||
let mut values = vec![0i64];
|
||||
offset_timestamps(&mut values);
|
||||
assert_eq!(values[0], TIMESTAMP_OFFSET_NS);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn offset_timestamps_preserves_null() {
|
||||
let mut values = vec![Q_NULL_TIMESTAMP];
|
||||
offset_timestamps(&mut values);
|
||||
assert_eq!(values[0], Q_NULL_TIMESTAMP);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn offset_timestamps_mixed() {
|
||||
let mut values = vec![0, Q_NULL_TIMESTAMP, 1000, Q_NULL_TIMESTAMP, 2000];
|
||||
offset_timestamps(&mut values);
|
||||
assert_eq!(values[0], TIMESTAMP_OFFSET_NS);
|
||||
assert_eq!(values[1], Q_NULL_TIMESTAMP);
|
||||
assert_eq!(values[2], TIMESTAMP_OFFSET_NS + 1000);
|
||||
assert_eq!(values[3], Q_NULL_TIMESTAMP);
|
||||
assert_eq!(values[4], TIMESTAMP_OFFSET_NS + 2000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn offset_timestamps_empty() {
|
||||
let mut values: Vec<i64> = vec![];
|
||||
offset_timestamps(&mut values);
|
||||
assert!(values.is_empty());
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// offset_dates
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
#[test]
|
||||
fn offset_dates_basic() {
|
||||
let mut values = vec![0i32]; // 2000-01-01 -> days since Unix epoch
|
||||
offset_dates(&mut values);
|
||||
assert_eq!(values[0], DATE_OFFSET_DAYS);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn offset_dates_preserves_null() {
|
||||
let mut values = vec![Q_NULL_DATE];
|
||||
offset_dates(&mut values);
|
||||
assert_eq!(values[0], Q_NULL_DATE);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn offset_dates_mixed() {
|
||||
let mut values = vec![0, Q_NULL_DATE, 1, Q_NULL_DATE];
|
||||
offset_dates(&mut values);
|
||||
assert_eq!(values[0], DATE_OFFSET_DAYS);
|
||||
assert_eq!(values[1], Q_NULL_DATE);
|
||||
assert_eq!(values[2], DATE_OFFSET_DAYS + 1);
|
||||
assert_eq!(values[3], Q_NULL_DATE);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn offset_dates_empty() {
|
||||
let mut values: Vec<i32> = vec![];
|
||||
offset_dates(&mut values);
|
||||
assert!(values.is_empty());
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// minutes_to_seconds
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
#[test]
|
||||
fn minutes_to_seconds_basic() {
|
||||
let mut values = vec![10i32]; // 10 minutes -> 600 seconds
|
||||
minutes_to_seconds(&mut values);
|
||||
assert_eq!(values[0], 600);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn minutes_to_seconds_preserves_null() {
|
||||
let mut values = vec![Q_NULL_MINUTE];
|
||||
minutes_to_seconds(&mut values);
|
||||
assert_eq!(values[0], Q_NULL_MINUTE);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn minutes_to_seconds_mixed() {
|
||||
let mut values = vec![1, Q_NULL_MINUTE, 60];
|
||||
minutes_to_seconds(&mut values);
|
||||
assert_eq!(values[0], 60);
|
||||
assert_eq!(values[1], Q_NULL_MINUTE);
|
||||
assert_eq!(values[2], 3600);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn minutes_to_seconds_empty() {
|
||||
let mut values: Vec<i32> = vec![];
|
||||
minutes_to_seconds(&mut values);
|
||||
assert!(values.is_empty());
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue