qroissant/crates/qroissant-kernels/src/temporal.rs
CamZalewski aa2c0a2ec7 fix(kernels): drop removed std::simd::Select import
`std::simd::Select` no longer exists in current nightly's portable_simd
API; methods now hang off `Mask` inherently. The unused import was
preventing the crate (and everything depending on it -- qroissant-arrow,
qroissant-python) from building on a current nightly toolchain.
2026-05-20 14:41:37 +01:00

316 lines
10 KiB
Rust

//! Temporal conversion constants and SIMD transforms for q ↔ Arrow mapping.
//!
//! q encodes temporal values relative to the millennium epoch (2000-01-01)
//! while Arrow uses the Unix epoch (1970-01-01). The helpers here translate
//! between the two without touching Arrow types so that this crate stays free
//! of Arrow dependencies.
//!
//! Each transform function uses `portable_simd` for the leading run of full
//! lane-width chunks and falls back to a scalar loop for the remaining tail.
use std::simd::prelude::*;
use crate::nulls::Q_NULL_DATE;
use crate::nulls::Q_NULL_MINUTE;
use crate::nulls::Q_NULL_TIMESTAMP;
/// Nanoseconds between 1970-01-01 and 2000-01-01.
///
/// Numerically this is 10 957 days × 86 400 s/day × 10⁹ ns/s, i.e. the same
/// span as [`DATE_OFFSET_DAYS`] expressed in nanoseconds.
pub const TIMESTAMP_OFFSET_NS: i64 = 946_684_800_000_000_000;
/// Days between 1970-01-01 and 2000-01-01.
///
/// Thirty years of 365 days plus the seven leap days in that span
/// (30 × 365 + 7 = 10 957).
pub const DATE_OFFSET_DAYS: i32 = 10_957;
/// Milliseconds in a day (used for `Datetime` float-day conversion).
///
/// 24 h × 60 min × 60 s × 1 000 ms, stored as `f64`.
pub const MILLIS_PER_DAY: f64 = 86_400_000.0;
/// Rebases q timestamps onto the Unix epoch, in place.
///
/// Each element is interpreted as nanoseconds since 2000-01-01 and has
/// [`TIMESTAMP_OFFSET_NS`] added (saturating), yielding the nanoseconds since
/// 1970-01-01 expected by Arrow `TimestampNanosecond`.
///
/// Elements equal to the q null sentinel (`i64::MIN`) are left unchanged; the
/// Arrow null buffer produced by [`crate::nulls::validity_i64`] will mask them.
#[inline]
pub fn offset_timestamps(values: &mut [i64]) {
    const LANES: usize = 8;
    let sentinel = Simd::<i64, LANES>::splat(Q_NULL_TIMESTAMP);
    let shift = Simd::<i64, LANES>::splat(TIMESTAMP_OFFSET_NS);

    // Full-width blocks go through the vector path; whatever is left over is
    // handed back by `into_remainder` for the scalar loop below.
    let mut blocks = values.chunks_exact_mut(LANES);
    for block in blocks.by_ref() {
        let lanes = Simd::<i64, LANES>::from_slice(block);
        let is_null = lanes.simd_eq(sentinel);
        // Null lanes keep their sentinel; all others take the shifted value.
        let rebased = is_null.select(lanes, lanes.saturating_add(shift));
        block.copy_from_slice(&rebased.to_array());
    }

    for slot in blocks.into_remainder() {
        if *slot == Q_NULL_TIMESTAMP {
            continue;
        }
        *slot = slot.saturating_add(TIMESTAMP_OFFSET_NS);
    }
}
/// Rebases q dates onto the Unix epoch, in place.
///
/// Each element is interpreted as days since 2000-01-01 and has
/// [`DATE_OFFSET_DAYS`] added (saturating), yielding the days since
/// 1970-01-01 expected by Arrow `Date32`.
///
/// Elements equal to the q null sentinel (`i32::MIN`) are left unchanged.
#[inline]
pub fn offset_dates(values: &mut [i32]) {
    const LANES: usize = 16;
    let sentinel = Simd::<i32, LANES>::splat(Q_NULL_DATE);
    let shift = Simd::<i32, LANES>::splat(DATE_OFFSET_DAYS);

    // Split once into the part that divides evenly into vector blocks and the
    // short remainder that is handled element by element.
    let vector_len = values.len() - values.len() % LANES;
    let (body, tail) = values.split_at_mut(vector_len);

    for block in body.chunks_exact_mut(LANES) {
        let lanes = Simd::<i32, LANES>::from_slice(block);
        let is_null = lanes.simd_eq(sentinel);
        // Null lanes keep their sentinel; all others take the shifted value.
        let rebased = is_null.select(lanes, lanes.saturating_add(shift));
        block.copy_from_slice(&rebased.to_array());
    }

    for slot in tail.iter_mut() {
        if *slot == Q_NULL_DATE {
            continue;
        }
        *slot = slot.saturating_add(DATE_OFFSET_DAYS);
    }
}
/// Translates a slice of q minute values (minutes) into Arrow `Time32Second`
/// values (seconds) in place.
///
/// Null elements (`i32::MIN`) are left unchanged. Every other element is
/// multiplied by 60, saturating at the `i32` bounds. The SIMD chunks and the
/// scalar tail apply the same saturating rule, so the result for a given
/// value does not depend on where it sits in the slice.
#[inline]
pub fn minutes_to_seconds(values: &mut [i32]) {
    const N: usize = 16;
    // Inclusive range of minute counts whose product with 60 fits in `i32`.
    const MAX_EXACT_MINUTES: i32 = i32::MAX / 60;
    const MIN_EXACT_MINUTES: i32 = i32::MIN / 60;

    let null_v = Simd::<i32, N>::splat(Q_NULL_MINUTE);
    let sixty_v = Simd::<i32, N>::splat(60_i32);
    let max_exact_v = Simd::<i32, N>::splat(MAX_EXACT_MINUTES);
    let min_exact_v = Simd::<i32, N>::splat(MIN_EXACT_MINUTES);
    let ceiling_v = Simd::<i32, N>::splat(i32::MAX);
    let floor_v = Simd::<i32, N>::splat(i32::MIN);

    let n_aligned = (values.len() / N) * N;
    for chunk in values[..n_aligned].chunks_exact_mut(N) {
        let v = Simd::<i32, N>::from_slice(chunk);
        let not_null = v.simd_ne(null_v);
        let overflows = v.simd_gt(max_exact_v);
        let underflows = v.simd_lt(min_exact_v);
        // The vector `*` wraps on overflow, so lanes outside the exact range
        // are replaced with the saturated bound to match `saturating_mul` in
        // the scalar tail.
        let wrapped = v * sixty_v;
        let saturated = overflows.select(ceiling_v, underflows.select(floor_v, wrapped));
        // Null sentinels are selected back in last, untouched.
        let result = not_null.select(saturated, v);
        chunk.copy_from_slice(&result.to_array());
    }
    for v in &mut values[n_aligned..] {
        if *v != Q_NULL_MINUTE {
            *v = v.saturating_mul(60);
        }
    }
}
/// Writes Unix-epoch timestamps into `dst` from the q timestamps in `src`.
///
/// Each source element (nanoseconds since 2000-01-01) is read once, has
/// [`TIMESTAMP_OFFSET_NS`] added (saturating), and is written once to the
/// matching position in `dst`. This replaces a `to_vec()` followed by
/// `offset_timestamps()` with a single pass and no intermediate allocation.
///
/// Elements equal to the q null sentinel (`i64::MIN`) are copied unchanged.
///
/// `src` and `dst` must have the same length (checked in debug builds).
#[inline]
pub fn copy_and_offset_timestamps(src: &[i64], dst: &mut [i64]) {
    debug_assert_eq!(src.len(), dst.len());
    const LANES: usize = 8;
    let sentinel = Simd::<i64, LANES>::splat(Q_NULL_TIMESTAMP);
    let shift = Simd::<i64, LANES>::splat(TIMESTAMP_OFFSET_NS);

    // Number of leading elements that divide evenly into vector blocks.
    let vector_len = src.len() - src.len() % LANES;

    let src_blocks = src[..vector_len].chunks_exact(LANES);
    let dst_blocks = dst[..vector_len].chunks_exact_mut(LANES);
    for (out, input) in dst_blocks.zip(src_blocks) {
        let lanes = Simd::<i64, LANES>::from_slice(input);
        let is_null = lanes.simd_eq(sentinel);
        // Null lanes keep their sentinel; all others take the shifted value.
        let rebased = is_null.select(lanes, lanes.saturating_add(shift));
        out.copy_from_slice(&rebased.to_array());
    }

    let src_tail = &src[vector_len..];
    let dst_tail = &mut dst[vector_len..];
    for (out, &raw) in dst_tail.iter_mut().zip(src_tail) {
        *out = if raw == Q_NULL_TIMESTAMP {
            raw
        } else {
            raw.saturating_add(TIMESTAMP_OFFSET_NS)
        };
    }
}
/// Writes Unix-epoch dates into `dst` from the q dates in `src`.
///
/// Each source element (days since 2000-01-01) is read once, has
/// [`DATE_OFFSET_DAYS`] added (saturating), and is written once to the
/// matching position in `dst`. Elements equal to `Q_NULL_DATE` are copied
/// unchanged.
///
/// `src` and `dst` must have the same length (checked in debug builds).
#[inline]
pub fn copy_and_offset_dates(src: &[i32], dst: &mut [i32]) {
    debug_assert_eq!(src.len(), dst.len());
    const LANES: usize = 16;
    let sentinel = Simd::<i32, LANES>::splat(Q_NULL_DATE);
    let shift = Simd::<i32, LANES>::splat(DATE_OFFSET_DAYS);

    // Number of leading elements that divide evenly into vector blocks.
    let vector_len = src.len() - src.len() % LANES;

    let src_blocks = src[..vector_len].chunks_exact(LANES);
    let dst_blocks = dst[..vector_len].chunks_exact_mut(LANES);
    for (out, input) in dst_blocks.zip(src_blocks) {
        let lanes = Simd::<i32, LANES>::from_slice(input);
        let is_null = lanes.simd_eq(sentinel);
        // Null lanes keep their sentinel; all others take the shifted value.
        let rebased = is_null.select(lanes, lanes.saturating_add(shift));
        out.copy_from_slice(&rebased.to_array());
    }

    let src_tail = &src[vector_len..];
    let dst_tail = &mut dst[vector_len..];
    for (out, &raw) in dst_tail.iter_mut().zip(src_tail) {
        *out = if raw == Q_NULL_DATE {
            raw
        } else {
            raw.saturating_add(DATE_OFFSET_DAYS)
        };
    }
}
/// Copies q minute values from `src` into `dst`, converting minutes → seconds
/// in a single SIMD pass.
///
/// Elements equal to `Q_NULL_MINUTE` are copied unchanged. Every other
/// element is multiplied by 60, saturating at the `i32` bounds. The SIMD
/// chunks and the scalar tail apply the same saturating rule, so the result
/// for a given value does not depend on where it sits in the slice.
///
/// `src` and `dst` must have the same length (checked in debug builds).
#[inline]
pub fn copy_and_minutes_to_seconds(src: &[i32], dst: &mut [i32]) {
    debug_assert_eq!(src.len(), dst.len());
    const N: usize = 16;
    // Inclusive range of minute counts whose product with 60 fits in `i32`.
    const MAX_EXACT_MINUTES: i32 = i32::MAX / 60;
    const MIN_EXACT_MINUTES: i32 = i32::MIN / 60;

    let null_v = Simd::<i32, N>::splat(Q_NULL_MINUTE);
    let sixty_v = Simd::<i32, N>::splat(60_i32);
    let max_exact_v = Simd::<i32, N>::splat(MAX_EXACT_MINUTES);
    let min_exact_v = Simd::<i32, N>::splat(MIN_EXACT_MINUTES);
    let ceiling_v = Simd::<i32, N>::splat(i32::MAX);
    let floor_v = Simd::<i32, N>::splat(i32::MIN);

    let n_aligned = (src.len() / N) * N;
    for (s, d) in src[..n_aligned]
        .chunks_exact(N)
        .zip(dst[..n_aligned].chunks_exact_mut(N))
    {
        let v = Simd::<i32, N>::from_slice(s);
        let not_null = v.simd_ne(null_v);
        let overflows = v.simd_gt(max_exact_v);
        let underflows = v.simd_lt(min_exact_v);
        // The vector `*` wraps on overflow, so lanes outside the exact range
        // are replaced with the saturated bound to match `saturating_mul` in
        // the scalar tail.
        let wrapped = v * sixty_v;
        let saturated = overflows.select(ceiling_v, underflows.select(floor_v, wrapped));
        // Null sentinels are selected back in last, untouched.
        let result = not_null.select(saturated, v);
        d.copy_from_slice(&result.to_array());
    }
    for (s, d) in src[n_aligned..].iter().zip(dst[n_aligned..].iter_mut()) {
        *d = if *s != Q_NULL_MINUTE {
            s.saturating_mul(60)
        } else {
            *s
        };
    }
}
/// Unit tests for the temporal transforms.
///
/// The short-slice tests only reach the scalar tail loop (their inputs are
/// shorter than one SIMD chunk). The `*_long_slice` tests use two full chunks
/// plus a tail so the vector path is exercised too, and the `copy_and_*`
/// tests check each copying variant against its in-place counterpart.
#[cfg(test)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------------
    // offset_timestamps
    // -----------------------------------------------------------------------
    #[test]
    fn offset_timestamps_basic() {
        // q timestamp 1 ns since 2000 -> Unix epoch ns
        let mut values = vec![1i64];
        offset_timestamps(&mut values);
        assert_eq!(values[0], TIMESTAMP_OFFSET_NS + 1);
    }

    #[test]
    fn offset_timestamps_zero() {
        let mut values = vec![0i64];
        offset_timestamps(&mut values);
        assert_eq!(values[0], TIMESTAMP_OFFSET_NS);
    }

    #[test]
    fn offset_timestamps_preserves_null() {
        let mut values = vec![Q_NULL_TIMESTAMP];
        offset_timestamps(&mut values);
        assert_eq!(values[0], Q_NULL_TIMESTAMP);
    }

    #[test]
    fn offset_timestamps_mixed() {
        let mut values = vec![0, Q_NULL_TIMESTAMP, 1000, Q_NULL_TIMESTAMP, 2000];
        offset_timestamps(&mut values);
        assert_eq!(values[0], TIMESTAMP_OFFSET_NS);
        assert_eq!(values[1], Q_NULL_TIMESTAMP);
        assert_eq!(values[2], TIMESTAMP_OFFSET_NS + 1000);
        assert_eq!(values[3], Q_NULL_TIMESTAMP);
        assert_eq!(values[4], TIMESTAMP_OFFSET_NS + 2000);
    }

    #[test]
    fn offset_timestamps_empty() {
        let mut values: Vec<i64> = vec![];
        offset_timestamps(&mut values);
        assert!(values.is_empty());
    }

    #[test]
    fn offset_timestamps_long_slice() {
        // 19 elements: two full 8-lane chunks plus a 3-element tail, with one
        // null inside the chunked region and one inside the tail.
        let mut values: Vec<i64> = (0..19_i64).collect();
        values[3] = Q_NULL_TIMESTAMP;
        values[17] = Q_NULL_TIMESTAMP;
        offset_timestamps(&mut values);
        for (i, v) in (0_i64..).zip(&values) {
            if i == 3 || i == 17 {
                assert_eq!(*v, Q_NULL_TIMESTAMP);
            } else {
                assert_eq!(*v, TIMESTAMP_OFFSET_NS + i);
            }
        }
    }

    // -----------------------------------------------------------------------
    // offset_dates
    // -----------------------------------------------------------------------
    #[test]
    fn offset_dates_basic() {
        let mut values = vec![0i32]; // 2000-01-01 -> days since Unix epoch
        offset_dates(&mut values);
        assert_eq!(values[0], DATE_OFFSET_DAYS);
    }

    #[test]
    fn offset_dates_preserves_null() {
        let mut values = vec![Q_NULL_DATE];
        offset_dates(&mut values);
        assert_eq!(values[0], Q_NULL_DATE);
    }

    #[test]
    fn offset_dates_mixed() {
        let mut values = vec![0, Q_NULL_DATE, 1, Q_NULL_DATE];
        offset_dates(&mut values);
        assert_eq!(values[0], DATE_OFFSET_DAYS);
        assert_eq!(values[1], Q_NULL_DATE);
        assert_eq!(values[2], DATE_OFFSET_DAYS + 1);
        assert_eq!(values[3], Q_NULL_DATE);
    }

    #[test]
    fn offset_dates_empty() {
        let mut values: Vec<i32> = vec![];
        offset_dates(&mut values);
        assert!(values.is_empty());
    }

    #[test]
    fn offset_dates_long_slice() {
        // 35 elements: two full 16-lane chunks plus a 3-element tail.
        let mut values: Vec<i32> = (0..35_i32).collect();
        values[5] = Q_NULL_DATE;
        values[34] = Q_NULL_DATE;
        offset_dates(&mut values);
        for (i, v) in (0_i32..).zip(&values) {
            if i == 5 || i == 34 {
                assert_eq!(*v, Q_NULL_DATE);
            } else {
                assert_eq!(*v, DATE_OFFSET_DAYS + i);
            }
        }
    }

    // -----------------------------------------------------------------------
    // minutes_to_seconds
    // -----------------------------------------------------------------------
    #[test]
    fn minutes_to_seconds_basic() {
        let mut values = vec![10i32]; // 10 minutes -> 600 seconds
        minutes_to_seconds(&mut values);
        assert_eq!(values[0], 600);
    }

    #[test]
    fn minutes_to_seconds_preserves_null() {
        let mut values = vec![Q_NULL_MINUTE];
        minutes_to_seconds(&mut values);
        assert_eq!(values[0], Q_NULL_MINUTE);
    }

    #[test]
    fn minutes_to_seconds_mixed() {
        let mut values = vec![1, Q_NULL_MINUTE, 60];
        minutes_to_seconds(&mut values);
        assert_eq!(values[0], 60);
        assert_eq!(values[1], Q_NULL_MINUTE);
        assert_eq!(values[2], 3600);
    }

    #[test]
    fn minutes_to_seconds_empty() {
        let mut values: Vec<i32> = vec![];
        minutes_to_seconds(&mut values);
        assert!(values.is_empty());
    }

    #[test]
    fn minutes_to_seconds_long_slice() {
        // 35 elements: two full 16-lane chunks plus a 3-element tail.
        let mut values: Vec<i32> = (0..35_i32).collect();
        values[5] = Q_NULL_MINUTE;
        values[34] = Q_NULL_MINUTE;
        minutes_to_seconds(&mut values);
        for (i, v) in (0_i32..).zip(&values) {
            if i == 5 || i == 34 {
                assert_eq!(*v, Q_NULL_MINUTE);
            } else {
                assert_eq!(*v, i * 60);
            }
        }
    }

    // -----------------------------------------------------------------------
    // copy_and_* variants: must agree with the in-place transforms
    // -----------------------------------------------------------------------
    #[test]
    fn copy_and_offset_timestamps_matches_in_place() {
        let mut src: Vec<i64> = (0..19_i64).collect();
        src[3] = Q_NULL_TIMESTAMP;
        src[17] = Q_NULL_TIMESTAMP;

        let mut dst = vec![0_i64; src.len()];
        copy_and_offset_timestamps(&src, &mut dst);

        let mut expected = src.clone();
        offset_timestamps(&mut expected);
        assert_eq!(dst, expected);
    }

    #[test]
    fn copy_and_offset_dates_matches_in_place() {
        let mut src: Vec<i32> = (0..35_i32).collect();
        src[5] = Q_NULL_DATE;
        src[34] = Q_NULL_DATE;

        let mut dst = vec![0_i32; src.len()];
        copy_and_offset_dates(&src, &mut dst);

        let mut expected = src.clone();
        offset_dates(&mut expected);
        assert_eq!(dst, expected);
    }

    #[test]
    fn copy_and_minutes_to_seconds_matches_in_place() {
        let mut src: Vec<i32> = (0..35_i32).collect();
        src[5] = Q_NULL_MINUTE;
        src[34] = Q_NULL_MINUTE;

        let mut dst = vec![0_i32; src.len()];
        copy_and_minutes_to_seconds(&src, &mut dst);

        let mut expected = src.clone();
        minutes_to_seconds(&mut expected);
        assert_eq!(dst, expected);
    }

    #[test]
    fn copy_variants_accept_empty_slices() {
        let mut dst64: Vec<i64> = vec![];
        copy_and_offset_timestamps(&[], &mut dst64);
        assert!(dst64.is_empty());

        let mut dst32: Vec<i32> = vec![];
        copy_and_offset_dates(&[], &mut dst32);
        assert!(dst32.is_empty());
        copy_and_minutes_to_seconds(&[], &mut dst32);
        assert!(dst32.is_empty());
    }
}