`std::simd::Select` no longer exists in current nightly's portable_simd API; methods now hang off `Mask` inherently. The unused import was preventing the crate (and everything depending on it -- qroissant-arrow, qroissant-python) from building on a current nightly toolchain.
316 lines
10 KiB
Rust
316 lines
10 KiB
Rust
//! Temporal conversion constants and SIMD transforms for q ↔ Arrow mapping.
|
|
//!
|
|
//! q encodes temporal values relative to the millennium epoch (2000-01-01)
|
|
//! while Arrow uses the Unix epoch (1970-01-01). The helpers here translate
|
|
//! between the two without touching Arrow types so that this crate stays free
|
|
//! of Arrow dependencies.
|
|
//!
|
|
//! Each transform function uses `portable_simd` for the leading run of whole
//! SIMD-width chunks and falls back to a scalar loop for the remaining tail.
|
|
|
|
use std::simd::prelude::*;
|
|
|
|
use crate::nulls::Q_NULL_DATE;
|
|
use crate::nulls::Q_NULL_MINUTE;
|
|
use crate::nulls::Q_NULL_TIMESTAMP;
|
|
|
|
/// Nanoseconds between 1970-01-01 and 2000-01-01
/// (946 684 800 seconds, expressed in nanoseconds).
pub const TIMESTAMP_OFFSET_NS: i64 = 946_684_800 * 1_000_000_000;

/// Days between 1970-01-01 and 2000-01-01: thirty years, seven of which
/// (1972, 1976, ..., 1996) are leap years.
pub const DATE_OFFSET_DAYS: i32 = 30 * 365 + 7;

/// Milliseconds in a day (used for `Datetime` float-day conversion):
/// 24 h x 60 min x 60 s x 1000 ms.
pub const MILLIS_PER_DAY: f64 = 24.0 * 60.0 * 60.0 * 1_000.0;
|
|
|
|
/// Translates a slice of q timestamps (nanoseconds since 2000-01-01) into
|
|
/// Arrow `TimestampNanosecond` values (nanoseconds since 1970-01-01) in place.
|
|
///
|
|
/// Null elements (`i64::MIN`) are left unchanged; the Arrow null buffer
|
|
/// produced by [`crate::nulls::validity_i64`] will mask them.
|
|
#[inline]
|
|
pub fn offset_timestamps(values: &mut [i64]) {
|
|
const N: usize = 8;
|
|
let null_v = Simd::<i64, N>::splat(Q_NULL_TIMESTAMP);
|
|
let offset_v = Simd::<i64, N>::splat(TIMESTAMP_OFFSET_NS);
|
|
let n_aligned = (values.len() / N) * N;
|
|
|
|
for chunk in values[..n_aligned].chunks_exact_mut(N) {
|
|
let v = Simd::<i64, N>::from_slice(chunk);
|
|
let mask = v.simd_ne(null_v);
|
|
let added = v.saturating_add(offset_v);
|
|
let result = mask.select(added, v);
|
|
chunk.copy_from_slice(&result.to_array());
|
|
}
|
|
for v in &mut values[n_aligned..] {
|
|
if *v != Q_NULL_TIMESTAMP {
|
|
*v = v.saturating_add(TIMESTAMP_OFFSET_NS);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Translates a slice of q dates (days since 2000-01-01) into Arrow `Date32`
|
|
/// values (days since 1970-01-01) in place.
|
|
///
|
|
/// Null elements (`i32::MIN`) are left unchanged.
|
|
#[inline]
|
|
pub fn offset_dates(values: &mut [i32]) {
|
|
const N: usize = 16;
|
|
let null_v = Simd::<i32, N>::splat(Q_NULL_DATE);
|
|
let offset_v = Simd::<i32, N>::splat(DATE_OFFSET_DAYS);
|
|
let n_aligned = (values.len() / N) * N;
|
|
|
|
for chunk in values[..n_aligned].chunks_exact_mut(N) {
|
|
let v = Simd::<i32, N>::from_slice(chunk);
|
|
let mask = v.simd_ne(null_v);
|
|
let added = v.saturating_add(offset_v);
|
|
let result = mask.select(added, v);
|
|
chunk.copy_from_slice(&result.to_array());
|
|
}
|
|
for v in &mut values[n_aligned..] {
|
|
if *v != Q_NULL_DATE {
|
|
*v = v.saturating_add(DATE_OFFSET_DAYS);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Translates a slice of q minute values (minutes) into Arrow `Time32Second`
|
|
/// values (seconds) in place.
|
|
///
|
|
/// Null elements (`i32::MIN`) are left unchanged.
|
|
#[inline]
|
|
pub fn minutes_to_seconds(values: &mut [i32]) {
|
|
const N: usize = 16;
|
|
let null_v = Simd::<i32, N>::splat(Q_NULL_MINUTE);
|
|
let sixty_v = Simd::<i32, N>::splat(60_i32);
|
|
let n_aligned = (values.len() / N) * N;
|
|
|
|
for chunk in values[..n_aligned].chunks_exact_mut(N) {
|
|
let v = Simd::<i32, N>::from_slice(chunk);
|
|
let mask = v.simd_ne(null_v);
|
|
// Non-null minutes multiplied by 60; null sentinels selected back in.
|
|
// Wrapping multiply is safe here: the select restores the original
|
|
// sentinel value for null lanes, so overflow in null lanes is harmless.
|
|
let multiplied = v * sixty_v;
|
|
let result = mask.select(multiplied, v);
|
|
chunk.copy_from_slice(&result.to_array());
|
|
}
|
|
for v in &mut values[n_aligned..] {
|
|
if *v != Q_NULL_MINUTE {
|
|
*v = v.saturating_mul(60);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Copies q timestamps (nanoseconds since 2000-01-01) from `src` into `dst`,
|
|
/// applying the Unix-epoch offset in a single SIMD pass.
|
|
///
|
|
/// Avoids the two-pass cost of `to_vec()` + `offset_timestamps()`:
|
|
/// one read from `src`, one write to `dst`, no intermediate allocation.
|
|
/// Null elements (`i64::MIN`) are copied unchanged.
|
|
///
|
|
/// `src` and `dst` must have the same length.
|
|
#[inline]
|
|
pub fn copy_and_offset_timestamps(src: &[i64], dst: &mut [i64]) {
|
|
debug_assert_eq!(src.len(), dst.len());
|
|
const N: usize = 8;
|
|
let null_v = Simd::<i64, N>::splat(Q_NULL_TIMESTAMP);
|
|
let offset_v = Simd::<i64, N>::splat(TIMESTAMP_OFFSET_NS);
|
|
let n_aligned = (src.len() / N) * N;
|
|
|
|
for (s, d) in src[..n_aligned]
|
|
.chunks_exact(N)
|
|
.zip(dst[..n_aligned].chunks_exact_mut(N))
|
|
{
|
|
let v = Simd::<i64, N>::from_slice(s);
|
|
let mask = v.simd_ne(null_v);
|
|
let result = mask.select(v.saturating_add(offset_v), v);
|
|
d.copy_from_slice(&result.to_array());
|
|
}
|
|
for (s, d) in src[n_aligned..].iter().zip(dst[n_aligned..].iter_mut()) {
|
|
*d = if *s != Q_NULL_TIMESTAMP {
|
|
s.saturating_add(TIMESTAMP_OFFSET_NS)
|
|
} else {
|
|
*s
|
|
};
|
|
}
|
|
}
|
|
|
|
/// Copies q dates (days since 2000-01-01) from `src` into `dst`,
|
|
/// applying the Unix-epoch offset in a single SIMD pass.
|
|
///
|
|
/// `src` and `dst` must have the same length.
|
|
#[inline]
|
|
pub fn copy_and_offset_dates(src: &[i32], dst: &mut [i32]) {
|
|
debug_assert_eq!(src.len(), dst.len());
|
|
const N: usize = 16;
|
|
let null_v = Simd::<i32, N>::splat(Q_NULL_DATE);
|
|
let offset_v = Simd::<i32, N>::splat(DATE_OFFSET_DAYS);
|
|
let n_aligned = (src.len() / N) * N;
|
|
|
|
for (s, d) in src[..n_aligned]
|
|
.chunks_exact(N)
|
|
.zip(dst[..n_aligned].chunks_exact_mut(N))
|
|
{
|
|
let v = Simd::<i32, N>::from_slice(s);
|
|
let mask = v.simd_ne(null_v);
|
|
let result = mask.select(v.saturating_add(offset_v), v);
|
|
d.copy_from_slice(&result.to_array());
|
|
}
|
|
for (s, d) in src[n_aligned..].iter().zip(dst[n_aligned..].iter_mut()) {
|
|
*d = if *s != Q_NULL_DATE {
|
|
s.saturating_add(DATE_OFFSET_DAYS)
|
|
} else {
|
|
*s
|
|
};
|
|
}
|
|
}
|
|
|
|
/// Copies q minute values from `src` into `dst`, converting minutes → seconds
|
|
/// in a single SIMD pass.
|
|
///
|
|
/// `src` and `dst` must have the same length.
|
|
#[inline]
|
|
pub fn copy_and_minutes_to_seconds(src: &[i32], dst: &mut [i32]) {
|
|
debug_assert_eq!(src.len(), dst.len());
|
|
const N: usize = 16;
|
|
let null_v = Simd::<i32, N>::splat(Q_NULL_MINUTE);
|
|
let sixty_v = Simd::<i32, N>::splat(60_i32);
|
|
let n_aligned = (src.len() / N) * N;
|
|
|
|
for (s, d) in src[..n_aligned]
|
|
.chunks_exact(N)
|
|
.zip(dst[..n_aligned].chunks_exact_mut(N))
|
|
{
|
|
let v = Simd::<i32, N>::from_slice(s);
|
|
let mask = v.simd_ne(null_v);
|
|
let multiplied = v * sixty_v;
|
|
let result = mask.select(multiplied, v);
|
|
d.copy_from_slice(&result.to_array());
|
|
}
|
|
for (s, d) in src[n_aligned..].iter().zip(dst[n_aligned..].iter_mut()) {
|
|
*d = if *s != Q_NULL_MINUTE {
|
|
s.saturating_mul(60)
|
|
} else {
|
|
*s
|
|
};
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    //! Unit tests for the in-place and copying transforms.
    //!
    //! The short-slice tests only reach the scalar tail, because they hold
    //! fewer elements than one SIMD chunk (8 lanes for `i64`, 16 for `i32`).
    //! The `*_spans_simd_and_tail` tests use slices long enough to cover both
    //! the vector body and the scalar tail.

    use super::*;

    // -----------------------------------------------------------------------
    // offset_timestamps
    // -----------------------------------------------------------------------

    #[test]
    fn offset_timestamps_basic() {
        // q timestamp 1 ns since 2000 -> Unix epoch ns
        let mut values = vec![1i64];
        offset_timestamps(&mut values);
        assert_eq!(values[0], TIMESTAMP_OFFSET_NS + 1);
    }

    #[test]
    fn offset_timestamps_zero() {
        let mut values = vec![0i64];
        offset_timestamps(&mut values);
        assert_eq!(values[0], TIMESTAMP_OFFSET_NS);
    }

    #[test]
    fn offset_timestamps_preserves_null() {
        let mut values = vec![Q_NULL_TIMESTAMP];
        offset_timestamps(&mut values);
        assert_eq!(values[0], Q_NULL_TIMESTAMP);
    }

    #[test]
    fn offset_timestamps_mixed() {
        let mut values = vec![0, Q_NULL_TIMESTAMP, 1000, Q_NULL_TIMESTAMP, 2000];
        offset_timestamps(&mut values);
        assert_eq!(values[0], TIMESTAMP_OFFSET_NS);
        assert_eq!(values[1], Q_NULL_TIMESTAMP);
        assert_eq!(values[2], TIMESTAMP_OFFSET_NS + 1000);
        assert_eq!(values[3], Q_NULL_TIMESTAMP);
        assert_eq!(values[4], TIMESTAMP_OFFSET_NS + 2000);
    }

    #[test]
    fn offset_timestamps_empty() {
        let mut values: Vec<i64> = vec![];
        offset_timestamps(&mut values);
        assert!(values.is_empty());
    }

    #[test]
    fn offset_timestamps_spans_simd_and_tail() {
        // 19 elements: two full 8-lane chunks plus a 3-element scalar tail,
        // with one null in the vector body and one in the tail.
        let mut values: Vec<i64> = (0..19).collect();
        values[3] = Q_NULL_TIMESTAMP;
        values[17] = Q_NULL_TIMESTAMP;
        let expected: Vec<i64> = values
            .iter()
            .map(|&v| {
                if v == Q_NULL_TIMESTAMP {
                    v
                } else {
                    v + TIMESTAMP_OFFSET_NS
                }
            })
            .collect();
        offset_timestamps(&mut values);
        assert_eq!(values, expected);
    }

    #[test]
    fn offset_timestamps_saturates() {
        // Nine elements so that both the vector body and the tail see i64::MAX.
        let mut values = vec![i64::MAX; 9];
        offset_timestamps(&mut values);
        assert!(values.iter().all(|&v| v == i64::MAX));
    }

    // -----------------------------------------------------------------------
    // offset_dates
    // -----------------------------------------------------------------------

    #[test]
    fn offset_dates_basic() {
        let mut values = vec![0i32]; // 2000-01-01 -> days since Unix epoch
        offset_dates(&mut values);
        assert_eq!(values[0], DATE_OFFSET_DAYS);
    }

    #[test]
    fn offset_dates_preserves_null() {
        let mut values = vec![Q_NULL_DATE];
        offset_dates(&mut values);
        assert_eq!(values[0], Q_NULL_DATE);
    }

    #[test]
    fn offset_dates_mixed() {
        let mut values = vec![0, Q_NULL_DATE, 1, Q_NULL_DATE];
        offset_dates(&mut values);
        assert_eq!(values[0], DATE_OFFSET_DAYS);
        assert_eq!(values[1], Q_NULL_DATE);
        assert_eq!(values[2], DATE_OFFSET_DAYS + 1);
        assert_eq!(values[3], Q_NULL_DATE);
    }

    #[test]
    fn offset_dates_empty() {
        let mut values: Vec<i32> = vec![];
        offset_dates(&mut values);
        assert!(values.is_empty());
    }

    #[test]
    fn offset_dates_spans_simd_and_tail() {
        // 37 elements: two full 16-lane chunks plus a 5-element scalar tail.
        let mut values: Vec<i32> = (0..37).collect();
        values[5] = Q_NULL_DATE;
        values[34] = Q_NULL_DATE;
        let expected: Vec<i32> = values
            .iter()
            .map(|&v| if v == Q_NULL_DATE { v } else { v + DATE_OFFSET_DAYS })
            .collect();
        offset_dates(&mut values);
        assert_eq!(values, expected);
    }

    // -----------------------------------------------------------------------
    // minutes_to_seconds
    // -----------------------------------------------------------------------

    #[test]
    fn minutes_to_seconds_basic() {
        let mut values = vec![10i32]; // 10 minutes -> 600 seconds
        minutes_to_seconds(&mut values);
        assert_eq!(values[0], 600);
    }

    #[test]
    fn minutes_to_seconds_preserves_null() {
        let mut values = vec![Q_NULL_MINUTE];
        minutes_to_seconds(&mut values);
        assert_eq!(values[0], Q_NULL_MINUTE);
    }

    #[test]
    fn minutes_to_seconds_mixed() {
        let mut values = vec![1, Q_NULL_MINUTE, 60];
        minutes_to_seconds(&mut values);
        assert_eq!(values[0], 60);
        assert_eq!(values[1], Q_NULL_MINUTE);
        assert_eq!(values[2], 3600);
    }

    #[test]
    fn minutes_to_seconds_empty() {
        let mut values: Vec<i32> = vec![];
        minutes_to_seconds(&mut values);
        assert!(values.is_empty());
    }

    #[test]
    fn minutes_to_seconds_spans_simd_and_tail() {
        // 37 elements: two full 16-lane chunks plus a 5-element scalar tail.
        let mut values: Vec<i32> = (0..37).collect();
        values[7] = Q_NULL_MINUTE;
        values[36] = Q_NULL_MINUTE;
        let expected: Vec<i32> = values
            .iter()
            .map(|&v| if v == Q_NULL_MINUTE { v } else { v * 60 })
            .collect();
        minutes_to_seconds(&mut values);
        assert_eq!(values, expected);
    }

    // -----------------------------------------------------------------------
    // copy_and_* variants
    // -----------------------------------------------------------------------

    #[test]
    fn copy_and_offset_timestamps_matches_in_place() {
        let mut src: Vec<i64> = (0..19).map(|i| i * 1_000).collect();
        src[2] = Q_NULL_TIMESTAMP;
        src[18] = Q_NULL_TIMESTAMP;
        let mut dst = vec![0i64; src.len()];
        copy_and_offset_timestamps(&src, &mut dst);

        let mut expected = src.clone();
        offset_timestamps(&mut expected);
        assert_eq!(dst, expected);
    }

    #[test]
    fn copy_and_offset_dates_matches_in_place() {
        let mut src: Vec<i32> = (0..37).collect();
        src[0] = Q_NULL_DATE;
        src[33] = Q_NULL_DATE;
        let mut dst = vec![0i32; src.len()];
        copy_and_offset_dates(&src, &mut dst);

        let mut expected = src.clone();
        offset_dates(&mut expected);
        assert_eq!(dst, expected);
    }

    #[test]
    fn copy_and_minutes_to_seconds_matches_in_place() {
        let mut src: Vec<i32> = (0..37).collect();
        src[15] = Q_NULL_MINUTE;
        src[32] = Q_NULL_MINUTE;
        let mut dst = vec![0i32; src.len()];
        copy_and_minutes_to_seconds(&src, &mut dst);

        let mut expected = src.clone();
        minutes_to_seconds(&mut expected);
        assert_eq!(dst, expected);
    }

    #[test]
    fn copy_variants_accept_empty_slices() {
        let mut dst64: Vec<i64> = vec![];
        copy_and_offset_timestamps(&[], &mut dst64);
        assert!(dst64.is_empty());

        let mut dst32: Vec<i32> = vec![];
        copy_and_offset_dates(&[], &mut dst32);
        copy_and_minutes_to_seconds(&[], &mut dst32);
        assert!(dst32.is_empty());
    }
}
|