feat(arrow): opt-in coalesce of heterogeneous temporal lists to Timestamp(ns)
KDB returns mixed-typed columns (most commonly temporal nulls of varying precision -- 0Np, 0Nz, 0Nd -- interleaved) as q general lists. The faithful Arrow projection emits a `Union` of the per-element DataTypes. Polars and most DataFrame consumers reject `Union` outright, so such columns are unusable downstream unless the caller converts them in an extra round trip. Add `HeterogeneousListMode::CoalesceTemporals` (off by default) on `ProjectionOptions`. When it is set, `project_heterogeneous_list` checks whether every arm is temporal (Timestamp, Date32, Date64, Time32, Time64, Duration) and, if so, casts each child to `Timestamp(Nanosecond, None)` via `arrow_cast::cast`, concatenates the results, and emits a flat array. Any non-temporal arm or cast error falls back to the existing Union path, so the flag is safe to enable globally. The option is plumbed through the Python `DecodeOptions` API as `with_coalesce_temporals(bool)`, with a matching getter and pyi stub. The default stays `False`; users opt in when they know the consumer (e.g. Polars) can't handle Union and they accept the lossy precision promotion. Tests cover (a) the default Union output, (b) all-temporal coalescing, and (c) non-temporal fallback to Union.
This commit is contained in:
parent
aa2c0a2ec7
commit
a1a621ddfd
8 changed files with 186 additions and 0 deletions
|
|
@ -3,6 +3,7 @@ use std::sync::Arc;
|
|||
use pyo3::prelude::*;
|
||||
use pyo3::types::PyAny;
|
||||
use pyo3::types::PyBytes;
|
||||
use qroissant_arrow::HeterogeneousListMode;
|
||||
use qroissant_arrow::ListProjection;
|
||||
use qroissant_arrow::ProjectionOptions;
|
||||
use qroissant_arrow::StringProjection;
|
||||
|
|
@ -56,6 +57,11 @@ pub fn decode_options_to_proj_opts(opts: Option<&DecodeOptions>) -> Arc<Projecti
|
|||
crate::types::UnionMode::Dense => qroissant_arrow::UnionMode::Dense,
|
||||
crate::types::UnionMode::Sparse => qroissant_arrow::UnionMode::Sparse,
|
||||
},
|
||||
heterogeneous_list_mode: if opts.coalesce_temporals_value() {
|
||||
HeterogeneousListMode::CoalesceTemporals
|
||||
} else {
|
||||
HeterogeneousListMode::Union
|
||||
},
|
||||
treat_infinity_as_null: opts.treat_infinity_as_null(),
|
||||
parallel: opts.parallel_value(),
|
||||
assume_symbol_utf8: opts.assume_symbol_utf8_value(),
|
||||
|
|
|
|||
|
|
@ -817,6 +817,7 @@ pub struct DecodeOptions {
|
|||
validate_compressed_trailing_bytes: bool,
|
||||
temporal_nulls: bool,
|
||||
treat_infinity_as_null: bool,
|
||||
coalesce_temporals: bool,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
|
|
@ -875,6 +876,11 @@ impl DecodeOptions {
|
|||
/// Returns the stored `treat_infinity_as_null` flag of these decode options.
fn get_treat_infinity_as_null(&self) -> bool {
|
||||
self.treat_infinity_as_null
|
||||
}
|
||||
|
||||
/// Whether heterogeneous all-temporal lists are coalesced instead of being
/// projected as an Arrow `Union`.
///
/// Returns the stored `coalesce_temporals` flag; the default is `false`.
#[getter]
|
||||
fn coalesce_temporals(&self) -> bool {
|
||||
self.coalesce_temporals
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for DecodeOptions {
|
||||
|
|
@ -890,6 +896,7 @@ impl Default for DecodeOptions {
|
|||
validate_compressed_trailing_bytes: true,
|
||||
temporal_nulls: true,
|
||||
treat_infinity_as_null: false,
|
||||
coalesce_temporals: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -926,6 +933,10 @@ impl DecodeOptions {
|
|||
/// Crate-internal accessor for the `assume_symbol_utf8` flag, read when the
/// decode options are translated into projection options.
pub(crate) fn assume_symbol_utf8_value(&self) -> bool {
|
||||
self.assume_symbol_utf8
|
||||
}
|
||||
|
||||
/// Crate-internal accessor for the `coalesce_temporals` flag.
///
/// `decode_options_to_proj_opts` reads it to choose between
/// `HeterogeneousListMode::CoalesceTemporals` (when `true`) and
/// `HeterogeneousListMode::Union` (when `false`).
pub(crate) fn coalesce_temporals_value(&self) -> bool {
|
||||
self.coalesce_temporals
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(module = "qroissant", frozen, eq)]
|
||||
|
|
@ -996,6 +1007,12 @@ impl DecodeOptionsBuilder {
|
|||
next
|
||||
}
|
||||
|
||||
/// Returns a copy of this builder with `coalesce_temporals` set to `value`.
///
/// The receiver is left untouched: the builder is cloned and only the
/// clone's options are modified.
fn with_coalesce_temporals(&self, value: bool) -> Self {
|
||||
let mut next = self.clone();
|
||||
next.options.coalesce_temporals = value;
|
||||
next
|
||||
}
|
||||
|
||||
/// Produces a `DecodeOptions` value by cloning the options accumulated in
/// this builder; the builder itself is not consumed.
fn build(&self) -> DecodeOptions {
|
||||
self.options.clone()
|
||||
}
|
||||
|
|
@ -1049,6 +1066,11 @@ impl DecodeOptionsBuilder {
|
|||
/// Returns the `treat_infinity_as_null` flag currently held in the builder's
/// pending options.
fn treat_infinity_as_null(&self) -> bool {
|
||||
self.options.treat_infinity_as_null
|
||||
}
|
||||
|
||||
/// Returns the `coalesce_temporals` flag currently held in the builder's
/// pending options (as last set through `with_coalesce_temporals`).
#[getter]
|
||||
fn coalesce_temporals(&self) -> bool {
|
||||
self.options.coalesce_temporals
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(module = "qroissant", frozen, eq)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue