diff --git a/Cargo.lock b/Cargo.lock index 047ae48..fbaedd8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -908,7 +908,7 @@ dependencies = [ [[package]] name = "qroissant-arrow" -version = "0.3.0" +version = "0.3.1" dependencies = [ "arrow-array", "arrow-buffer", @@ -926,7 +926,7 @@ dependencies = [ [[package]] name = "qroissant-core" -version = "0.3.0" +version = "0.3.1" dependencies = [ "bytemuck", "bytes", @@ -938,11 +938,11 @@ dependencies = [ [[package]] name = "qroissant-kernels" -version = "0.3.0" +version = "0.3.1" [[package]] name = "qroissant-python" -version = "0.3.0" +version = "0.3.1" dependencies = [ "bb8", "bytes", @@ -962,7 +962,7 @@ dependencies = [ [[package]] name = "qroissant-transport" -version = "0.3.0" +version = "0.3.1" dependencies = [ "bytes", "futures", diff --git a/Cargo.toml b/Cargo.toml index cf28ecc..17b57fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["crates/qroissant-core", "crates/qroissant-kernels", "crates/qroissan resolver = "3" [workspace.package] -version = "0.3.0" +version = "0.3.1" edition = "2024" license = "Apache-2.0" repository = "https://github.com/qroissant/qroissant" diff --git a/crates/qroissant-arrow/src/projection.rs b/crates/qroissant-arrow/src/projection.rs index 0f97f10..4965937 100644 --- a/crates/qroissant-arrow/src/projection.rs +++ b/crates/qroissant-arrow/src/projection.rs @@ -1,6 +1,5 @@ //! Arrow projection: converts decoded q `Value` trees into Arrow arrays. -use std::mem::size_of; use std::ptr::NonNull; use std::sync::Arc; @@ -601,9 +600,9 @@ fn project_vector(vector: &Vector, opts: &ProjectionOptions) -> ProjectionResult validity = merge_infinity_i64(src, Q_INF_TIMESTAMP, Q_NINF_TIMESTAMP, validity); } let nulls = to_null_buffer(validity); - let mut bytes_buf = vec![0u8; src.len() * size_of::()]; - copy_and_offset_timestamps(src, bytemuck::cast_slice_mut(&mut bytes_buf)); - let buf = Buffer::from_vec(bytes_buf); + let mut typed_buf = vec![0i64; src.len()]; + copy_and_offset_timestamps(src, &mut typed_buf); + let buf = Buffer::from_vec(typed_buf); let arr: ArrayRef = Arc::new(TimestampNanosecondArray::new( ScalarBuffer::new(buf, 0, src.len()), nulls, @@ -625,9 +624,9 @@ fn project_vector(vector: &Vector, opts: &ProjectionOptions) -> ProjectionResult validity = merge_infinity_i32(src, Q_INF_DATE, Q_NINF_DATE, validity); } let nulls = to_null_buffer(validity); - let mut bytes_buf = vec![0u8; src.len() * size_of::()]; - copy_and_offset_dates(src, bytemuck::cast_slice_mut(&mut bytes_buf)); - let buf = Buffer::from_vec(bytes_buf); + let mut typed_buf = vec![0i32; src.len()]; + copy_and_offset_dates(src, &mut typed_buf); + let buf = Buffer::from_vec(typed_buf); let arr: ArrayRef = Arc::new(Date32Array::new(ScalarBuffer::new(buf, 0, src.len()), nulls)); (arr, DataType::Date32) @@ -667,9 +666,9 @@ fn project_vector(vector: &Vector, opts: &ProjectionOptions) -> ProjectionResult validity = merge_infinity_i32(src, Q_INF_MINUTE, Q_NINF_MINUTE, validity); } let nulls = to_null_buffer(validity); - let mut bytes_buf = vec![0u8; src.len() * size_of::()]; - copy_and_minutes_to_seconds(src, bytemuck::cast_slice_mut(&mut bytes_buf)); - let buf = Buffer::from_vec(bytes_buf); + let mut typed_buf = vec![0i32; src.len()]; + copy_and_minutes_to_seconds(src, &mut typed_buf); + let buf = Buffer::from_vec(typed_buf); let arr: ArrayRef = Arc::new(Time32SecondArray::new( ScalarBuffer::new(buf, 0, src.len()), nulls, diff --git a/crates/qroissant-python/Cargo.toml b/crates/qroissant-python/Cargo.toml index 5ea132e..5f66815 100644 --- a/crates/qroissant-python/Cargo.toml +++ b/crates/qroissant-python/Cargo.toml @@ -14,7 +14,7 @@ path = "src/lib.rs" bb8 = "0.9.0" bytes = "1.11.1" chrono = "0.4.44" -pyo3 = { workspace = true, features = ["extension-module"] } +pyo3 = { workspace = true, features = ["extension-module", "abi3-py311"] } pyo3-arrow = { version = "0.17.0", default-features = false } pyo3-async-runtimes = { version = "0.28.0", features = ["tokio-runtime"] } qroissant-arrow = { path = "../qroissant-arrow" } diff --git a/crates/qroissant-python/src/values.rs b/crates/qroissant-python/src/values.rs index 21123b4..9c05170 100644 --- a/crates/qroissant-python/src/values.rs +++ b/crates/qroissant-python/src/values.rs @@ -15,6 +15,7 @@ use pyo3::types::PyTuple; use pyo3_arrow::ffi::ArrayIterator; use pyo3_arrow::ffi::to_array_pycapsules; use pyo3_arrow::ffi::to_stream_pycapsule; +use qroissant_arrow::HeterogeneousListMode; use qroissant_arrow::IngestionError; use qroissant_arrow::ProjectionOptions; use qroissant_arrow::ingest_array; @@ -210,7 +211,7 @@ impl Atom { let schema_capsule: Option> = requested_schema .map(|s| s.downcast_into::()) .transpose()?; - let opts = slf.as_super().projection_opts().clone(); + let opts = effective_export_opts(py, slf.as_super().projection_opts()); let export = project(slf.as_super().inner(), &opts) .map_err(|e| PyNotImplementedError::new_err(e.to_string()))?; let capsules = @@ -286,7 +287,7 @@ impl Vector { let schema_capsule: Option> = requested_schema .map(|s| s.downcast_into::()) .transpose()?; - let opts = slf.as_super().projection_opts().clone(); + let opts = effective_export_opts(py, slf.as_super().projection_opts()); let export = project(slf.as_super().inner(), &opts) .map_err(|e| PyNotImplementedError::new_err(e.to_string()))?; let capsules = @@ -366,7 +367,7 @@ impl List { let schema_capsule: Option> = requested_schema .map(|s| s.downcast_into::()) .transpose()?; - let opts = slf.as_super().projection_opts().clone(); + let opts = effective_export_opts(py, slf.as_super().projection_opts()); let export = project(slf.as_super().inner(), &opts) .map_err(|e| PyNotImplementedError::new_err(e.to_string()))?; let capsules = @@ -438,7 +439,7 @@ impl Dictionary { let schema_capsule: Option> = requested_schema .map(|s| s.downcast_into::()) .transpose()?; - let opts = slf.as_super().projection_opts().clone(); + let opts = effective_export_opts(py, slf.as_super().projection_opts()); let export = project(slf.as_super().inner(), &opts) .map_err(|e| PyNotImplementedError::new_err(e.to_string()))?; let capsules = @@ -535,7 +536,7 @@ impl Table { qroissant_core::Value::Table(t) => t.clone(), _ => unreachable!("Table instances always hold q tables"), }; - let opts = slf.as_super().projection_opts().clone(); + let opts = effective_export_opts(py, slf.as_super().projection_opts()); let export = py .detach(|| project_table(&table, &opts).map_err(|e| e.to_string())) .map_err(|e| PyNotImplementedError::new_err(e))?; @@ -829,6 +830,34 @@ fn map_ingestion_error(e: IngestionError) -> PyErr { PyValueError::new_err(e.to_string()) } +/// If Polars is loaded in this Python process and the caller has not opted out +/// of the default heterogeneous-list mode (still `Union`), transparently switch +/// to `CoalesceTemporals` so mixed-precision temporal columns surface as a flat +/// `Timestamp(ns)` instead of an Arrow `Union` (which Polars rejects with a +/// hard panic). +/// +/// Heuristic: presence of `polars` in `sys.modules` strongly correlates with a +/// Polars consumer for the Arrow PyCapsule export. Users who want faithful +/// `Union` despite Polars being imported should set the mode explicitly via +/// `DecodeOptions.with_coalesce_temporals(False)` — left as a documented +/// limitation, since explicit `Union` and the default are indistinguishable. +fn effective_export_opts(py: Python<'_>, opts: &Arc) -> Arc { + if opts.heterogeneous_list_mode != HeterogeneousListMode::Union { + return opts.clone(); + } + let polars_loaded = py + .import("sys") + .and_then(|sys| sys.getattr("modules")) + .and_then(|m| m.contains("polars")) + .unwrap_or(false); + if !polars_loaded { + return opts.clone(); + } + let mut next = (**opts).clone(); + next.heterogeneous_list_mode = HeterogeneousListMode::CoalesceTemporals; + Arc::new(next) +} + pub fn python_to_core_value(value: &Bound<'_, PyAny>) -> PyResult { // Try qroissant Value first (it also implements Arrow protocols, so must come first). if let Ok(q_value) = value.extract::>() { diff --git a/pyproject.toml b/pyproject.toml index dda447b..5d1de00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "qroissant" -version = "0.3.0" +version = "0.3.1" description = "q/kdb+ IPC client library with Arrow-native Python interoperability" readme = "README.md" requires-python = ">=3.10"