bug fixes

This commit is contained in:
Cam Zalewski 2026-05-20 18:29:19 +01:00
parent a1a621ddfd
commit a1ec8ba292
6 changed files with 51 additions and 23 deletions

10
Cargo.lock generated
View file

@ -908,7 +908,7 @@ dependencies = [
[[package]] [[package]]
name = "qroissant-arrow" name = "qroissant-arrow"
version = "0.3.0" version = "0.3.1"
dependencies = [ dependencies = [
"arrow-array", "arrow-array",
"arrow-buffer", "arrow-buffer",
@ -926,7 +926,7 @@ dependencies = [
[[package]] [[package]]
name = "qroissant-core" name = "qroissant-core"
version = "0.3.0" version = "0.3.1"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"bytes", "bytes",
@ -938,11 +938,11 @@ dependencies = [
[[package]] [[package]]
name = "qroissant-kernels" name = "qroissant-kernels"
version = "0.3.0" version = "0.3.1"
[[package]] [[package]]
name = "qroissant-python" name = "qroissant-python"
version = "0.3.0" version = "0.3.1"
dependencies = [ dependencies = [
"bb8", "bb8",
"bytes", "bytes",
@ -962,7 +962,7 @@ dependencies = [
[[package]] [[package]]
name = "qroissant-transport" name = "qroissant-transport"
version = "0.3.0" version = "0.3.1"
dependencies = [ dependencies = [
"bytes", "bytes",
"futures", "futures",

View file

@ -3,7 +3,7 @@ members = ["crates/qroissant-core", "crates/qroissant-kernels", "crates/qroissan
resolver = "3" resolver = "3"
[workspace.package] [workspace.package]
version = "0.3.0" version = "0.3.1"
edition = "2024" edition = "2024"
license = "Apache-2.0" license = "Apache-2.0"
repository = "https://github.com/qroissant/qroissant" repository = "https://github.com/qroissant/qroissant"

View file

@ -1,6 +1,5 @@
//! Arrow projection: converts decoded q `Value` trees into Arrow arrays. //! Arrow projection: converts decoded q `Value` trees into Arrow arrays.
use std::mem::size_of;
use std::ptr::NonNull; use std::ptr::NonNull;
use std::sync::Arc; use std::sync::Arc;
@ -601,9 +600,9 @@ fn project_vector(vector: &Vector, opts: &ProjectionOptions) -> ProjectionResult
validity = merge_infinity_i64(src, Q_INF_TIMESTAMP, Q_NINF_TIMESTAMP, validity); validity = merge_infinity_i64(src, Q_INF_TIMESTAMP, Q_NINF_TIMESTAMP, validity);
} }
let nulls = to_null_buffer(validity); let nulls = to_null_buffer(validity);
let mut bytes_buf = vec![0u8; src.len() * size_of::<i64>()]; let mut typed_buf = vec![0i64; src.len()];
copy_and_offset_timestamps(src, bytemuck::cast_slice_mut(&mut bytes_buf)); copy_and_offset_timestamps(src, &mut typed_buf);
let buf = Buffer::from_vec(bytes_buf); let buf = Buffer::from_vec(typed_buf);
let arr: ArrayRef = Arc::new(TimestampNanosecondArray::new( let arr: ArrayRef = Arc::new(TimestampNanosecondArray::new(
ScalarBuffer::new(buf, 0, src.len()), ScalarBuffer::new(buf, 0, src.len()),
nulls, nulls,
@ -625,9 +624,9 @@ fn project_vector(vector: &Vector, opts: &ProjectionOptions) -> ProjectionResult
validity = merge_infinity_i32(src, Q_INF_DATE, Q_NINF_DATE, validity); validity = merge_infinity_i32(src, Q_INF_DATE, Q_NINF_DATE, validity);
} }
let nulls = to_null_buffer(validity); let nulls = to_null_buffer(validity);
let mut bytes_buf = vec![0u8; src.len() * size_of::<i32>()]; let mut typed_buf = vec![0i32; src.len()];
copy_and_offset_dates(src, bytemuck::cast_slice_mut(&mut bytes_buf)); copy_and_offset_dates(src, &mut typed_buf);
let buf = Buffer::from_vec(bytes_buf); let buf = Buffer::from_vec(typed_buf);
let arr: ArrayRef = let arr: ArrayRef =
Arc::new(Date32Array::new(ScalarBuffer::new(buf, 0, src.len()), nulls)); Arc::new(Date32Array::new(ScalarBuffer::new(buf, 0, src.len()), nulls));
(arr, DataType::Date32) (arr, DataType::Date32)
@ -667,9 +666,9 @@ fn project_vector(vector: &Vector, opts: &ProjectionOptions) -> ProjectionResult
validity = merge_infinity_i32(src, Q_INF_MINUTE, Q_NINF_MINUTE, validity); validity = merge_infinity_i32(src, Q_INF_MINUTE, Q_NINF_MINUTE, validity);
} }
let nulls = to_null_buffer(validity); let nulls = to_null_buffer(validity);
let mut bytes_buf = vec![0u8; src.len() * size_of::<i32>()]; let mut typed_buf = vec![0i32; src.len()];
copy_and_minutes_to_seconds(src, bytemuck::cast_slice_mut(&mut bytes_buf)); copy_and_minutes_to_seconds(src, &mut typed_buf);
let buf = Buffer::from_vec(bytes_buf); let buf = Buffer::from_vec(typed_buf);
let arr: ArrayRef = Arc::new(Time32SecondArray::new( let arr: ArrayRef = Arc::new(Time32SecondArray::new(
ScalarBuffer::new(buf, 0, src.len()), ScalarBuffer::new(buf, 0, src.len()),
nulls, nulls,

View file

@ -14,7 +14,7 @@ path = "src/lib.rs"
bb8 = "0.9.0" bb8 = "0.9.0"
bytes = "1.11.1" bytes = "1.11.1"
chrono = "0.4.44" chrono = "0.4.44"
pyo3 = { workspace = true, features = ["extension-module"] } pyo3 = { workspace = true, features = ["extension-module", "abi3-py311"] }
pyo3-arrow = { version = "0.17.0", default-features = false } pyo3-arrow = { version = "0.17.0", default-features = false }
pyo3-async-runtimes = { version = "0.28.0", features = ["tokio-runtime"] } pyo3-async-runtimes = { version = "0.28.0", features = ["tokio-runtime"] }
qroissant-arrow = { path = "../qroissant-arrow" } qroissant-arrow = { path = "../qroissant-arrow" }

View file

@ -15,6 +15,7 @@ use pyo3::types::PyTuple;
use pyo3_arrow::ffi::ArrayIterator; use pyo3_arrow::ffi::ArrayIterator;
use pyo3_arrow::ffi::to_array_pycapsules; use pyo3_arrow::ffi::to_array_pycapsules;
use pyo3_arrow::ffi::to_stream_pycapsule; use pyo3_arrow::ffi::to_stream_pycapsule;
use qroissant_arrow::HeterogeneousListMode;
use qroissant_arrow::IngestionError; use qroissant_arrow::IngestionError;
use qroissant_arrow::ProjectionOptions; use qroissant_arrow::ProjectionOptions;
use qroissant_arrow::ingest_array; use qroissant_arrow::ingest_array;
@ -210,7 +211,7 @@ impl Atom {
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
.map(|s| s.downcast_into::<PyCapsule>()) .map(|s| s.downcast_into::<PyCapsule>())
.transpose()?; .transpose()?;
let opts = slf.as_super().projection_opts().clone(); let opts = effective_export_opts(py, slf.as_super().projection_opts());
let export = project(slf.as_super().inner(), &opts) let export = project(slf.as_super().inner(), &opts)
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?; .map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
let capsules = let capsules =
@ -286,7 +287,7 @@ impl Vector {
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
.map(|s| s.downcast_into::<PyCapsule>()) .map(|s| s.downcast_into::<PyCapsule>())
.transpose()?; .transpose()?;
let opts = slf.as_super().projection_opts().clone(); let opts = effective_export_opts(py, slf.as_super().projection_opts());
let export = project(slf.as_super().inner(), &opts) let export = project(slf.as_super().inner(), &opts)
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?; .map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
let capsules = let capsules =
@ -366,7 +367,7 @@ impl List {
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
.map(|s| s.downcast_into::<PyCapsule>()) .map(|s| s.downcast_into::<PyCapsule>())
.transpose()?; .transpose()?;
let opts = slf.as_super().projection_opts().clone(); let opts = effective_export_opts(py, slf.as_super().projection_opts());
let export = project(slf.as_super().inner(), &opts) let export = project(slf.as_super().inner(), &opts)
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?; .map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
let capsules = let capsules =
@ -438,7 +439,7 @@ impl Dictionary {
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
.map(|s| s.downcast_into::<PyCapsule>()) .map(|s| s.downcast_into::<PyCapsule>())
.transpose()?; .transpose()?;
let opts = slf.as_super().projection_opts().clone(); let opts = effective_export_opts(py, slf.as_super().projection_opts());
let export = project(slf.as_super().inner(), &opts) let export = project(slf.as_super().inner(), &opts)
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?; .map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
let capsules = let capsules =
@ -535,7 +536,7 @@ impl Table {
qroissant_core::Value::Table(t) => t.clone(), qroissant_core::Value::Table(t) => t.clone(),
_ => unreachable!("Table instances always hold q tables"), _ => unreachable!("Table instances always hold q tables"),
}; };
let opts = slf.as_super().projection_opts().clone(); let opts = effective_export_opts(py, slf.as_super().projection_opts());
let export = py let export = py
.detach(|| project_table(&table, &opts).map_err(|e| e.to_string())) .detach(|| project_table(&table, &opts).map_err(|e| e.to_string()))
.map_err(|e| PyNotImplementedError::new_err(e))?; .map_err(|e| PyNotImplementedError::new_err(e))?;
@ -829,6 +830,34 @@ fn map_ingestion_error(e: IngestionError) -> PyErr {
PyValueError::new_err(e.to_string()) PyValueError::new_err(e.to_string())
} }
/// If Polars is loaded in this Python process and the caller has not opted out
/// of the default heterogeneous-list mode (still `Union`), transparently switch
/// to `CoalesceTemporals` so mixed-precision temporal columns surface as a flat
/// `Timestamp(ns)` instead of an Arrow `Union` (which Polars rejects with a
/// hard panic).
///
/// Heuristic: presence of `polars` in `sys.modules` strongly correlates with a
/// Polars consumer for the Arrow PyCapsule export. Users who want faithful
/// `Union` despite Polars being imported should set the mode explicitly via
/// `DecodeOptions.with_coalesce_temporals(False)` — left as a documented
/// limitation, since explicit `Union` and the default are indistinguishable.
fn effective_export_opts(py: Python<'_>, opts: &Arc<ProjectionOptions>) -> Arc<ProjectionOptions> {
if opts.heterogeneous_list_mode != HeterogeneousListMode::Union {
return opts.clone();
}
let polars_loaded = py
.import("sys")
.and_then(|sys| sys.getattr("modules"))
.and_then(|m| m.contains("polars"))
.unwrap_or(false);
if !polars_loaded {
return opts.clone();
}
let mut next = (**opts).clone();
next.heterogeneous_list_mode = HeterogeneousListMode::CoalesceTemporals;
Arc::new(next)
}
pub fn python_to_core_value(value: &Bound<'_, PyAny>) -> PyResult<CoreValue> { pub fn python_to_core_value(value: &Bound<'_, PyAny>) -> PyResult<CoreValue> {
// Try qroissant Value first (it also implements Arrow protocols, so must come first). // Try qroissant Value first (it also implements Arrow protocols, so must come first).
if let Ok(q_value) = value.extract::<PyRef<'_, Value>>() { if let Ok(q_value) = value.extract::<PyRef<'_, Value>>() {

View file

@ -4,7 +4,7 @@ build-backend = "maturin"
[project] [project]
name = "qroissant" name = "qroissant"
version = "0.3.0" version = "0.3.1"
description = "q/kdb+ IPC client library with Arrow-native Python interoperability" description = "q/kdb+ IPC client library with Arrow-native Python interoperability"
readme = "README.md" readme = "README.md"
requires-python = ">=3.10" requires-python = ">=3.10"