bug fixes

This commit is contained in:
Cam Zalewski 2026-05-20 18:29:19 +01:00
parent a1a621ddfd
commit a1ec8ba292
6 changed files with 51 additions and 23 deletions

View file

@ -14,7 +14,7 @@ path = "src/lib.rs"
bb8 = "0.9.0"
bytes = "1.11.1"
chrono = "0.4.44"
pyo3 = { workspace = true, features = ["extension-module"] }
pyo3 = { workspace = true, features = ["extension-module", "abi3-py311"] }
pyo3-arrow = { version = "0.17.0", default-features = false }
pyo3-async-runtimes = { version = "0.28.0", features = ["tokio-runtime"] }
qroissant-arrow = { path = "../qroissant-arrow" }

View file

@ -15,6 +15,7 @@ use pyo3::types::PyTuple;
use pyo3_arrow::ffi::ArrayIterator;
use pyo3_arrow::ffi::to_array_pycapsules;
use pyo3_arrow::ffi::to_stream_pycapsule;
use qroissant_arrow::HeterogeneousListMode;
use qroissant_arrow::IngestionError;
use qroissant_arrow::ProjectionOptions;
use qroissant_arrow::ingest_array;
@ -210,7 +211,7 @@ impl Atom {
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
.map(|s| s.downcast_into::<PyCapsule>())
.transpose()?;
let opts = slf.as_super().projection_opts().clone();
let opts = effective_export_opts(py, slf.as_super().projection_opts());
let export = project(slf.as_super().inner(), &opts)
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
let capsules =
@ -286,7 +287,7 @@ impl Vector {
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
.map(|s| s.downcast_into::<PyCapsule>())
.transpose()?;
let opts = slf.as_super().projection_opts().clone();
let opts = effective_export_opts(py, slf.as_super().projection_opts());
let export = project(slf.as_super().inner(), &opts)
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
let capsules =
@ -366,7 +367,7 @@ impl List {
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
.map(|s| s.downcast_into::<PyCapsule>())
.transpose()?;
let opts = slf.as_super().projection_opts().clone();
let opts = effective_export_opts(py, slf.as_super().projection_opts());
let export = project(slf.as_super().inner(), &opts)
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
let capsules =
@ -438,7 +439,7 @@ impl Dictionary {
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
.map(|s| s.downcast_into::<PyCapsule>())
.transpose()?;
let opts = slf.as_super().projection_opts().clone();
let opts = effective_export_opts(py, slf.as_super().projection_opts());
let export = project(slf.as_super().inner(), &opts)
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
let capsules =
@ -535,7 +536,7 @@ impl Table {
qroissant_core::Value::Table(t) => t.clone(),
_ => unreachable!("Table instances always hold q tables"),
};
let opts = slf.as_super().projection_opts().clone();
let opts = effective_export_opts(py, slf.as_super().projection_opts());
let export = py
.detach(|| project_table(&table, &opts).map_err(|e| e.to_string()))
.map_err(|e| PyNotImplementedError::new_err(e))?;
@ -829,6 +830,34 @@ fn map_ingestion_error(e: IngestionError) -> PyErr {
PyValueError::new_err(e.to_string())
}
/// If Polars is loaded in this Python process and the caller has not opted out
/// of the default heterogeneous-list mode (still `Union`), transparently switch
/// to `CoalesceTemporals` so mixed-precision temporal columns surface as a flat
/// `Timestamp(ns)` instead of an Arrow `Union` (which Polars rejects with a
/// hard panic).
///
/// Heuristic: presence of `polars` in `sys.modules` strongly correlates with a
/// Polars consumer for the Arrow PyCapsule export. Users who want faithful
/// `Union` despite Polars being imported should set the mode explicitly via
/// `DecodeOptions.with_coalesce_temporals(False)` — left as a documented
/// limitation, since explicit `Union` and the default are indistinguishable.
fn effective_export_opts(py: Python<'_>, opts: &Arc<ProjectionOptions>) -> Arc<ProjectionOptions> {
if opts.heterogeneous_list_mode != HeterogeneousListMode::Union {
return opts.clone();
}
let polars_loaded = py
.import("sys")
.and_then(|sys| sys.getattr("modules"))
.and_then(|m| m.contains("polars"))
.unwrap_or(false);
if !polars_loaded {
return opts.clone();
}
let mut next = (**opts).clone();
next.heterogeneous_list_mode = HeterogeneousListMode::CoalesceTemporals;
Arc::new(next)
}
pub fn python_to_core_value(value: &Bound<'_, PyAny>) -> PyResult<CoreValue> {
// Try qroissant Value first (it also implements Arrow protocols, so must come first).
if let Ok(q_value) = value.extract::<PyRef<'_, Value>>() {