Vendor qroissant 0.3.0 baseline
This commit is contained in:
commit
53ac90fe84
56 changed files with 18309 additions and 0 deletions
27
crates/qroissant-python/Cargo.toml
Normal file
27
crates/qroissant-python/Cargo.toml
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
[package]
|
||||
name = "qroissant-python"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
publish = false
|
||||
|
||||
[lib]
|
||||
name = "_native"
|
||||
crate-type = ["cdylib", "rlib"]
|
||||
path = "src/lib.rs"
|
||||
|
||||
[dependencies]
|
||||
bb8 = "0.9.0"
|
||||
bytes = "1.11.1"
|
||||
chrono = "0.4.44"
|
||||
pyo3 = { workspace = true, features = ["extension-module"] }
|
||||
pyo3-arrow = { version = "0.17.0", default-features = false }
|
||||
pyo3-async-runtimes = { version = "0.28.0", features = ["tokio-runtime"] }
|
||||
qroissant-arrow = { path = "../qroissant-arrow" }
|
||||
qroissant-core = { path = "../qroissant-core" }
|
||||
qroissant-kernels = { path = "../qroissant-kernels" }
|
||||
qroissant-transport = { path = "../qroissant-transport" }
|
||||
r2d2 = "0.8.10"
|
||||
tabled = "0.17.0"
|
||||
thiserror = "2.0.18"
|
||||
tokio = { version = "1.48.0", features = ["io-util", "net", "rt-multi-thread", "sync", "time"] }
|
||||
1597
crates/qroissant-python/src/client.rs
Normal file
1597
crates/qroissant-python/src/client.rs
Normal file
File diff suppressed because it is too large
Load diff
114
crates/qroissant-python/src/errors.rs
Normal file
114
crates/qroissant-python/src/errors.rs
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
use pyo3::create_exception;
|
||||
use pyo3::exceptions::PyException;
|
||||
use pyo3::exceptions::PyNotImplementedError;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::PyModule;
|
||||
use qroissant_transport::TransportError;
|
||||
use thiserror::Error;
|
||||
|
||||
create_exception!(
|
||||
qroissant,
|
||||
QroissantError,
|
||||
PyException,
|
||||
"Base exception for qroissant errors."
|
||||
);
|
||||
create_exception!(
|
||||
qroissant,
|
||||
DecodeError,
|
||||
QroissantError,
|
||||
"Raised when q IPC payload decoding fails."
|
||||
);
|
||||
create_exception!(
|
||||
qroissant,
|
||||
ProtocolError,
|
||||
QroissantError,
|
||||
"Raised when q IPC framing or protocol validation fails."
|
||||
);
|
||||
create_exception!(
|
||||
qroissant,
|
||||
TransportErrorPy,
|
||||
QroissantError,
|
||||
"Raised when transport IO or socket operations fail."
|
||||
);
|
||||
create_exception!(
|
||||
qroissant,
|
||||
OperationError,
|
||||
QroissantError,
|
||||
"Raised when an operation is unsupported in the current state."
|
||||
);
|
||||
create_exception!(
|
||||
qroissant,
|
||||
QRuntimeError,
|
||||
QroissantError,
|
||||
"Raised when the remote q process returns an error response."
|
||||
);
|
||||
create_exception!(
|
||||
qroissant,
|
||||
PoolError,
|
||||
QroissantError,
|
||||
"Raised when connection pool management fails."
|
||||
);
|
||||
create_exception!(
|
||||
qroissant,
|
||||
PoolClosedError,
|
||||
PoolError,
|
||||
"Raised when a closed pool is used."
|
||||
);
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum PythonError {
|
||||
#[error("{0}")]
|
||||
Decode(String),
|
||||
#[error("{0}")]
|
||||
Protocol(String),
|
||||
#[error("{0}")]
|
||||
Transport(String),
|
||||
#[error("{0}")]
|
||||
Operation(String),
|
||||
#[error("{0}")]
|
||||
QRuntime(String),
|
||||
#[error("{0}")]
|
||||
Pool(String),
|
||||
#[error("connection pool is closed")]
|
||||
PoolClosed,
|
||||
#[error("{0}")]
|
||||
NotImplemented(String),
|
||||
}
|
||||
|
||||
pub type PythonResult<T> = Result<T, PythonError>;
|
||||
|
||||
pub fn register(module: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
let py = module.py();
|
||||
module.add("QroissantError", py.get_type::<QroissantError>())?;
|
||||
module.add("DecodeError", py.get_type::<DecodeError>())?;
|
||||
module.add("ProtocolError", py.get_type::<ProtocolError>())?;
|
||||
module.add("TransportError", py.get_type::<TransportErrorPy>())?;
|
||||
module.add("OperationError", py.get_type::<OperationError>())?;
|
||||
module.add("QRuntimeError", py.get_type::<QRuntimeError>())?;
|
||||
module.add("PoolError", py.get_type::<PoolError>())?;
|
||||
module.add("PoolClosedError", py.get_type::<PoolClosedError>())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn to_py_err(error: PythonError) -> PyErr {
|
||||
match error {
|
||||
PythonError::Decode(message) => DecodeError::new_err(message),
|
||||
PythonError::Protocol(message) => ProtocolError::new_err(message),
|
||||
PythonError::Transport(message) => TransportErrorPy::new_err(message),
|
||||
PythonError::Operation(message) => OperationError::new_err(message),
|
||||
PythonError::QRuntime(message) => QRuntimeError::new_err(message),
|
||||
PythonError::Pool(message) => PoolError::new_err(message),
|
||||
PythonError::PoolClosed => PoolClosedError::new_err("connection pool is closed"),
|
||||
PythonError::NotImplemented(message) => PyNotImplementedError::new_err(message),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn map_transport_error(error: TransportError) -> PythonError {
|
||||
match error {
|
||||
TransportError::Closed => PythonError::Operation(error.to_string()),
|
||||
TransportError::Protocol(_) => PythonError::Protocol(error.to_string()),
|
||||
TransportError::Io(_)
|
||||
| TransportError::InvalidEndpoint(_)
|
||||
| TransportError::InvalidQueryLength(_) => PythonError::Transport(error.to_string()),
|
||||
}
|
||||
}
|
||||
28
crates/qroissant-python/src/lib.rs
Normal file
28
crates/qroissant-python/src/lib.rs
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
#![allow(deprecated)]
|
||||
|
||||
//! Native Python module for qroissant.
|
||||
|
||||
mod client;
|
||||
mod errors;
|
||||
mod raw_response;
|
||||
mod repr;
|
||||
mod serde;
|
||||
mod types;
|
||||
mod values;
|
||||
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::PyModule;
|
||||
|
||||
#[pymodule]
|
||||
fn _native(_py: Python<'_>, module: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
module.add("__doc__", "Native qroissant extension")?;
|
||||
module.add("__version__", env!("CARGO_PKG_VERSION"))?;
|
||||
errors::register(module)?;
|
||||
types::register(module)?;
|
||||
repr::register(module)?;
|
||||
values::register(module)?;
|
||||
raw_response::register(module)?;
|
||||
client::register(module)?;
|
||||
serde::register(module)?;
|
||||
Ok(())
|
||||
}
|
||||
777
crates/qroissant-python/src/raw_response.rs
Normal file
777
crates/qroissant-python/src/raw_response.rs
Normal file
|
|
@ -0,0 +1,777 @@
|
|||
use std::fmt;
|
||||
use std::io::Read;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::MutexGuard;
|
||||
|
||||
use pyo3::buffer::PyBuffer;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::PyAny;
|
||||
use pyo3::types::PyBytes;
|
||||
use pyo3::types::PyModule;
|
||||
use pyo3_async_runtimes::tokio::future_into_py;
|
||||
use qroissant_core::HEADER_LEN;
|
||||
use qroissant_core::MessageHeader as CoreMessageHeader;
|
||||
use tokio::io::AsyncRead;
|
||||
use tokio::io::AsyncReadExt;
|
||||
use tokio::io::AsyncWrite;
|
||||
use tokio::task::spawn_blocking;
|
||||
|
||||
use crate::serde::decode_core_value;
|
||||
use crate::types::Compression;
|
||||
use crate::types::DecodeOptions;
|
||||
use crate::types::Encoding;
|
||||
use crate::types::MessageHeader;
|
||||
use crate::types::MessageType;
|
||||
use crate::values::core_value_to_python_with_opts;
|
||||
|
||||
/// A blocking byte source backing a streaming raw response.
///
/// Within this file the two hooks are invoked when a stream is finalized:
/// `mark_reusable` once header and body have been read completely, `abandon`
/// in every other case.
pub(crate) trait SyncRawLease: Read + Send {
    /// Signals that the response was consumed completely.
    fn mark_reusable(&mut self);

    /// Signals that the response was given up before being fully consumed.
    fn abandon(&mut self);
}
|
||||
|
||||
/// Asynchronous counterpart of `SyncRawLease`: an async duplex stream with the
/// same pair of finalization hooks. `BlockingAsyncBridge` adapts implementors
/// to the blocking interface.
pub(crate) trait AsyncStreamingLease: AsyncRead + AsyncWrite + Send + Unpin {
    /// Signals that the response was consumed completely.
    fn mark_reusable(&mut self);

    /// Signals that the response was given up before being fully consumed.
    fn abandon(&mut self);
}
|
||||
|
||||
pub(crate) struct BlockingAsyncBridge<T> {
|
||||
inner: T,
|
||||
handle: tokio::runtime::Handle,
|
||||
}
|
||||
|
||||
impl<T> BlockingAsyncBridge<T> {
|
||||
pub(crate) fn new(inner: T) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
handle: tokio::runtime::Handle::current(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Read for BlockingAsyncBridge<T>
|
||||
where
|
||||
T: AsyncStreamingLease,
|
||||
{
|
||||
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||
let handle = self.handle.clone();
|
||||
let inner = &mut self.inner;
|
||||
let fut = async move { inner.read(buf).await };
|
||||
if tokio::runtime::Handle::try_current().is_ok() {
|
||||
tokio::task::block_in_place(|| handle.block_on(fut))
|
||||
} else {
|
||||
handle.block_on(fut)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> SyncRawLease for BlockingAsyncBridge<T>
|
||||
where
|
||||
T: AsyncStreamingLease,
|
||||
{
|
||||
fn mark_reusable(&mut self) {
|
||||
self.inner.mark_reusable();
|
||||
}
|
||||
|
||||
fn abandon(&mut self) {
|
||||
self.inner.abandon();
|
||||
}
|
||||
}
|
||||
|
||||
fn closed_raw_response_error() -> PyErr {
|
||||
pyo3::exceptions::PyValueError::new_err("I/O operation on closed qroissant raw response")
|
||||
}
|
||||
|
||||
fn backend_lock_error() -> PyErr {
|
||||
pyo3::exceptions::PyRuntimeError::new_err("qroissant raw response state is poisoned")
|
||||
}
|
||||
|
||||
fn unsupported_seek_error() -> PyErr {
|
||||
pyo3::exceptions::PyOSError::new_err(
|
||||
"qroissant raw streaming responses are forward-only and do not support seek()",
|
||||
)
|
||||
}
|
||||
|
||||
fn readonly_buffer_error() -> PyErr {
|
||||
pyo3::exceptions::PyTypeError::new_err("readinto() requires a writable buffer")
|
||||
}
|
||||
|
||||
fn non_contiguous_buffer_error() -> PyErr {
|
||||
pyo3::exceptions::PyTypeError::new_err("readinto() requires a C-contiguous buffer")
|
||||
}
|
||||
|
||||
/// GIL-independent failure modes of raw-response reads.
///
/// Values of this type can be produced while Python is detached and are
/// converted to Python exceptions afterwards by `raw_read_error_to_py`.
#[derive(Debug)]
enum RawReadError {
    /// The response has already been closed.
    Closed,
    /// The mutex guarding the shared state is poisoned.
    BackendPoisoned,
    /// A whole-payload operation was requested after some bytes were read.
    PartiallyConsumed,
    /// The underlying byte source failed.
    Io(std::io::Error),
}

impl From<std::io::Error> for RawReadError {
    /// Lets `?` lift I/O errors into `RawReadError::Io`.
    fn from(source: std::io::Error) -> Self {
        RawReadError::Io(source)
    }
}

impl fmt::Display for RawReadError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            RawReadError::Closed => write!(f, "raw response is closed"),
            RawReadError::BackendPoisoned => write!(f, "raw response backend is poisoned"),
            RawReadError::PartiallyConsumed => {
                write!(f, "raw response has already been partially consumed")
            }
            // Delegate to the I/O error's own `Display` rendering.
            RawReadError::Io(source) => fmt::Display::fmt(source, f),
        }
    }
}
|
||||
|
||||
fn raw_read_error_to_py(error: RawReadError) -> PyErr {
|
||||
match error {
|
||||
RawReadError::Closed => closed_raw_response_error(),
|
||||
RawReadError::BackendPoisoned => backend_lock_error(),
|
||||
RawReadError::PartiallyConsumed => pyo3::exceptions::PyValueError::new_err(
|
||||
"cannot decode a partially consumed raw response",
|
||||
),
|
||||
RawReadError::Io(error) => PyErr::from(error),
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_writable_contiguous_u8_buffer(payload: &Bound<'_, PyAny>) -> PyResult<PyBuffer<u8>> {
|
||||
let buffer = PyBuffer::<u8>::get(payload)?;
|
||||
if buffer.readonly() {
|
||||
return Err(readonly_buffer_error());
|
||||
}
|
||||
if !buffer.is_c_contiguous() {
|
||||
return Err(non_contiguous_buffer_error());
|
||||
}
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
enum RawResponseBackend {
|
||||
Buffered {
|
||||
payload: Vec<u8>,
|
||||
position: usize,
|
||||
},
|
||||
Streaming {
|
||||
header_bytes: [u8; HEADER_LEN],
|
||||
header_position: usize,
|
||||
remaining_body: usize,
|
||||
position: usize,
|
||||
lease: Option<Box<dyn SyncRawLease>>,
|
||||
},
|
||||
Closed,
|
||||
}
|
||||
|
||||
struct RawResponseState {
|
||||
header: MessageHeader,
|
||||
backend: RawResponseBackend,
|
||||
}
|
||||
|
||||
impl RawResponseState {
|
||||
fn streaming_remaining_total(header_position: usize, remaining_body: usize) -> usize {
|
||||
(HEADER_LEN - header_position) + remaining_body
|
||||
}
|
||||
|
||||
fn finalize_stream(lease: &mut Option<Box<dyn SyncRawLease>>, reusable: bool) {
|
||||
if let Some(mut lease) = lease.take() {
|
||||
if reusable {
|
||||
lease.mark_reusable();
|
||||
} else {
|
||||
lease.abandon();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn close_backend(backend: &mut RawResponseBackend) {
|
||||
let backend = std::mem::replace(backend, RawResponseBackend::Closed);
|
||||
match backend {
|
||||
RawResponseBackend::Buffered { .. } | RawResponseBackend::Closed => {}
|
||||
RawResponseBackend::Streaming {
|
||||
remaining_body,
|
||||
header_position,
|
||||
mut lease,
|
||||
..
|
||||
} => {
|
||||
let reusable =
|
||||
Self::streaming_remaining_total(header_position, remaining_body) == 0;
|
||||
Self::finalize_stream(&mut lease, reusable);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_streaming_into(
|
||||
header_bytes: &[u8; HEADER_LEN],
|
||||
header_position: &mut usize,
|
||||
remaining_body: &mut usize,
|
||||
position: &mut usize,
|
||||
lease: &mut Option<Box<dyn SyncRawLease>>,
|
||||
out: &mut [u8],
|
||||
) -> Result<usize, RawReadError> {
|
||||
let total_remaining = Self::streaming_remaining_total(*header_position, *remaining_body);
|
||||
if total_remaining == 0 {
|
||||
Self::finalize_stream(lease, true);
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let target = out.len().min(total_remaining);
|
||||
let mut filled = 0_usize;
|
||||
let header_copied = if *header_position < HEADER_LEN && filled < target {
|
||||
let available = HEADER_LEN - *header_position;
|
||||
let to_copy = (target - filled).min(available);
|
||||
out[..to_copy]
|
||||
.copy_from_slice(&header_bytes[*header_position..*header_position + to_copy]);
|
||||
*header_position += to_copy;
|
||||
filled += to_copy;
|
||||
to_copy
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
if filled < target {
|
||||
while filled < target {
|
||||
let lease_ref = lease
|
||||
.as_mut()
|
||||
.expect("streaming raw responses must hold an active lease");
|
||||
let read = lease_ref.read(&mut out[filled..target])?;
|
||||
if read == 0 {
|
||||
Self::finalize_stream(lease, false);
|
||||
return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
|
||||
}
|
||||
filled += read;
|
||||
}
|
||||
}
|
||||
|
||||
let body_bytes = filled.saturating_sub(header_copied);
|
||||
if body_bytes != 0 {
|
||||
*remaining_body = remaining_body.saturating_sub(body_bytes);
|
||||
}
|
||||
*position = position.saturating_add(filled);
|
||||
|
||||
if Self::streaming_remaining_total(*header_position, *remaining_body) == 0 {
|
||||
Self::finalize_stream(lease, true);
|
||||
}
|
||||
Ok(filled)
|
||||
}
|
||||
}
|
||||
|
||||
fn header_from_payload(payload: &[u8]) -> PyResult<MessageHeader> {
|
||||
if payload.len() < HEADER_LEN {
|
||||
return Ok(MessageHeader::new_native(
|
||||
Encoding::LittleEndian,
|
||||
MessageType::Response,
|
||||
Compression::Uncompressed,
|
||||
payload.len(),
|
||||
));
|
||||
}
|
||||
let header = CoreMessageHeader::parse(payload)
|
||||
.map_err(|error| pyo3::exceptions::PyValueError::new_err(error.to_string()))?;
|
||||
Ok(MessageHeader::from(header))
|
||||
}
|
||||
|
||||
#[pyclass(module = "qroissant")]
|
||||
pub struct RawResponse {
|
||||
state: Arc<Mutex<RawResponseState>>,
|
||||
}
|
||||
|
||||
impl RawResponse {
|
||||
fn lock_state_result(&self) -> Result<MutexGuard<'_, RawResponseState>, RawReadError> {
|
||||
self.state.lock().map_err(|_| RawReadError::BackendPoisoned)
|
||||
}
|
||||
|
||||
fn lock_state(&self) -> PyResult<MutexGuard<'_, RawResponseState>> {
|
||||
self.lock_state_result().map_err(raw_read_error_to_py)
|
||||
}
|
||||
|
||||
fn ensure_open(backend: &RawResponseBackend) -> PyResult<()> {
|
||||
if matches!(backend, RawResponseBackend::Closed) {
|
||||
return Err(closed_raw_response_error());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn ensure_open_result(backend: &RawResponseBackend) -> Result<(), RawReadError> {
|
||||
if matches!(backend, RawResponseBackend::Closed) {
|
||||
return Err(RawReadError::Closed);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn buffered(payload: Vec<u8>) -> PyResult<Self> {
|
||||
let header = header_from_payload(&payload)?;
|
||||
Ok(Self {
|
||||
state: Arc::new(Mutex::new(RawResponseState {
|
||||
header,
|
||||
backend: RawResponseBackend::Buffered {
|
||||
payload,
|
||||
position: 0,
|
||||
},
|
||||
})),
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn streaming(
|
||||
header: MessageHeader,
|
||||
header_bytes: [u8; HEADER_LEN],
|
||||
remaining_body: usize,
|
||||
lease: Box<dyn SyncRawLease>,
|
||||
) -> Self {
|
||||
Self {
|
||||
state: Arc::new(Mutex::new(RawResponseState {
|
||||
header,
|
||||
backend: RawResponseBackend::Streaming {
|
||||
header_bytes,
|
||||
header_position: 0,
|
||||
remaining_body,
|
||||
position: 0,
|
||||
lease: Some(lease),
|
||||
},
|
||||
})),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn into_async(self) -> AsyncRawResponse {
|
||||
let this = std::mem::ManuallyDrop::new(self);
|
||||
// SAFETY: `ManuallyDrop` suppresses `RawResponse::drop`, so it is safe
|
||||
// to move the owned `Arc` into the async wrapper without closing the
|
||||
// underlying raw-response state.
|
||||
let state = unsafe { std::ptr::read(&this.state) };
|
||||
AsyncRawResponse { state }
|
||||
}
|
||||
|
||||
fn materialize_result(&self) -> Result<Vec<u8>, RawReadError> {
|
||||
let position = {
|
||||
let state = self.lock_state_result()?;
|
||||
Self::ensure_open_result(&state.backend)?;
|
||||
match &state.backend {
|
||||
RawResponseBackend::Buffered { position, .. }
|
||||
| RawResponseBackend::Streaming { position, .. } => *position,
|
||||
RawResponseBackend::Closed => {
|
||||
unreachable!("closed raw responses are handled above")
|
||||
}
|
||||
}
|
||||
};
|
||||
if position != 0 {
|
||||
return Err(RawReadError::PartiallyConsumed);
|
||||
}
|
||||
self.read_owned_result(None)
|
||||
}
|
||||
|
||||
fn read_owned_result(&self, size: Option<isize>) -> Result<Vec<u8>, RawReadError> {
|
||||
let mut state = self.lock_state_result()?;
|
||||
Self::ensure_open_result(&state.backend)?;
|
||||
match &mut state.backend {
|
||||
RawResponseBackend::Buffered { payload, position } => {
|
||||
if *position >= payload.len() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
let remaining = payload.len() - *position;
|
||||
let to_read = match size {
|
||||
Some(size) if size >= 0 => remaining.min(size as usize),
|
||||
_ => remaining,
|
||||
};
|
||||
let start = *position;
|
||||
let end = start + to_read;
|
||||
*position = end;
|
||||
Ok(payload[start..end].to_vec())
|
||||
}
|
||||
RawResponseBackend::Streaming {
|
||||
header_bytes,
|
||||
header_position,
|
||||
remaining_body,
|
||||
position,
|
||||
lease,
|
||||
} => {
|
||||
let total_remaining =
|
||||
RawResponseState::streaming_remaining_total(*header_position, *remaining_body);
|
||||
let target = match size {
|
||||
Some(size) if size >= 0 => total_remaining.min(size as usize),
|
||||
_ => total_remaining,
|
||||
};
|
||||
let mut out = vec![0_u8; target];
|
||||
match RawResponseState::read_streaming_into(
|
||||
header_bytes,
|
||||
header_position,
|
||||
remaining_body,
|
||||
position,
|
||||
lease,
|
||||
&mut out,
|
||||
) {
|
||||
Ok(filled) => {
|
||||
out.truncate(filled);
|
||||
Ok(out)
|
||||
}
|
||||
Err(error) => {
|
||||
state.backend = RawResponseBackend::Closed;
|
||||
Err(error)
|
||||
}
|
||||
}
|
||||
}
|
||||
RawResponseBackend::Closed => Err(RawReadError::Closed),
|
||||
}
|
||||
}
|
||||
|
||||
fn read_into_result(&self, out: &mut [u8]) -> Result<usize, RawReadError> {
|
||||
let mut state = self.lock_state_result()?;
|
||||
Self::ensure_open_result(&state.backend)?;
|
||||
match &mut state.backend {
|
||||
RawResponseBackend::Buffered { payload, position } => {
|
||||
if *position >= payload.len() {
|
||||
return Ok(0);
|
||||
}
|
||||
let remaining = payload.len() - *position;
|
||||
let to_read = remaining.min(out.len());
|
||||
let start = *position;
|
||||
let end = start + to_read;
|
||||
out[..to_read].copy_from_slice(&payload[start..end]);
|
||||
*position = end;
|
||||
Ok(to_read)
|
||||
}
|
||||
RawResponseBackend::Streaming {
|
||||
header_bytes,
|
||||
header_position,
|
||||
remaining_body,
|
||||
position,
|
||||
lease,
|
||||
} => match RawResponseState::read_streaming_into(
|
||||
header_bytes,
|
||||
header_position,
|
||||
remaining_body,
|
||||
position,
|
||||
lease,
|
||||
out,
|
||||
) {
|
||||
Ok(filled) => Ok(filled),
|
||||
Err(error) => {
|
||||
state.backend = RawResponseBackend::Closed;
|
||||
Err(error)
|
||||
}
|
||||
},
|
||||
RawResponseBackend::Closed => Err(RawReadError::Closed),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for RawResponse {
|
||||
fn drop(&mut self) {
|
||||
// Clean up even if the mutex is poisoned (panic in another thread).
|
||||
let mut state = match self.state.lock() {
|
||||
Ok(guard) => guard,
|
||||
Err(poisoned) => poisoned.into_inner(),
|
||||
};
|
||||
RawResponseState::close_backend(&mut state.backend);
|
||||
}
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl RawResponse {
|
||||
#[new]
|
||||
fn new(payload: Vec<u8>) -> PyResult<Self> {
|
||||
Self::buffered(payload)
|
||||
}
|
||||
|
||||
fn __enter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
|
||||
slf
|
||||
}
|
||||
|
||||
fn __exit__(
|
||||
&self,
|
||||
_exc_type: Option<&Bound<'_, PyAny>>,
|
||||
_exc_val: Option<&Bound<'_, PyAny>>,
|
||||
_exc_tb: Option<&Bound<'_, PyAny>>,
|
||||
) -> PyResult<()> {
|
||||
self.close()
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn closed(&self) -> bool {
|
||||
self.state
|
||||
.lock()
|
||||
.map(|state| matches!(state.backend, RawResponseBackend::Closed))
|
||||
.unwrap_or(true)
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn header(&self) -> PyResult<MessageHeader> {
|
||||
let state = self.lock_state()?;
|
||||
Ok(state.header.clone())
|
||||
}
|
||||
|
||||
fn close(&self) -> PyResult<()> {
|
||||
let mut state = self.lock_state()?;
|
||||
RawResponseState::close_backend(&mut state.backend);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn readable(&self) -> bool {
|
||||
!self.closed()
|
||||
}
|
||||
|
||||
fn seekable(&self) -> bool {
|
||||
self.state
|
||||
.lock()
|
||||
.map(|state| matches!(state.backend, RawResponseBackend::Buffered { .. }))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
#[pyo3(signature = (size=None))]
|
||||
fn read<'py>(&self, py: Python<'py>, size: Option<isize>) -> PyResult<Bound<'py, PyBytes>> {
|
||||
let bytes = py
|
||||
.detach(|| self.read_owned_result(size))
|
||||
.map_err(raw_read_error_to_py)?;
|
||||
Ok(PyBytes::new(py, &bytes))
|
||||
}
|
||||
|
||||
#[pyo3(signature = (size=None))]
|
||||
fn read1<'py>(&self, py: Python<'py>, size: Option<isize>) -> PyResult<Bound<'py, PyBytes>> {
|
||||
self.read(py, size)
|
||||
}
|
||||
|
||||
fn readinto(&self, py: Python<'_>, buffer: &Bound<'_, PyAny>) -> PyResult<usize> {
|
||||
let writable = extract_writable_contiguous_u8_buffer(buffer)?;
|
||||
let len = writable.len_bytes();
|
||||
if len == 0 {
|
||||
let mut empty = [];
|
||||
return py
|
||||
.detach(|| self.read_into_result(&mut empty))
|
||||
.map_err(raw_read_error_to_py);
|
||||
}
|
||||
let ptr = writable.buf_ptr() as usize;
|
||||
py.detach(move || {
|
||||
let ptr = ptr as *mut u8;
|
||||
// SAFETY: the writable Python buffer outlives this detached call and
|
||||
// the slice length is bounded by the exported buffer length.
|
||||
let slice = unsafe { std::slice::from_raw_parts_mut(ptr, len) };
|
||||
self.read_into_result(slice)
|
||||
})
|
||||
.map_err(raw_read_error_to_py)
|
||||
}
|
||||
|
||||
fn readinto1(&self, py: Python<'_>, buffer: &Bound<'_, PyAny>) -> PyResult<usize> {
|
||||
self.readinto(py, buffer)
|
||||
}
|
||||
|
||||
fn tell(&self) -> PyResult<usize> {
|
||||
let state = self.lock_state()?;
|
||||
Self::ensure_open(&state.backend)?;
|
||||
match &state.backend {
|
||||
RawResponseBackend::Buffered { position, .. }
|
||||
| RawResponseBackend::Streaming { position, .. } => Ok(*position),
|
||||
RawResponseBackend::Closed => Err(closed_raw_response_error()),
|
||||
}
|
||||
}
|
||||
|
||||
#[pyo3(signature = (offset, whence=0))]
|
||||
fn seek(&self, offset: i64, whence: i32) -> PyResult<usize> {
|
||||
let mut state = self.lock_state()?;
|
||||
Self::ensure_open(&state.backend)?;
|
||||
match &mut state.backend {
|
||||
RawResponseBackend::Buffered { payload, position } => {
|
||||
let base = match whence {
|
||||
0 => 0_i64,
|
||||
1 => i64::try_from(*position).map_err(|_| {
|
||||
pyo3::exceptions::PyOverflowError::new_err(
|
||||
"raw response position exceeds supported seek range",
|
||||
)
|
||||
})?,
|
||||
2 => i64::try_from(payload.len()).map_err(|_| {
|
||||
pyo3::exceptions::PyOverflowError::new_err(
|
||||
"raw response length exceeds supported seek range",
|
||||
)
|
||||
})?,
|
||||
_ => {
|
||||
return Err(pyo3::exceptions::PyValueError::new_err(format!(
|
||||
"invalid seek whence value {whence}; expected 0, 1, or 2"
|
||||
)));
|
||||
}
|
||||
};
|
||||
let position_i64 = base.checked_add(offset).ok_or_else(|| {
|
||||
pyo3::exceptions::PyOverflowError::new_err(
|
||||
"raw response seek position overflowed",
|
||||
)
|
||||
})?;
|
||||
if position_i64 < 0 {
|
||||
return Err(pyo3::exceptions::PyValueError::new_err(
|
||||
"negative seek position is not allowed",
|
||||
));
|
||||
}
|
||||
*position = usize::try_from(position_i64).map_err(|_| {
|
||||
pyo3::exceptions::PyOverflowError::new_err(
|
||||
"raw response seek position overflowed",
|
||||
)
|
||||
})?;
|
||||
Ok(*position)
|
||||
}
|
||||
RawResponseBackend::Streaming { .. } => Err(unsupported_seek_error()),
|
||||
RawResponseBackend::Closed => Err(closed_raw_response_error()),
|
||||
}
|
||||
}
|
||||
|
||||
#[pyo3(signature = (*, options=None))]
|
||||
fn decode(&self, py: Python<'_>, options: Option<&DecodeOptions>) -> PyResult<Py<PyAny>> {
|
||||
let payload = py
|
||||
.detach(|| self.materialize_result())
|
||||
.map_err(raw_read_error_to_py)?;
|
||||
let (value, opts) =
|
||||
decode_core_value(bytes::Bytes::from(payload), options)
|
||||
.map_err(crate::errors::to_py_err)?;
|
||||
core_value_to_python_with_opts(py, value, opts)
|
||||
}
|
||||
|
||||
fn __repr__(&self) -> String {
|
||||
match self.state.lock() {
|
||||
Ok(state) => match &state.backend {
|
||||
RawResponseBackend::Buffered { payload, position } => format!(
|
||||
"RawResponse(mode='buffered', len={}, position={}, closed=false)",
|
||||
payload.len(),
|
||||
position
|
||||
),
|
||||
RawResponseBackend::Streaming {
|
||||
header_position,
|
||||
remaining_body,
|
||||
position,
|
||||
..
|
||||
} => format!(
|
||||
"RawResponse(mode='streaming', remaining={}, position={}, closed=false)",
|
||||
RawResponseState::streaming_remaining_total(*header_position, *remaining_body),
|
||||
position
|
||||
),
|
||||
RawResponseBackend::Closed => "RawResponse(mode='closed', closed=true)".to_string(),
|
||||
},
|
||||
Err(_) => "RawResponse(mode='poisoned', closed=true)".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(module = "qroissant")]
|
||||
pub struct AsyncRawResponse {
|
||||
state: Arc<Mutex<RawResponseState>>,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl AsyncRawResponse {
|
||||
fn __aenter__<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
|
||||
let state = slf.state.clone();
|
||||
future_into_py(py, async move {
|
||||
Python::attach(|py| Py::new(py, Self { state }).map(|value| value.into_any()))
|
||||
})
|
||||
}
|
||||
|
||||
fn __aexit__<'py>(
|
||||
&self,
|
||||
py: Python<'py>,
|
||||
_exc_type: Option<&Bound<'_, PyAny>>,
|
||||
_exc_val: Option<&Bound<'_, PyAny>>,
|
||||
_exc_tb: Option<&Bound<'_, PyAny>>,
|
||||
) -> PyResult<Bound<'py, PyAny>> {
|
||||
let state = self.state.clone();
|
||||
future_into_py(py, async move {
|
||||
let mut state = state.lock().map_err(|_| backend_lock_error())?;
|
||||
RawResponseState::close_backend(&mut state.backend);
|
||||
Ok(false)
|
||||
})
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn closed(&self) -> bool {
|
||||
self.state
|
||||
.lock()
|
||||
.map(|state| matches!(state.backend, RawResponseBackend::Closed))
|
||||
.unwrap_or(true)
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn header(&self) -> PyResult<MessageHeader> {
|
||||
let state = self.state.lock().map_err(|_| backend_lock_error())?;
|
||||
Ok(state.header.clone())
|
||||
}
|
||||
|
||||
fn close<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
|
||||
let state = self.state.clone();
|
||||
future_into_py(py, async move {
|
||||
let mut state = state.lock().map_err(|_| backend_lock_error())?;
|
||||
RawResponseState::close_backend(&mut state.backend);
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (size=None))]
|
||||
fn read<'py>(&self, py: Python<'py>, size: Option<isize>) -> PyResult<Bound<'py, PyAny>> {
|
||||
let raw = RawResponse {
|
||||
state: self.state.clone(),
|
||||
};
|
||||
future_into_py(py, async move {
|
||||
let bytes = spawn_blocking(move || raw.read_owned_result(size))
|
||||
.await
|
||||
.map_err(|error| pyo3::exceptions::PyRuntimeError::new_err(error.to_string()))?
|
||||
.map_err(raw_read_error_to_py)?;
|
||||
Python::attach(|py| Ok(PyBytes::new(py, &bytes).unbind().into_any()))
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (size=None))]
|
||||
fn read1<'py>(&self, py: Python<'py>, size: Option<isize>) -> PyResult<Bound<'py, PyAny>> {
|
||||
self.read(py, size)
|
||||
}
|
||||
|
||||
fn readinto<'py>(&self, py: Python<'py>, buffer: Py<PyAny>) -> PyResult<Bound<'py, PyAny>> {
|
||||
let state = self.state.clone();
|
||||
future_into_py(py, async move {
|
||||
Python::attach(|py| {
|
||||
let buffer = buffer.bind(py);
|
||||
let writable = extract_writable_contiguous_u8_buffer(buffer)?;
|
||||
let len = writable.len_bytes();
|
||||
if len == 0 {
|
||||
return Ok(0);
|
||||
}
|
||||
let ptr = writable.buf_ptr() as usize;
|
||||
let raw = RawResponse {
|
||||
state: state.clone(),
|
||||
};
|
||||
drop(writable);
|
||||
let read = py
|
||||
.detach(move || {
|
||||
let ptr = ptr as *mut u8;
|
||||
// SAFETY: the writable Python buffer outlives this detached call and
|
||||
// the slice length is bounded by the exported buffer length.
|
||||
let slice = unsafe { std::slice::from_raw_parts_mut(ptr, len) };
|
||||
raw.read_into_result(slice)
|
||||
})
|
||||
.map_err(raw_read_error_to_py)?;
|
||||
Ok(read)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
fn readinto1<'py>(&self, py: Python<'py>, buffer: Py<PyAny>) -> PyResult<Bound<'py, PyAny>> {
|
||||
self.readinto(py, buffer)
|
||||
}
|
||||
|
||||
#[pyo3(signature = (*, options=None))]
|
||||
fn decode<'py>(
|
||||
&self,
|
||||
py: Python<'py>,
|
||||
options: Option<DecodeOptions>,
|
||||
) -> PyResult<Bound<'py, PyAny>> {
|
||||
let raw = RawResponse {
|
||||
state: self.state.clone(),
|
||||
};
|
||||
future_into_py(py, async move {
|
||||
let payload = spawn_blocking(move || raw.materialize_result())
|
||||
.await
|
||||
.map_err(|error| pyo3::exceptions::PyRuntimeError::new_err(error.to_string()))?
|
||||
.map_err(raw_read_error_to_py)?;
|
||||
let (value, opts) =
|
||||
decode_core_value(bytes::Bytes::from(payload), options.as_ref())
|
||||
.map_err(crate::errors::to_py_err)?;
|
||||
Python::attach(|py| core_value_to_python_with_opts(py, value, opts))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register(module: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
module.add_class::<RawResponse>()?;
|
||||
module.add_class::<AsyncRawResponse>()?;
|
||||
Ok(())
|
||||
}
|
||||
437
crates/qroissant-python/src/repr/cell.rs
Normal file
437
crates/qroissant-python/src/repr/cell.rs
Normal file
|
|
@ -0,0 +1,437 @@
|
|||
//! Cell-level value formatting for q atoms and vector items.
|
||||
//!
|
||||
//! Converts raw q IPC values (CoreValue primitives) to human-readable strings
|
||||
//! without any Arrow dependency. Null sentinels are rendered as `"null"`.
|
||||
//! Temporal values use ISO-like formats familiar to both q and Python users.
|
||||
|
||||
use chrono::NaiveDate;
|
||||
use chrono::NaiveDateTime;
|
||||
use qroissant_core::Atom;
|
||||
use qroissant_core::VectorData;
|
||||
use qroissant_kernels::DATE_OFFSET_DAYS;
|
||||
use qroissant_kernels::MILLIS_PER_DAY;
|
||||
use qroissant_kernels::Q_NULL_DATE;
|
||||
use qroissant_kernels::Q_NULL_INT;
|
||||
use qroissant_kernels::Q_NULL_LONG;
|
||||
use qroissant_kernels::Q_NULL_MINUTE;
|
||||
use qroissant_kernels::Q_NULL_MONTH;
|
||||
use qroissant_kernels::Q_NULL_SECOND;
|
||||
use qroissant_kernels::Q_NULL_SHORT;
|
||||
use qroissant_kernels::Q_NULL_TIME;
|
||||
use qroissant_kernels::Q_NULL_TIMESPAN;
|
||||
use qroissant_kernels::Q_NULL_TIMESTAMP;
|
||||
use qroissant_kernels::TIMESTAMP_OFFSET_NS;
|
||||
|
||||
/// Maximum number of characters kept from a cell before it is cut.
pub const MAX_CELL_CHARS: usize = 48;

/// Keep at most `MAX_CELL_CHARS` characters of `s`.
///
/// When anything had to be dropped, `"..."` is appended after the kept
/// prefix, so a cut cell is three characters longer than the limit. Counting
/// is done in `char`s, never in bytes.
pub fn truncate(s: String) -> String {
    match s.char_indices().nth(MAX_CELL_CHARS) {
        // A character exists past the limit: keep everything before it.
        Some((cut, _)) => format!("{}...", &s[..cut]),
        // Short enough already: hand the input back untouched.
        None => s,
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Temporal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn format_date_days(q_days: i32) -> String {
|
||||
// q dates are days since 2000-01-01; NaiveDate::from_ymd uses Unix days
|
||||
let unix_days = q_days + DATE_OFFSET_DAYS;
|
||||
match NaiveDate::from_num_days_from_ce_opt(unix_days + 719_163) {
|
||||
Some(d) => d.format("%Y.%m.%d").to_string(),
|
||||
None => format!("<date:{q_days}>"),
|
||||
}
|
||||
}
|
||||
|
||||
fn format_timestamp_ns(q_ns: i64) -> String {
|
||||
let unix_ns = q_ns.saturating_add(TIMESTAMP_OFFSET_NS);
|
||||
let secs = unix_ns.div_euclid(1_000_000_000);
|
||||
let nsecs = unix_ns.rem_euclid(1_000_000_000) as u32;
|
||||
match NaiveDateTime::from_timestamp_opt(secs, nsecs) {
|
||||
Some(dt) => dt.format("%Y.%m.%dT%H:%M:%S.%9f").to_string(),
|
||||
None => format!("<timestamp:{q_ns}>"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Render a q month (months since 2000-01, so `0` is `2000.01m`) as
/// `YYYY.MMm`.
///
/// The arithmetic is done in `i64` so that very large month counts (up to
/// `i32::MAX`) cannot overflow.
fn format_month_i32(q_months: i32) -> String {
    // Months elapsed since year 0, month 1.
    let total_months = 2000_i64 * 12 + i64::from(q_months);
    // Euclidean division keeps the month in 1..=12 for negative totals.
    let year = total_months.div_euclid(12);
    let month = total_months.rem_euclid(12) + 1;
    format!("{year:04}.{month:02}m")
}
|
||||
|
||||
fn format_datetime_f64(q_days: f64) -> String {
|
||||
let unix_ms = q_days * MILLIS_PER_DAY + 946_684_800_000.0;
|
||||
let unix_ms_i64 = unix_ms as i64;
|
||||
let secs = unix_ms_i64.div_euclid(1000);
|
||||
let ms = unix_ms_i64.rem_euclid(1000) as u32;
|
||||
match NaiveDateTime::from_timestamp_opt(secs, ms * 1_000_000) {
|
||||
Some(dt) => dt.format("%Y.%m.%dT%H:%M:%S.%3f").to_string(),
|
||||
None => format!("<datetime:{q_days}>"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Render a q timespan (signed nanoseconds) as `[-]DDHH:MM:SS.nnnnnnnnn`,
/// e.g. `1D01:01:01.000000001`.
///
/// The sign is printed once in front and every field is computed from the
/// magnitude, which is taken without overflow even for `i64::MIN`.
fn format_timespan_ns(q_ns: i64) -> String {
    const NANOS_PER_SEC: u64 = 1_000_000_000;

    let sign = if q_ns < 0 { "-" } else { "" };
    let magnitude = q_ns.unsigned_abs();

    let ns = magnitude % NANOS_PER_SEC;
    let whole_secs = magnitude / NANOS_PER_SEC;
    let secs = whole_secs % 60;
    let minutes = (whole_secs / 60) % 60;
    let hours = (whole_secs / 3_600) % 24;
    let days = whole_secs / 86_400;

    format!("{sign}{days}D{hours:02}:{minutes:02}:{secs:02}.{ns:09}")
}
|
||||
|
||||
/// Render a q minute value (signed count of minutes) as `[-]HH:MM`.
///
/// Negative values print a single leading `-` followed by the magnitude
/// (`-01:01`), matching how `format_timespan_ns` handles the sign, instead of
/// putting a sign on every field.
fn format_minute_i32(total_minutes: i32) -> String {
    let sign = if total_minutes < 0 { "-" } else { "" };
    // `unsigned_abs` is overflow-free even for `i32::MIN`.
    let magnitude = total_minutes.unsigned_abs();
    let h = magnitude / 60;
    let m = magnitude % 60;
    format!("{sign}{h:02}:{m:02}")
}
|
||||
|
||||
/// Render a q second value (signed count of seconds) as `[-]HH:MM:SS`.
///
/// Negative values print a single leading `-` followed by the magnitude
/// (`-01:01:01`) instead of putting a sign on every field.
fn format_second_i32(total_seconds: i32) -> String {
    let sign = if total_seconds < 0 { "-" } else { "" };
    // `unsigned_abs` is overflow-free even for `i32::MIN`.
    let magnitude = total_seconds.unsigned_abs();
    let h = magnitude / 3_600;
    let m = (magnitude / 60) % 60;
    let s = magnitude % 60;
    format!("{sign}{h:02}:{m:02}:{s:02}")
}
|
||||
|
||||
/// Render a q time value (signed count of milliseconds) as
/// `[-]HH:MM:SS.mmm`.
///
/// Negative values print a single leading `-` followed by the magnitude
/// (`-00:00:00.001`) instead of putting a sign on every field.
fn format_time_ms(total_ms: i32) -> String {
    let sign = if total_ms < 0 { "-" } else { "" };
    // `unsigned_abs` is overflow-free even for `i32::MIN`.
    let magnitude = total_ms.unsigned_abs();
    let h = magnitude / 3_600_000;
    let m = (magnitude / 60_000) % 60;
    let s = (magnitude / 1_000) % 60;
    let ms = magnitude % 1_000;
    format!("{sign}{h:02}:{m:02}:{s:02}.{ms:03}")
}
|
||||
|
||||
/// Render 16 GUID bytes as lowercase hex in the usual 8-4-4-4-12 grouping,
/// e.g. `00010203-0405-0607-0809-0a0b0c0d0e0f`.
fn format_guid_bytes(bytes: &[u8; 16]) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    // 32 hex digits plus 4 separators.
    let mut text = String::with_capacity(36);
    for (position, &byte) in bytes.iter().enumerate() {
        // A hyphen goes in front of bytes 4, 6, 8 and 10.
        if matches!(position, 4 | 6 | 8 | 10) {
            text.push('-');
        }
        text.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        text.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    text
}
|
||||
|
||||
/// Decode symbol bytes as UTF-8 text; invalid sequences become U+FFFD.
fn format_symbol_bytes(bytes: &[u8]) -> String {
    let decoded = String::from_utf8_lossy(bytes);
    String::from(decoded)
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public API
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Format a q atom as a display string (no truncation applied).
|
||||
pub fn format_atom_raw(atom: &Atom) -> String {
|
||||
match atom {
|
||||
Atom::Boolean(b) => if *b { "true" } else { "false" }.to_string(),
|
||||
Atom::Guid(bytes) => format_guid_bytes(bytes),
|
||||
Atom::Byte(b) => format!("0x{b:02x}"),
|
||||
Atom::Short(v) => {
|
||||
if *v == Q_NULL_SHORT {
|
||||
"null".to_string()
|
||||
} else {
|
||||
v.to_string()
|
||||
}
|
||||
}
|
||||
Atom::Int(v) => {
|
||||
if *v == Q_NULL_INT {
|
||||
"null".to_string()
|
||||
} else {
|
||||
v.to_string()
|
||||
}
|
||||
}
|
||||
Atom::Long(v) => {
|
||||
if *v == Q_NULL_LONG {
|
||||
"null".to_string()
|
||||
} else {
|
||||
v.to_string()
|
||||
}
|
||||
}
|
||||
Atom::Real(v) => {
|
||||
if v.is_nan() {
|
||||
"null".to_string()
|
||||
} else {
|
||||
v.to_string()
|
||||
}
|
||||
}
|
||||
Atom::Float(v) => {
|
||||
if v.is_nan() {
|
||||
"null".to_string()
|
||||
} else {
|
||||
v.to_string()
|
||||
}
|
||||
}
|
||||
Atom::Char(b) => {
|
||||
let ch = *b as char;
|
||||
format!("\"{ch}\"")
|
||||
}
|
||||
Atom::Symbol(bytes) => format_symbol_bytes(bytes),
|
||||
Atom::Timestamp(v) => {
|
||||
if *v == Q_NULL_TIMESTAMP {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_timestamp_ns(*v)
|
||||
}
|
||||
}
|
||||
Atom::Month(v) => {
|
||||
if *v == Q_NULL_MONTH {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_month_i32(*v)
|
||||
}
|
||||
}
|
||||
Atom::Date(v) => {
|
||||
if *v == Q_NULL_DATE {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_date_days(*v)
|
||||
}
|
||||
}
|
||||
Atom::Datetime(v) => {
|
||||
if v.is_nan() {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_datetime_f64(*v)
|
||||
}
|
||||
}
|
||||
Atom::Timespan(v) => {
|
||||
if *v == Q_NULL_TIMESPAN {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_timespan_ns(*v)
|
||||
}
|
||||
}
|
||||
Atom::Minute(v) => {
|
||||
if *v == Q_NULL_MINUTE {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_minute_i32(*v)
|
||||
}
|
||||
}
|
||||
Atom::Second(v) => {
|
||||
if *v == Q_NULL_SECOND {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_second_i32(*v)
|
||||
}
|
||||
}
|
||||
Atom::Time(v) => {
|
||||
if *v == Q_NULL_TIME {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_time_ms(*v)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Format and truncate a q atom.
|
||||
pub fn format_atom_cell(atom: &Atom) -> String {
|
||||
truncate(format_atom_raw(atom))
|
||||
}
|
||||
|
||||
/// Format a single element from a `VectorData` at `index` (no truncation).
|
||||
pub fn format_vector_item_raw(data: &VectorData, index: usize) -> String {
|
||||
match data {
|
||||
VectorData::Boolean(v) => if v[index] != 0 { "true" } else { "false" }.to_string(),
|
||||
VectorData::Guid(v) => {
|
||||
let chunk: &[u8; 16] = v[index * 16..(index + 1) * 16].try_into().unwrap();
|
||||
format_guid_bytes(chunk)
|
||||
}
|
||||
VectorData::Byte(v) => format!("0x{:02x}", v[index]),
|
||||
VectorData::Short(_) => {
|
||||
let val = data.as_i16_slice()[index];
|
||||
if val == Q_NULL_SHORT {
|
||||
"null".to_string()
|
||||
} else {
|
||||
val.to_string()
|
||||
}
|
||||
}
|
||||
VectorData::Int(_) => {
|
||||
let val = data.as_i32_slice()[index];
|
||||
if val == Q_NULL_INT {
|
||||
"null".to_string()
|
||||
} else {
|
||||
val.to_string()
|
||||
}
|
||||
}
|
||||
VectorData::Long(_) => {
|
||||
let val = data.as_i64_slice()[index];
|
||||
if val == Q_NULL_LONG {
|
||||
"null".to_string()
|
||||
} else {
|
||||
val.to_string()
|
||||
}
|
||||
}
|
||||
VectorData::Real(_) => {
|
||||
let val = data.as_f32_slice()[index];
|
||||
if val.is_nan() {
|
||||
"null".to_string()
|
||||
} else {
|
||||
val.to_string()
|
||||
}
|
||||
}
|
||||
VectorData::Float(_) => {
|
||||
let val = data.as_f64_slice()[index];
|
||||
if val.is_nan() {
|
||||
"null".to_string()
|
||||
} else {
|
||||
val.to_string()
|
||||
}
|
||||
}
|
||||
VectorData::Char(v) => {
|
||||
let ch = v[index] as char;
|
||||
ch.to_string()
|
||||
}
|
||||
VectorData::Symbol(v) => format_symbol_bytes(&v[index]),
|
||||
VectorData::Timestamp(_) => {
|
||||
let val = data.as_i64_slice()[index];
|
||||
if val == Q_NULL_TIMESTAMP {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_timestamp_ns(val)
|
||||
}
|
||||
}
|
||||
VectorData::Month(_) => {
|
||||
let val = data.as_i32_slice()[index];
|
||||
if val == Q_NULL_MONTH {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_month_i32(val)
|
||||
}
|
||||
}
|
||||
VectorData::Date(_) => {
|
||||
let val = data.as_i32_slice()[index];
|
||||
if val == Q_NULL_DATE {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_date_days(val)
|
||||
}
|
||||
}
|
||||
VectorData::Datetime(_) => {
|
||||
let val = data.as_f64_slice()[index];
|
||||
if val.is_nan() {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_datetime_f64(val)
|
||||
}
|
||||
}
|
||||
VectorData::Timespan(_) => {
|
||||
let val = data.as_i64_slice()[index];
|
||||
if val == Q_NULL_TIMESPAN {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_timespan_ns(val)
|
||||
}
|
||||
}
|
||||
VectorData::Minute(_) => {
|
||||
let val = data.as_i32_slice()[index];
|
||||
if val == Q_NULL_MINUTE {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_minute_i32(val)
|
||||
}
|
||||
}
|
||||
VectorData::Second(_) => {
|
||||
let val = data.as_i32_slice()[index];
|
||||
if val == Q_NULL_SECOND {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_second_i32(val)
|
||||
}
|
||||
}
|
||||
VectorData::Time(_) => {
|
||||
let val = data.as_i32_slice()[index];
|
||||
if val == Q_NULL_TIME {
|
||||
"null".to_string()
|
||||
} else {
|
||||
format_time_ms(val)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Format and truncate a single vector item.
|
||||
pub fn format_vector_item(data: &VectorData, index: usize) -> String {
|
||||
truncate(format_vector_item_raw(data, index))
|
||||
}
|
||||
|
||||
/// Format a char vector as a quoted string (e.g. `"abc"`), truncated.
|
||||
pub fn format_char_vector(data: &[u8]) -> String {
|
||||
let s: String = data.iter().map(|&b| b as char).collect();
|
||||
truncate(format!("\"{s}\""))
|
||||
}
|
||||
|
||||
/// Return the q primitive label for a `VectorData`.
|
||||
pub fn primitive_label(data: &VectorData) -> &'static str {
|
||||
match data {
|
||||
VectorData::Boolean(_) => "boolean",
|
||||
VectorData::Guid(_) => "guid",
|
||||
VectorData::Byte(_) => "byte",
|
||||
VectorData::Short(_) => "short",
|
||||
VectorData::Int(_) => "int",
|
||||
VectorData::Long(_) => "long",
|
||||
VectorData::Real(_) => "real",
|
||||
VectorData::Float(_) => "float",
|
||||
VectorData::Char(_) => "char",
|
||||
VectorData::Symbol(_) => "symbol",
|
||||
VectorData::Timestamp(_) => "timestamp",
|
||||
VectorData::Month(_) => "month",
|
||||
VectorData::Date(_) => "date",
|
||||
VectorData::Datetime(_) => "datetime",
|
||||
VectorData::Timespan(_) => "timespan",
|
||||
VectorData::Minute(_) => "minute",
|
||||
VectorData::Second(_) => "second",
|
||||
VectorData::Time(_) => "time",
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the q primitive label for an `Atom`.
|
||||
pub fn atom_primitive_label(atom: &Atom) -> &'static str {
|
||||
match atom {
|
||||
Atom::Boolean(_) => "boolean",
|
||||
Atom::Guid(_) => "guid",
|
||||
Atom::Byte(_) => "byte",
|
||||
Atom::Short(_) => "short",
|
||||
Atom::Int(_) => "int",
|
||||
Atom::Long(_) => "long",
|
||||
Atom::Real(_) => "real",
|
||||
Atom::Float(_) => "float",
|
||||
Atom::Char(_) => "char",
|
||||
Atom::Symbol(_) => "symbol",
|
||||
Atom::Timestamp(_) => "timestamp",
|
||||
Atom::Month(_) => "month",
|
||||
Atom::Date(_) => "date",
|
||||
Atom::Datetime(_) => "datetime",
|
||||
Atom::Timespan(_) => "timespan",
|
||||
Atom::Minute(_) => "minute",
|
||||
Atom::Second(_) => "second",
|
||||
Atom::Time(_) => "time",
|
||||
}
|
||||
}
|
||||
278
crates/qroissant-python/src/repr/format.rs
Normal file
278
crates/qroissant-python/src/repr/format.rs
Normal file
|
|
@ -0,0 +1,278 @@
|
|||
//! High-level format functions for each q value shape.
|
||||
//!
|
||||
//! Each function produces a multi-line ASCII repr string. Rendering is driven
|
||||
//! by the active [`FormattingOptions`] (read from the process-wide global).
|
||||
|
||||
use qroissant_core::Atom as CoreAtom;
|
||||
use qroissant_core::Dictionary as CoreDictionary;
|
||||
use qroissant_core::List as CoreList;
|
||||
use qroissant_core::Table as CoreTable;
|
||||
use qroissant_core::Value as CoreValue;
|
||||
use qroissant_core::Vector as CoreVector;
|
||||
use qroissant_core::VectorData;
|
||||
|
||||
use super::cell::atom_primitive_label;
|
||||
use super::cell::format_atom_cell;
|
||||
use super::cell::format_atom_raw;
|
||||
use super::cell::format_char_vector;
|
||||
use super::cell::format_vector_item;
|
||||
use super::cell::primitive_label;
|
||||
use super::cell::truncate;
|
||||
use super::options::active_options;
|
||||
use super::render::PreviewSlot;
|
||||
use super::render::preview_slots;
|
||||
use super::render::render_preview;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Attribute helper
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn attribute_label(attribute: qroissant_core::Attribute) -> &'static str {
|
||||
match attribute {
|
||||
qroissant_core::Attribute::None => "none",
|
||||
qroissant_core::Attribute::Sorted => "sorted",
|
||||
qroissant_core::Attribute::Unique => "unique",
|
||||
qroissant_core::Attribute::Parted => "parted",
|
||||
qroissant_core::Attribute::Grouped => "grouped",
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Atom
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub fn format_atom(atom: &CoreAtom) -> String {
|
||||
let label = atom_primitive_label(atom);
|
||||
render_preview(
|
||||
vec![format!("Atom [{label}]")],
|
||||
vec!["value".to_string()],
|
||||
vec![vec![format_atom_cell(atom)]],
|
||||
vec!["shape: (1,)".to_string()],
|
||||
)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Vector
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub fn format_vector(vector: &CoreVector) -> String {
|
||||
let len = vector.len();
|
||||
let data = vector.data();
|
||||
let label = primitive_label(data);
|
||||
let attr = vector.attribute();
|
||||
|
||||
let rows = match data {
|
||||
VectorData::Char(chars) => {
|
||||
vec![vec![format_char_vector(chars)]]
|
||||
}
|
||||
_ => {
|
||||
let opts = active_options();
|
||||
preview_slots(len, opts.max_rows, opts.row_display)
|
||||
.into_iter()
|
||||
.map(|slot| match slot {
|
||||
PreviewSlot::Index(i) => vec![format_vector_item(data, i)],
|
||||
PreviewSlot::Ellipsis => vec!["...".to_string()],
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
};
|
||||
|
||||
render_preview(
|
||||
vec![format!("Vector [{label}, attr={}]", attribute_label(attr))],
|
||||
vec!["value".to_string()],
|
||||
rows,
|
||||
vec![format!("shape: ({len},)")],
|
||||
)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// List
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Compact single-line summary of any `CoreValue` (used for list/dict cells).
|
||||
fn inline_value_summary(value: &CoreValue) -> String {
|
||||
match value {
|
||||
CoreValue::Atom(atom) => truncate(format!(
|
||||
"{} [{}]",
|
||||
format_atom_raw(atom),
|
||||
atom_primitive_label(atom)
|
||||
)),
|
||||
CoreValue::Vector(vector) => {
|
||||
let label = primitive_label(vector.data());
|
||||
let len = vector.len();
|
||||
match vector.data() {
|
||||
VectorData::Char(chars) => truncate(format_char_vector(chars)),
|
||||
_ => truncate(format!("vector<{label}>[{len}]")),
|
||||
}
|
||||
}
|
||||
CoreValue::List(list) => truncate(format!("list[{}]", list.len())),
|
||||
CoreValue::Dictionary(dict) => truncate(format!("dict[{}]", dict.len())),
|
||||
CoreValue::Table(table) => {
|
||||
truncate(format!("table[{}x{}]", table.len(), table.num_columns()))
|
||||
}
|
||||
CoreValue::UnaryPrimitive { opcode } => truncate(format!("unary(0x{opcode:02x})")),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn format_list(list: &CoreList) -> String {
|
||||
let len = list.len();
|
||||
let opts = active_options();
|
||||
let attr = list.attribute();
|
||||
|
||||
let rows = preview_slots(len, opts.max_rows, opts.row_display)
|
||||
.into_iter()
|
||||
.map(|slot| match slot {
|
||||
PreviewSlot::Index(i) => vec![inline_value_summary(&list.values()[i])],
|
||||
PreviewSlot::Ellipsis => vec!["...".to_string()],
|
||||
})
|
||||
.collect();
|
||||
|
||||
render_preview(
|
||||
vec![format!("List [list, attr={}]", attribute_label(attr))],
|
||||
vec!["value".to_string()],
|
||||
rows,
|
||||
vec![format!("shape: ({len},)")],
|
||||
)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Dictionary
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
pub fn format_dictionary(dict: &CoreDictionary) -> String {
|
||||
let size = dict.len();
|
||||
let sorted = dict.sorted();
|
||||
|
||||
let all_rows = vec![
|
||||
vec!["keys".to_string(), inline_value_summary(dict.keys())],
|
||||
vec!["values".to_string(), inline_value_summary(dict.values())],
|
||||
];
|
||||
|
||||
let opts = active_options();
|
||||
let rows = preview_slots(all_rows.len(), opts.max_rows, opts.row_display)
|
||||
.into_iter()
|
||||
.map(|slot| match slot {
|
||||
PreviewSlot::Index(i) => all_rows[i].clone(),
|
||||
PreviewSlot::Ellipsis => vec!["...".to_string(), "...".to_string()],
|
||||
})
|
||||
.collect();
|
||||
|
||||
render_preview(
|
||||
vec![format!("Dictionary [dict, sorted={sorted}]")],
|
||||
vec!["part".to_string(), "value".to_string()],
|
||||
rows,
|
||||
vec![format!("shape: ({size},)")],
|
||||
)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Table
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn column_primitive_label(col: &CoreValue) -> &'static str {
|
||||
match col {
|
||||
CoreValue::Vector(v) => primitive_label(v.data()),
|
||||
CoreValue::List(_) => "list",
|
||||
CoreValue::Atom(_) => "atom",
|
||||
_ => "?",
|
||||
}
|
||||
}
|
||||
|
||||
fn table_cell(col: &CoreValue, row_index: usize) -> String {
|
||||
match col {
|
||||
CoreValue::Vector(v) => match v.data() {
|
||||
VectorData::Char(chars) => {
|
||||
// Show a single char per cell
|
||||
if row_index < chars.len() {
|
||||
(chars[row_index] as char).to_string()
|
||||
} else {
|
||||
"?".to_string()
|
||||
}
|
||||
}
|
||||
data => format_vector_item(data, row_index),
|
||||
},
|
||||
CoreValue::Atom(atom) => format_atom_cell(atom),
|
||||
CoreValue::List(list) => {
|
||||
if row_index < list.len() {
|
||||
inline_value_summary(&list.values()[row_index])
|
||||
} else {
|
||||
"?".to_string()
|
||||
}
|
||||
}
|
||||
_ => inline_value_summary(col),
|
||||
}
|
||||
}
|
||||
|
||||
/// Decode a raw column name as UTF-8 text; invalid sequences become U+FFFD.
fn column_name(raw: &[u8]) -> String {
    let decoded = String::from_utf8_lossy(raw);
    String::from(decoded)
}
|
||||
|
||||
pub fn format_table(table: &CoreTable) -> String {
|
||||
let num_rows = table.len();
|
||||
let num_cols = table.num_columns();
|
||||
let opts = active_options();
|
||||
let visible_cols = num_cols.min(opts.max_columns);
|
||||
|
||||
// Build headers: "name\ntype" for each visible column
|
||||
let mut headers: Vec<String> = table
|
||||
.column_names()
|
||||
.iter()
|
||||
.zip(table.columns().iter())
|
||||
.take(visible_cols)
|
||||
.map(|(name, col)| {
|
||||
let col_name = truncate(column_name(name));
|
||||
let type_label = column_primitive_label(col);
|
||||
format!("{col_name}\n{type_label}")
|
||||
})
|
||||
.collect();
|
||||
|
||||
if num_cols > visible_cols {
|
||||
headers.push("...\n...".to_string());
|
||||
} else if headers.is_empty() {
|
||||
headers.push("value".to_string());
|
||||
}
|
||||
|
||||
// Build rows
|
||||
let row_slots = preview_slots(num_rows, opts.max_rows, opts.row_display);
|
||||
let columns = table.columns();
|
||||
|
||||
let body_rows: Vec<Vec<String>> = row_slots
|
||||
.into_iter()
|
||||
.map(|slot| {
|
||||
let mut row: Vec<String> = match slot {
|
||||
PreviewSlot::Index(row_i) => (0..visible_cols)
|
||||
.map(|col_i| table_cell(&columns[col_i], row_i))
|
||||
.collect(),
|
||||
PreviewSlot::Ellipsis => vec!["...".to_string(); visible_cols.max(1)],
|
||||
};
|
||||
if num_cols > visible_cols {
|
||||
row.push("...".to_string());
|
||||
}
|
||||
row
|
||||
})
|
||||
.collect();
|
||||
|
||||
render_preview(
|
||||
vec![format!(
|
||||
"Table [table, attr={}]",
|
||||
attribute_label(table.attribute())
|
||||
)],
|
||||
headers,
|
||||
body_rows,
|
||||
vec![format!("shape: ({num_rows}, {num_cols})")],
|
||||
)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// UnaryPrimitive
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn format_unary_primitive(opcode: i8) -> String {
|
||||
render_preview(
|
||||
vec!["UnaryPrimitive [unary_primitive]".to_string()],
|
||||
vec!["opcode".to_string()],
|
||||
vec![vec![format!("0x{opcode:02x}")]],
|
||||
vec!["shape: (1,)".to_string()],
|
||||
)
|
||||
}
|
||||
26
crates/qroissant-python/src/repr/mod.rs
Normal file
26
crates/qroissant-python/src/repr/mod.rs
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
//! Pretty repr system for qroissant Python values.
|
||||
//!
|
||||
//! This module provides:
|
||||
//! - [`options`] — global `FormattingOptions`, `RowDisplay`, and associated
|
||||
//! builder and pyfunctions (`get_formatting_options`, `set_formatting_options`,
|
||||
//! `reset_formatting_options`).
|
||||
//! - [`cell`] — individual q value → string conversion without Arrow.
|
||||
//! - [`render`] — ASCII table rendering via `tabled` and `preview_slots`.
|
||||
//! - [`format`] — shape-level formatting functions called by `__repr__`/`__str__`.
|
||||
|
||||
pub mod cell;
|
||||
pub mod format;
|
||||
pub mod options;
|
||||
pub mod render;
|
||||
|
||||
pub use format::format_atom;
|
||||
pub use format::format_dictionary;
|
||||
pub use format::format_list;
|
||||
pub use format::format_table;
|
||||
pub use format::format_vector;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::PyModule;
|
||||
|
||||
pub fn register(module: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
options::register(module)
|
||||
}
|
||||
172
crates/qroissant-python/src/repr/options.rs
Normal file
172
crates/qroissant-python/src/repr/options.rs
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
//! Global repr formatting options and associated Python types.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
use std::sync::RwLock;
|
||||
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::PyModule;
|
||||
|
||||
/// Row selection strategy used by qroissant repr formatting.
|
||||
#[derive(PartialEq, Eq, Default, Clone, Copy, Debug)]
|
||||
#[pyclass(
|
||||
eq,
|
||||
eq_int,
|
||||
frozen,
|
||||
rename_all = "SCREAMING_SNAKE_CASE",
|
||||
module = "qroissant"
|
||||
)]
|
||||
pub enum RowDisplay {
|
||||
/// Show the first `max_rows` rows followed by an ellipsis when truncated.
|
||||
#[default]
|
||||
Head,
|
||||
/// Show the first half and last half of rows with an ellipsis in the middle.
|
||||
HeadTail,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl RowDisplay {
|
||||
fn __repr__(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Head => "RowDisplay.HEAD",
|
||||
Self::HeadTail => "RowDisplay.HEAD_TAIL",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Formatting options for user-facing qroissant string representations.
|
||||
///
|
||||
/// Notes
|
||||
/// -----
|
||||
/// These options control how qroissant values render through `str(...)` and
|
||||
/// `repr(...)`. Apply them process-wide through `set_formatting_options(...)`.
|
||||
#[pyclass(get_all, eq, frozen, skip_from_py_object, module = "qroissant")]
|
||||
#[derive(PartialEq, Eq, Clone, Debug)]
|
||||
pub struct FormattingOptions {
|
||||
pub max_rows: usize,
|
||||
pub max_columns: usize,
|
||||
pub row_display: RowDisplay,
|
||||
}
|
||||
|
||||
impl Default for FormattingOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
max_rows: 8,
|
||||
max_columns: 6,
|
||||
row_display: RowDisplay::Head,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl FormattingOptions {
|
||||
#[staticmethod]
|
||||
/// Create a builder initialized with qroissant's default formatting policy.
|
||||
fn builder() -> FormattingOptionsBuilder {
|
||||
FormattingOptionsBuilder::default()
|
||||
}
|
||||
|
||||
fn __repr__(&self) -> String {
|
||||
format!(
|
||||
"FormattingOptions(max_rows={}, max_columns={}, row_display={})",
|
||||
self.max_rows,
|
||||
self.max_columns,
|
||||
self.row_display.__repr__(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder for [`FormattingOptions`].
|
||||
#[pyclass(skip_from_py_object, module = "qroissant")]
|
||||
#[derive(Default, Clone, Debug)]
|
||||
pub struct FormattingOptionsBuilder {
|
||||
inner: FormattingOptions,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl FormattingOptionsBuilder {
|
||||
#[pyo3(signature = (value, /))]
|
||||
fn with_max_rows(&self, value: usize) -> Self {
|
||||
let mut b = self.clone();
|
||||
b.inner.max_rows = value;
|
||||
b
|
||||
}
|
||||
|
||||
#[pyo3(signature = (value, /))]
|
||||
fn with_max_columns(&self, value: usize) -> Self {
|
||||
let mut b = self.clone();
|
||||
b.inner.max_columns = value;
|
||||
b
|
||||
}
|
||||
|
||||
#[pyo3(signature = (value, /))]
|
||||
fn with_row_display(&self, value: RowDisplay) -> Self {
|
||||
let mut b = self.clone();
|
||||
b.inner.row_display = value;
|
||||
b
|
||||
}
|
||||
|
||||
/// Finalize the builder into an immutable `FormattingOptions` instance.
|
||||
fn build(&self) -> FormattingOptions {
|
||||
self.inner.clone()
|
||||
}
|
||||
|
||||
fn __repr__(&self) -> String {
|
||||
format!("FormattingOptionsBuilder({})", self.inner.__repr__())
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Global state
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn options_lock() -> &'static RwLock<FormattingOptions> {
|
||||
static OPTIONS: OnceLock<RwLock<FormattingOptions>> = OnceLock::new();
|
||||
OPTIONS.get_or_init(|| RwLock::new(FormattingOptions::default()))
|
||||
}
|
||||
|
||||
pub fn active_options() -> FormattingOptions {
|
||||
match options_lock().read() {
|
||||
Ok(guard) => guard.clone(),
|
||||
Err(poisoned) => poisoned.into_inner().clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn store_options(options: FormattingOptions) {
|
||||
match options_lock().write() {
|
||||
Ok(mut guard) => *guard = options,
|
||||
Err(poisoned) => *poisoned.into_inner() = options,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Python-visible functions
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[pyfunction]
|
||||
/// Return the active qroissant repr formatting options.
|
||||
pub fn get_formatting_options() -> FormattingOptions {
|
||||
active_options()
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (options, /))]
|
||||
/// Update the active qroissant repr formatting options.
|
||||
pub fn set_formatting_options(options: PyRef<'_, FormattingOptions>) {
|
||||
store_options(options.clone());
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
/// Restore qroissant's default repr formatting options.
|
||||
pub fn reset_formatting_options() {
|
||||
store_options(FormattingOptions::default());
|
||||
}
|
||||
|
||||
pub fn register(module: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
module.add_class::<RowDisplay>()?;
|
||||
module.add_class::<FormattingOptions>()?;
|
||||
module.add_class::<FormattingOptionsBuilder>()?;
|
||||
module.add_function(wrap_pyfunction!(get_formatting_options, module)?)?;
|
||||
module.add_function(wrap_pyfunction!(set_formatting_options, module)?)?;
|
||||
module.add_function(wrap_pyfunction!(reset_formatting_options, module)?)?;
|
||||
Ok(())
|
||||
}
|
||||
80
crates/qroissant-python/src/repr/render.rs
Normal file
80
crates/qroissant-python/src/repr/render.rs
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
//! ASCII table rendering via the `tabled` crate and row-slot utilities.
|
||||
|
||||
use tabled::builder::Builder;
|
||||
use tabled::settings::Alignment;
|
||||
use tabled::settings::Modify;
|
||||
use tabled::settings::Style;
|
||||
use tabled::settings::object::Rows;
|
||||
use tabled::settings::style::HorizontalLine;
|
||||
|
||||
use super::options::RowDisplay;
|
||||
|
||||
/// One line of a preview: a real row, or the marker standing in for the rows
/// that were left out.
#[derive(Clone, Copy)]
pub enum PreviewSlot {
    /// Show the row at this index of the underlying data.
    Index(usize),
    /// Show an ellipsis in place of the omitted rows.
    Ellipsis,
}
|
||||
|
||||
/// Compute the row slots to show when rendering at most `max_rows` out of
|
||||
/// `total`, using `row_display` to decide whether to use head or head+tail.
|
||||
pub fn preview_slots(total: usize, max_rows: usize, row_display: RowDisplay) -> Vec<PreviewSlot> {
|
||||
if total == 0 || max_rows == 0 {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
if total <= max_rows {
|
||||
return (0..total).map(PreviewSlot::Index).collect();
|
||||
}
|
||||
|
||||
match row_display {
|
||||
RowDisplay::Head => {
|
||||
let mut slots = (0..max_rows).map(PreviewSlot::Index).collect::<Vec<_>>();
|
||||
slots.push(PreviewSlot::Ellipsis);
|
||||
slots
|
||||
}
|
||||
RowDisplay::HeadTail if max_rows == 1 => {
|
||||
vec![PreviewSlot::Index(0), PreviewSlot::Ellipsis]
|
||||
}
|
||||
RowDisplay::HeadTail => {
|
||||
let head = max_rows.div_ceil(2);
|
||||
let tail = max_rows / 2;
|
||||
let mut slots = (0..head).map(PreviewSlot::Index).collect::<Vec<_>>();
|
||||
slots.push(PreviewSlot::Ellipsis);
|
||||
let tail_start = total.saturating_sub(tail);
|
||||
slots.extend((tail_start..total).map(PreviewSlot::Index));
|
||||
slots
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Build an ASCII table with a modern style and a horizontal line after the
|
||||
/// header row.
|
||||
pub fn render_table(headers: Vec<String>, rows: Vec<Vec<String>>) -> String {
|
||||
let mut builder = Builder::default();
|
||||
builder.push_record(headers);
|
||||
for row in rows {
|
||||
builder.push_record(row);
|
||||
}
|
||||
let mut table = builder.build();
|
||||
table.with(
|
||||
Style::modern()
|
||||
.remove_horizontal()
|
||||
.horizontals([(1, HorizontalLine::inherit(Style::modern()))]),
|
||||
);
|
||||
table.with(Modify::new(Rows::first()).with(Alignment::left()));
|
||||
table.to_string()
|
||||
}
|
||||
|
||||
/// Assemble a full repr block: optional title lines, a table, optional footer.
|
||||
pub fn render_preview(
|
||||
title_lines: Vec<String>,
|
||||
headers: Vec<String>,
|
||||
rows: Vec<Vec<String>>,
|
||||
footer_lines: Vec<String>,
|
||||
) -> String {
|
||||
let mut sections = title_lines;
|
||||
sections.push(render_table(headers, rows));
|
||||
sections.extend(footer_lines);
|
||||
sections.join("\n")
|
||||
}
|
||||
215
crates/qroissant-python/src/serde.rs
Normal file
215
crates/qroissant-python/src/serde.rs
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::PyAny;
|
||||
use pyo3::types::PyBytes;
|
||||
use qroissant_arrow::ListProjection;
|
||||
use qroissant_arrow::ProjectionOptions;
|
||||
use qroissant_arrow::StringProjection;
|
||||
use qroissant_arrow::SymbolProjection;
|
||||
use qroissant_core::DecodeOptions as CoreDecodeOptions;
|
||||
use qroissant_core::Value as CoreValue;
|
||||
use qroissant_core::decode_message_with_options;
|
||||
use qroissant_core::encode_message;
|
||||
use qroissant_transport::extract_q_error;
|
||||
|
||||
use crate::errors::PythonError;
|
||||
use crate::errors::PythonResult;
|
||||
use crate::errors::to_py_err;
|
||||
use crate::types::Compression;
|
||||
use crate::types::DecodeOptions;
|
||||
use crate::types::EncodeOptions;
|
||||
use crate::types::Encoding;
|
||||
use crate::types::ListInterpretation;
|
||||
use crate::types::MessageType;
|
||||
use crate::types::StringInterpretation;
|
||||
use crate::types::SymbolInterpretation;
|
||||
use crate::values::core_value_to_python_with_opts;
|
||||
use crate::values::python_to_core_value;
|
||||
|
||||
/// Maps Python-facing "Interpretation" options to Rust-internal "Projection" options.
|
||||
///
|
||||
/// The Python API uses "Interpretation" (e.g. `SymbolInterpretation`) as it describes
|
||||
/// how the user wants data to be interpreted. The Rust/Arrow layer uses "Projection"
|
||||
/// (e.g. `SymbolProjection`) as it describes how values are projected into Arrow arrays.
|
||||
/// Both refer to the same concept viewed from different perspectives.
|
||||
pub fn decode_options_to_proj_opts(opts: Option<&DecodeOptions>) -> Arc<ProjectionOptions> {
|
||||
let opts = opts.map(|o| o.clone()).unwrap_or_default();
|
||||
Arc::new(ProjectionOptions {
|
||||
symbol: match opts.symbol_interpretation_value() {
|
||||
SymbolInterpretation::Utf8 => SymbolProjection::Utf8,
|
||||
SymbolInterpretation::LargeUtf8 => SymbolProjection::LargeUtf8,
|
||||
SymbolInterpretation::Utf8View => SymbolProjection::Utf8View,
|
||||
SymbolInterpretation::Dictionary => SymbolProjection::Dictionary,
|
||||
SymbolInterpretation::RawBytes => SymbolProjection::RawBytes,
|
||||
},
|
||||
string: match opts.string_interpretation_value() {
|
||||
StringInterpretation::Utf8 => StringProjection::Utf8,
|
||||
StringInterpretation::Binary => StringProjection::Binary,
|
||||
},
|
||||
list: match opts.list_interpretation_value() {
|
||||
ListInterpretation::List => ListProjection::List,
|
||||
ListInterpretation::LargeList => ListProjection::LargeList,
|
||||
ListInterpretation::ListView => ListProjection::ListView,
|
||||
},
|
||||
union_mode: match opts.union_mode_value() {
|
||||
crate::types::UnionMode::Dense => qroissant_arrow::UnionMode::Dense,
|
||||
crate::types::UnionMode::Sparse => qroissant_arrow::UnionMode::Sparse,
|
||||
},
|
||||
treat_infinity_as_null: opts.treat_infinity_as_null(),
|
||||
parallel: opts.parallel_value(),
|
||||
assume_symbol_utf8: opts.assume_symbol_utf8_value(),
|
||||
})
|
||||
}
|
||||
|
||||
fn decode_options_to_core(opts: &DecodeOptions) -> CoreDecodeOptions {
|
||||
CoreDecodeOptions {
|
||||
parallel: opts.parallel_value(),
|
||||
..CoreDecodeOptions::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn ensure_default_encode_options(options: Option<&EncodeOptions>) -> PythonResult<()> {
|
||||
if let Some(options) = options
|
||||
&& options != &EncodeOptions::default()
|
||||
{
|
||||
return Err(PythonError::NotImplemented(
|
||||
"custom encode options are not implemented yet".to_string(),
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn decode_core_value(
|
||||
payload: bytes::Bytes,
|
||||
options: Option<&DecodeOptions>,
|
||||
) -> PythonResult<(CoreValue, Arc<ProjectionOptions>)> {
|
||||
if let Some(message) =
|
||||
extract_q_error(payload.as_ref()).map_err(crate::errors::map_transport_error)?
|
||||
{
|
||||
return Err(PythonError::QRuntime(message));
|
||||
}
|
||||
let core_opts = options.map(decode_options_to_core).unwrap_or_default();
|
||||
let decoded = decode_message_with_options(payload, &core_opts)
|
||||
.map_err(|error| PythonError::Decode(error.to_string()))?;
|
||||
let proj_opts = decode_options_to_proj_opts(options);
|
||||
let (_header, value) = decoded.into_parts();
|
||||
Ok((value, proj_opts))
|
||||
}
|
||||
|
||||
/// Wraps a Python `bytes` object in a [`bytes::Bytes`] without copying.
|
||||
///
|
||||
/// CPython `bytes` objects are immutable and their backing memory is never
|
||||
/// moved, so it is sound to hold a raw pointer into them as long as the
|
||||
/// `Py<PyBytes>` reference (which increments the CPython refcount) is alive.
|
||||
struct PinnedPyBytes {
|
||||
_owner: Py<PyBytes>,
|
||||
ptr: *const u8,
|
||||
len: usize,
|
||||
}
|
||||
|
||||
// SAFETY: `Py<PyBytes>` is `Send`, and the pointed-to memory is immutable.
|
||||
unsafe impl Send for PinnedPyBytes {}
|
||||
// SAFETY: The data is immutable and the owner keeps it alive.
|
||||
unsafe impl Sync for PinnedPyBytes {}
|
||||
|
||||
impl AsRef<[u8]> for PinnedPyBytes {
|
||||
#[inline]
|
||||
fn as_ref(&self) -> &[u8] {
|
||||
// SAFETY: `ptr` is valid for `len` bytes while `_owner` keeps the
|
||||
// CPython bytes object alive (refcount > 0, no deallocation possible).
|
||||
unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
|
||||
}
|
||||
}
|
||||
|
||||
/// Smallest payload, in bytes, that takes the zero-copy `PinnedPyBytes` path.
///
/// Below this size the `Arc` allocation made by `Bytes::from_owner` is
/// expected to cost more than a plain `memcpy`, so smaller payloads are
/// copied instead.
const ZERO_COPY_MIN_BYTES: usize = 32 << 10; // 32 KiB
|
||||
|
||||
/// Converts a Python `bytes`-like object into a [`bytes::Bytes`].
|
||||
///
|
||||
/// For plain `bytes` objects ≥ [`ZERO_COPY_MIN_BYTES`] the underlying buffer
|
||||
/// is **borrowed without copying** via [`bytes::Bytes::from_owner`].
|
||||
/// Smaller payloads and other buffer protocols (bytearray, memoryview) take a
|
||||
/// single copy — same cost as before.
|
||||
fn payload_to_bytes(payload: &Bound<'_, PyAny>) -> PyResult<bytes::Bytes> {
|
||||
if let Ok(pb) = payload.downcast::<PyBytes>() {
|
||||
let data = pb.as_bytes();
|
||||
if data.len() >= ZERO_COPY_MIN_BYTES {
|
||||
let pinned = PinnedPyBytes {
|
||||
_owner: pb.clone().unbind(),
|
||||
ptr: data.as_ptr(),
|
||||
len: data.len(),
|
||||
};
|
||||
return Ok(bytes::Bytes::from_owner(pinned));
|
||||
}
|
||||
return Ok(bytes::Bytes::copy_from_slice(data));
|
||||
}
|
||||
Ok(bytes::Bytes::from(payload.extract::<Vec<u8>>()?))
|
||||
}
|
||||
|
||||
pub fn encode_core_value_bytes(
|
||||
value: &CoreValue,
|
||||
options: Option<&EncodeOptions>,
|
||||
encoding: Encoding,
|
||||
message_type: MessageType,
|
||||
compression: Compression,
|
||||
) -> PythonResult<Vec<u8>> {
|
||||
ensure_default_encode_options(options)?;
|
||||
encode_message(
|
||||
value,
|
||||
encoding.into(),
|
||||
message_type.into(),
|
||||
compression.into(),
|
||||
)
|
||||
.map_err(|error| PythonError::Protocol(error.to_string()))
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (payload, /, *, options=None))]
|
||||
pub fn decode(
|
||||
py: Python<'_>,
|
||||
payload: &Bound<'_, PyAny>,
|
||||
options: Option<&DecodeOptions>,
|
||||
) -> PyResult<Py<PyAny>> {
|
||||
let bytes = payload_to_bytes(payload)?;
|
||||
let options_clone = options.cloned();
|
||||
let (value, proj_opts) = py
|
||||
.detach(|| decode_core_value(bytes, options_clone.as_ref()))
|
||||
.map_err(to_py_err)?;
|
||||
core_value_to_python_with_opts(py, value, proj_opts)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (value, /, *, options=None, encoding=Encoding::LittleEndian, message_type=MessageType::Asynchronous, compression=Compression::Uncompressed))]
|
||||
pub fn encode(
|
||||
py: Python<'_>,
|
||||
value: &Bound<'_, PyAny>,
|
||||
options: Option<&EncodeOptions>,
|
||||
encoding: Encoding,
|
||||
message_type: MessageType,
|
||||
compression: Compression,
|
||||
) -> PyResult<Py<PyBytes>> {
|
||||
let value = python_to_core_value(value)?;
|
||||
let options_clone = options.cloned();
|
||||
let payload = py
|
||||
.detach(|| {
|
||||
encode_core_value_bytes(
|
||||
&value,
|
||||
options_clone.as_ref(),
|
||||
encoding,
|
||||
message_type,
|
||||
compression,
|
||||
)
|
||||
})
|
||||
.map_err(to_py_err)?;
|
||||
Ok(PyBytes::new(py, &payload).unbind())
|
||||
}
|
||||
|
||||
/// Adds the `decode` and `encode` functions to `module`.
///
/// # Errors
///
/// Returns the first error raised while wrapping or adding either function.
pub fn register(module: &Bound<'_, PyModule>) -> PyResult<()> {
    let decode_fn = wrap_pyfunction!(decode, module)?;
    module.add_function(decode_fn)?;

    let encode_fn = wrap_pyfunction!(encode, module)?;
    module.add_function(encode_fn)
}
|
||||
1325
crates/qroissant-python/src/types.rs
Normal file
1325
crates/qroissant-python/src/types.rs
Normal file
File diff suppressed because it is too large
Load diff
925
crates/qroissant-python/src/values.rs
Normal file
925
crates/qroissant-python/src/values.rs
Normal file
|
|
@ -0,0 +1,925 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use pyo3::exceptions::PyIndexError;
|
||||
use pyo3::exceptions::PyKeyError;
|
||||
use pyo3::exceptions::PyNotImplementedError;
|
||||
use pyo3::exceptions::PyValueError;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::PyAny;
|
||||
use pyo3::types::PyBytes;
|
||||
use pyo3::types::PyCapsule;
|
||||
use pyo3::types::PyDict;
|
||||
use pyo3::types::PyIterator;
|
||||
use pyo3::types::PyList;
|
||||
use pyo3::types::PyTuple;
|
||||
use pyo3_arrow::ffi::ArrayIterator;
|
||||
use pyo3_arrow::ffi::to_array_pycapsules;
|
||||
use pyo3_arrow::ffi::to_stream_pycapsule;
|
||||
use qroissant_arrow::IngestionError;
|
||||
use qroissant_arrow::ProjectionOptions;
|
||||
use qroissant_arrow::ingest_array;
|
||||
use qroissant_arrow::ingest_record_batch;
|
||||
use qroissant_arrow::ingest_record_batch_reader;
|
||||
use qroissant_arrow::project;
|
||||
use qroissant_arrow::project_table;
|
||||
use qroissant_core::Atom as CoreAtom;
|
||||
use qroissant_core::Dictionary as CoreDictionary;
|
||||
use qroissant_core::List as CoreList;
|
||||
use qroissant_core::Table as CoreTable;
|
||||
use qroissant_core::Value as CoreValue;
|
||||
use qroissant_core::Vector as CoreVector;
|
||||
use qroissant_core::VectorData;
|
||||
|
||||
use crate::errors::to_py_err;
|
||||
use crate::types::Attribute;
|
||||
use crate::types::Compression;
|
||||
use crate::types::Encoding;
|
||||
use crate::types::MessageType;
|
||||
use crate::types::Primitive;
|
||||
use crate::types::Shape;
|
||||
use crate::types::Type;
|
||||
|
||||
#[pyclass(subclass, module = "qroissant")]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Value {
|
||||
inner: CoreValue,
|
||||
projection_opts: Arc<ProjectionOptions>,
|
||||
}
|
||||
|
||||
impl Value {
|
||||
pub fn new(inner: CoreValue) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
projection_opts: Arc::new(ProjectionOptions::default()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_with_opts(inner: CoreValue, opts: Arc<ProjectionOptions>) -> Self {
|
||||
Self {
|
||||
inner,
|
||||
projection_opts: opts,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn inner(&self) -> &CoreValue {
|
||||
&self.inner
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> CoreValue {
|
||||
self.inner
|
||||
}
|
||||
|
||||
pub fn projection_opts(&self) -> &Arc<ProjectionOptions> {
|
||||
&self.projection_opts
|
||||
}
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl Value {
|
||||
#[getter]
|
||||
fn qtype(&self) -> Type {
|
||||
Type::from(self.inner.qtype())
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn primitive(&self) -> Option<Primitive> {
|
||||
self.inner.qtype().primitive.map(Primitive::from)
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn shape(&self) -> Shape {
|
||||
Shape::from(self.inner.qtype().shape)
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn attribute(&self) -> Option<Attribute> {
|
||||
self.inner.qtype().attribute.map(Attribute::from)
|
||||
}
|
||||
|
||||
#[pyo3(signature = (*, options=None, encoding=Encoding::LittleEndian, message_type=MessageType::Asynchronous, compression=Compression::Uncompressed))]
|
||||
fn serialize(
|
||||
&self,
|
||||
options: Option<&crate::types::EncodeOptions>,
|
||||
encoding: Encoding,
|
||||
message_type: MessageType,
|
||||
compression: Compression,
|
||||
) -> PyResult<Py<PyBytes>> {
|
||||
let inner = self.inner.clone();
|
||||
let options_clone = options.cloned();
|
||||
Python::attach(|py| {
|
||||
let payload = py
|
||||
.detach(|| {
|
||||
crate::serde::encode_core_value_bytes(
|
||||
&inner,
|
||||
options_clone.as_ref(),
|
||||
encoding,
|
||||
message_type,
|
||||
compression,
|
||||
)
|
||||
})
|
||||
.map_err(to_py_err)?;
|
||||
Ok(PyBytes::new(py, &payload).unbind())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(extends = Value, module = "qroissant")]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Atom;
|
||||
|
||||
#[pymethods]
|
||||
impl Atom {
|
||||
#[new]
|
||||
fn new(qtype: PyRef<'_, Type>, value: &Bound<'_, PyAny>) -> PyResult<(Self, Value)> {
|
||||
let core = atom_from_python(&qtype, value)?;
|
||||
Ok((Self, Value::new(CoreValue::Atom(core))))
|
||||
}
|
||||
|
||||
fn as_py(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyAny>> {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Atom(atom) => atom_to_python(py, atom),
|
||||
_ => unreachable!("Atom instances always hold q atoms"),
|
||||
}
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn value(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyAny>> {
|
||||
Self::as_py(slf, py)
|
||||
}
|
||||
|
||||
fn is_null(slf: PyRef<'_, Self>) -> bool {
|
||||
use qroissant_kernels::nulls::*;
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Atom(atom) => match atom {
|
||||
CoreAtom::Boolean(_)
|
||||
| CoreAtom::Guid(_)
|
||||
| CoreAtom::Byte(_)
|
||||
| CoreAtom::Char(_)
|
||||
| CoreAtom::Symbol(_) => false,
|
||||
CoreAtom::Short(v) => *v == Q_NULL_SHORT,
|
||||
CoreAtom::Int(v) => *v == Q_NULL_INT,
|
||||
CoreAtom::Long(v) => *v == Q_NULL_LONG,
|
||||
CoreAtom::Real(v) => v.is_nan(),
|
||||
CoreAtom::Float(v) => v.is_nan(),
|
||||
CoreAtom::Timestamp(v) => *v == Q_NULL_TIMESTAMP,
|
||||
CoreAtom::Month(v) => *v == Q_NULL_MONTH,
|
||||
CoreAtom::Date(v) => *v == Q_NULL_DATE,
|
||||
CoreAtom::Datetime(v) => v.is_nan(),
|
||||
CoreAtom::Timespan(v) => *v == Q_NULL_TIMESPAN,
|
||||
CoreAtom::Minute(v) => *v == Q_NULL_MINUTE,
|
||||
CoreAtom::Second(v) => *v == Q_NULL_SECOND,
|
||||
CoreAtom::Time(v) => *v == Q_NULL_TIME,
|
||||
},
|
||||
_ => unreachable!("Atom instances always hold q atoms"),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_infinite(slf: PyRef<'_, Self>) -> bool {
|
||||
use qroissant_kernels::nulls::*;
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Atom(atom) => match atom {
|
||||
CoreAtom::Boolean(_)
|
||||
| CoreAtom::Guid(_)
|
||||
| CoreAtom::Byte(_)
|
||||
| CoreAtom::Char(_)
|
||||
| CoreAtom::Symbol(_) => false,
|
||||
CoreAtom::Short(v) => *v == Q_INF_SHORT || *v == Q_NINF_SHORT,
|
||||
CoreAtom::Int(v) => *v == Q_INF_INT || *v == Q_NINF_INT,
|
||||
CoreAtom::Long(v) => *v == Q_INF_LONG || *v == Q_NINF_LONG,
|
||||
CoreAtom::Real(v) => v.is_infinite(),
|
||||
CoreAtom::Float(v) => v.is_infinite(),
|
||||
CoreAtom::Timestamp(v) => *v == Q_INF_TIMESTAMP || *v == Q_NINF_TIMESTAMP,
|
||||
CoreAtom::Month(v) => *v == Q_INF_MONTH || *v == Q_NINF_MONTH,
|
||||
CoreAtom::Date(v) => *v == Q_INF_DATE || *v == Q_NINF_DATE,
|
||||
CoreAtom::Datetime(v) => v.is_infinite(),
|
||||
CoreAtom::Timespan(v) => *v == Q_INF_TIMESPAN || *v == Q_NINF_TIMESPAN,
|
||||
CoreAtom::Minute(v) => *v == Q_INF_MINUTE || *v == Q_NINF_MINUTE,
|
||||
CoreAtom::Second(v) => *v == Q_INF_SECOND || *v == Q_NINF_SECOND,
|
||||
CoreAtom::Time(v) => *v == Q_INF_TIME || *v == Q_NINF_TIME,
|
||||
},
|
||||
_ => unreachable!("Atom instances always hold q atoms"),
|
||||
}
|
||||
}
|
||||
|
||||
#[pyo3(signature = (requested_schema=None))]
|
||||
fn __arrow_c_array__(
|
||||
slf: PyRef<'_, Self>,
|
||||
py: Python<'_>,
|
||||
requested_schema: Option<Bound<'_, PyAny>>,
|
||||
) -> PyResult<Py<PyTuple>> {
|
||||
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
|
||||
.map(|s| s.downcast_into::<PyCapsule>())
|
||||
.transpose()?;
|
||||
let opts = slf.as_super().projection_opts().clone();
|
||||
let export = project(slf.as_super().inner(), &opts)
|
||||
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
|
||||
let capsules =
|
||||
to_array_pycapsules(py, export.field, export.array.as_ref(), schema_capsule)?;
|
||||
Ok(capsules.unbind())
|
||||
}
|
||||
|
||||
fn __repr__(slf: PyRef<'_, Self>) -> String {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Atom(atom) => crate::repr::format_atom(atom),
|
||||
_ => unreachable!("Atom instances always hold q atoms"),
|
||||
}
|
||||
}
|
||||
|
||||
fn __str__(slf: PyRef<'_, Self>) -> String {
|
||||
Self::__repr__(slf)
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(extends = Value, module = "qroissant")]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Vector;
|
||||
|
||||
#[pymethods]
|
||||
impl Vector {
|
||||
#[new]
|
||||
fn new(qtype: PyRef<'_, Type>, values: Option<&Bound<'_, PyAny>>) -> PyResult<(Self, Value)> {
|
||||
let core = if let Some(values) = values {
|
||||
vector_from_python(&qtype, values)?
|
||||
} else {
|
||||
let empty = PyList::empty(qtype.py());
|
||||
vector_from_python(&qtype, empty.as_any())?
|
||||
};
|
||||
Ok((Self, Value::new(CoreValue::Vector(core))))
|
||||
}
|
||||
|
||||
fn __len__(slf: PyRef<'_, Self>) -> usize {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Vector(vector) => vector.len(),
|
||||
_ => unreachable!("Vector instances always hold q vectors"),
|
||||
}
|
||||
}
|
||||
|
||||
fn __iter__(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyAny>> {
|
||||
let list = Self::to_list(slf, py)?;
|
||||
let iter = PyIterator::from_object(list.bind(py).as_any())?;
|
||||
Ok(iter.into_any().unbind())
|
||||
}
|
||||
|
||||
fn __getitem__(slf: PyRef<'_, Self>, py: Python<'_>, index: isize) -> PyResult<Py<PyAny>> {
|
||||
let vector = match slf.as_super().inner() {
|
||||
CoreValue::Vector(vector) => vector,
|
||||
_ => unreachable!("Vector instances always hold q vectors"),
|
||||
};
|
||||
let index = normalize_index(index, vector.len())?;
|
||||
vector_item_to_python(py, vector, index)
|
||||
}
|
||||
|
||||
fn to_list(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyList>> {
|
||||
let vector = match slf.as_super().inner() {
|
||||
CoreValue::Vector(vector) => vector,
|
||||
_ => unreachable!("Vector instances always hold q vectors"),
|
||||
};
|
||||
vector_to_pylist(py, vector)
|
||||
}
|
||||
|
||||
#[pyo3(signature = (requested_schema=None))]
|
||||
fn __arrow_c_array__(
|
||||
slf: PyRef<'_, Self>,
|
||||
py: Python<'_>,
|
||||
requested_schema: Option<Bound<'_, PyAny>>,
|
||||
) -> PyResult<Py<PyTuple>> {
|
||||
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
|
||||
.map(|s| s.downcast_into::<PyCapsule>())
|
||||
.transpose()?;
|
||||
let opts = slf.as_super().projection_opts().clone();
|
||||
let export = project(slf.as_super().inner(), &opts)
|
||||
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
|
||||
let capsules =
|
||||
to_array_pycapsules(py, export.field, export.array.as_ref(), schema_capsule)?;
|
||||
Ok(capsules.unbind())
|
||||
}
|
||||
|
||||
fn __repr__(slf: PyRef<'_, Self>) -> String {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Vector(vector) => crate::repr::format_vector(vector),
|
||||
_ => unreachable!("Vector instances always hold q vectors"),
|
||||
}
|
||||
}
|
||||
|
||||
fn __str__(slf: PyRef<'_, Self>) -> String {
|
||||
Self::__repr__(slf)
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(extends = Value, module = "qroissant")]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct List;
|
||||
|
||||
#[pymethods]
|
||||
impl List {
|
||||
#[new]
|
||||
fn new(qtype: PyRef<'_, Type>, values: Option<&Bound<'_, PyAny>>) -> PyResult<(Self, Value)> {
|
||||
let core = if let Some(values) = values {
|
||||
list_from_python(&qtype, values)?
|
||||
} else {
|
||||
let empty = PyList::empty(qtype.py());
|
||||
list_from_python(&qtype, empty.as_any())?
|
||||
};
|
||||
Ok((Self, Value::new(CoreValue::List(core))))
|
||||
}
|
||||
|
||||
fn __len__(slf: PyRef<'_, Self>) -> usize {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::List(list) => list.len(),
|
||||
_ => unreachable!("List instances always hold q lists"),
|
||||
}
|
||||
}
|
||||
|
||||
fn __iter__(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyAny>> {
|
||||
let list = Self::to_list(slf, py)?;
|
||||
let iter = PyIterator::from_object(list.bind(py).as_any())?;
|
||||
Ok(iter.into_any().unbind())
|
||||
}
|
||||
|
||||
fn __getitem__(slf: PyRef<'_, Self>, py: Python<'_>, index: isize) -> PyResult<Py<PyAny>> {
|
||||
let list = match slf.as_super().inner() {
|
||||
CoreValue::List(list) => list,
|
||||
_ => unreachable!("List instances always hold q lists"),
|
||||
};
|
||||
let index = normalize_index(index, list.len())?;
|
||||
core_value_to_python(py, list.values()[index].clone())
|
||||
}
|
||||
|
||||
fn to_list(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyList>> {
|
||||
let list = match slf.as_super().inner() {
|
||||
CoreValue::List(list) => list,
|
||||
_ => unreachable!("List instances always hold q lists"),
|
||||
};
|
||||
let mut values = Vec::with_capacity(list.len());
|
||||
for value in list.values() {
|
||||
values.push(core_value_to_python(py, value.clone())?);
|
||||
}
|
||||
Ok(PyList::new(py, values)?.unbind())
|
||||
}
|
||||
|
||||
#[pyo3(signature = (requested_schema=None))]
|
||||
fn __arrow_c_array__(
|
||||
slf: PyRef<'_, Self>,
|
||||
py: Python<'_>,
|
||||
requested_schema: Option<Bound<'_, PyAny>>,
|
||||
) -> PyResult<Py<PyTuple>> {
|
||||
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
|
||||
.map(|s| s.downcast_into::<PyCapsule>())
|
||||
.transpose()?;
|
||||
let opts = slf.as_super().projection_opts().clone();
|
||||
let export = project(slf.as_super().inner(), &opts)
|
||||
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
|
||||
let capsules =
|
||||
to_array_pycapsules(py, export.field, export.array.as_ref(), schema_capsule)?;
|
||||
Ok(capsules.unbind())
|
||||
}
|
||||
|
||||
fn __repr__(slf: PyRef<'_, Self>) -> String {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::List(list) => crate::repr::format_list(list),
|
||||
_ => unreachable!("List instances always hold q lists"),
|
||||
}
|
||||
}
|
||||
|
||||
fn __str__(slf: PyRef<'_, Self>) -> String {
|
||||
Self::__repr__(slf)
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(extends = Value, module = "qroissant")]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Dictionary;
|
||||
|
||||
#[pymethods]
|
||||
impl Dictionary {
|
||||
#[new]
|
||||
fn new(
|
||||
qtype: PyRef<'_, Type>,
|
||||
keys: &Bound<'_, PyAny>,
|
||||
values: &Bound<'_, PyAny>,
|
||||
) -> PyResult<(Self, Value)> {
|
||||
let core = dictionary_from_python(&qtype, keys, values)?;
|
||||
Ok((Self, Value::new(CoreValue::Dictionary(core))))
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn keys(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyAny>> {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Dictionary(dictionary) => {
|
||||
core_value_to_python(py, dictionary.keys().clone())
|
||||
}
|
||||
_ => unreachable!("Dictionary instances always hold q dictionaries"),
|
||||
}
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn values(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyAny>> {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Dictionary(dictionary) => {
|
||||
core_value_to_python(py, dictionary.values().clone())
|
||||
}
|
||||
_ => unreachable!("Dictionary instances always hold q dictionaries"),
|
||||
}
|
||||
}
|
||||
|
||||
fn __len__(slf: PyRef<'_, Self>) -> usize {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Dictionary(dictionary) => dictionary.len(),
|
||||
_ => unreachable!("Dictionary instances always hold q dictionaries"),
|
||||
}
|
||||
}
|
||||
|
||||
#[pyo3(signature = (requested_schema=None))]
|
||||
fn __arrow_c_array__(
|
||||
slf: PyRef<'_, Self>,
|
||||
py: Python<'_>,
|
||||
requested_schema: Option<Bound<'_, PyAny>>,
|
||||
) -> PyResult<Py<PyTuple>> {
|
||||
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
|
||||
.map(|s| s.downcast_into::<PyCapsule>())
|
||||
.transpose()?;
|
||||
let opts = slf.as_super().projection_opts().clone();
|
||||
let export = project(slf.as_super().inner(), &opts)
|
||||
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
|
||||
let capsules =
|
||||
to_array_pycapsules(py, export.field, export.array.as_ref(), schema_capsule)?;
|
||||
Ok(capsules.unbind())
|
||||
}
|
||||
|
||||
fn __repr__(slf: PyRef<'_, Self>) -> String {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Dictionary(dict) => crate::repr::format_dictionary(dict),
|
||||
_ => unreachable!("Dictionary instances always hold q dictionaries"),
|
||||
}
|
||||
}
|
||||
|
||||
fn __str__(slf: PyRef<'_, Self>) -> String {
|
||||
Self::__repr__(slf)
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(extends = Value, module = "qroissant")]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Table;
|
||||
|
||||
#[pymethods]
|
||||
impl Table {
|
||||
#[new]
|
||||
fn new(qtype: PyRef<'_, Type>, columns: Option<&Bound<'_, PyAny>>) -> PyResult<(Self, Value)> {
|
||||
let core = if let Some(columns) = columns {
|
||||
table_from_python(&qtype, columns)?
|
||||
} else {
|
||||
let empty = PyDict::new(qtype.py());
|
||||
table_from_python(&qtype, empty.as_any())?
|
||||
};
|
||||
Ok((Self, Value::new(CoreValue::Table(core))))
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn columns(slf: PyRef<'_, Self>) -> PyResult<Vec<String>> {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Table(table) => table
|
||||
.column_names()
|
||||
.iter()
|
||||
.map(|name| {
|
||||
String::from_utf8(name.to_vec()).map_err(|_| {
|
||||
PyValueError::new_err("q table column names must be valid UTF-8 for now")
|
||||
})
|
||||
})
|
||||
.collect(),
|
||||
_ => unreachable!("Table instances always hold q tables"),
|
||||
}
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn num_rows(slf: PyRef<'_, Self>) -> usize {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Table(table) => table.len(),
|
||||
_ => unreachable!("Table instances always hold q tables"),
|
||||
}
|
||||
}
|
||||
|
||||
#[getter]
|
||||
fn num_columns(slf: PyRef<'_, Self>) -> usize {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Table(table) => table.num_columns(),
|
||||
_ => unreachable!("Table instances always hold q tables"),
|
||||
}
|
||||
}
|
||||
|
||||
fn column(slf: PyRef<'_, Self>, py: Python<'_>, name: &str) -> PyResult<Py<PyAny>> {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Table(table) => {
|
||||
let needle = name.as_bytes();
|
||||
for (idx, candidate) in table.column_names().iter().enumerate() {
|
||||
if candidate.as_ref() == needle {
|
||||
return core_value_to_python(py, table.columns()[idx].clone());
|
||||
}
|
||||
}
|
||||
Err(PyKeyError::new_err(name.to_string()))
|
||||
}
|
||||
_ => unreachable!("Table instances always hold q tables"),
|
||||
}
|
||||
}
|
||||
|
||||
#[pyo3(signature = (requested_schema=None))]
|
||||
fn __arrow_c_stream__(
|
||||
slf: PyRef<'_, Self>,
|
||||
py: Python<'_>,
|
||||
requested_schema: Option<Bound<'_, PyAny>>,
|
||||
) -> PyResult<Py<PyAny>> {
|
||||
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
|
||||
.map(|s| s.downcast_into::<PyCapsule>())
|
||||
.transpose()?;
|
||||
let table = match slf.as_super().inner() {
|
||||
qroissant_core::Value::Table(t) => t.clone(),
|
||||
_ => unreachable!("Table instances always hold q tables"),
|
||||
};
|
||||
let opts = slf.as_super().projection_opts().clone();
|
||||
let export = py
|
||||
.detach(|| project_table(&table, &opts).map_err(|e| e.to_string()))
|
||||
.map_err(|e| PyNotImplementedError::new_err(e))?;
|
||||
let reader = ArrayIterator::new(vec![Ok(export.struct_array)], export.struct_field);
|
||||
let capsule = to_stream_pycapsule(py, Box::new(reader), schema_capsule)?;
|
||||
Ok(capsule.into_any().unbind())
|
||||
}
|
||||
|
||||
fn __repr__(slf: PyRef<'_, Self>) -> String {
|
||||
match slf.as_super().inner() {
|
||||
CoreValue::Table(table) => crate::repr::format_table(table),
|
||||
_ => unreachable!("Table instances always hold q tables"),
|
||||
}
|
||||
}
|
||||
|
||||
fn __str__(slf: PyRef<'_, Self>) -> String {
|
||||
Self::__repr__(slf)
|
||||
}
|
||||
}
|
||||
|
||||
fn normalize_index(index: isize, len: usize) -> PyResult<usize> {
|
||||
let len = len as isize;
|
||||
let index = if index < 0 { len + index } else { index };
|
||||
if !(0..len).contains(&index) {
|
||||
return Err(PyIndexError::new_err("index out of range"));
|
||||
}
|
||||
Ok(index as usize)
|
||||
}
|
||||
|
||||
/// Extracts raw bytes from a Python object.
///
/// The object is first extracted as `Vec<u8>`; if that fails it is extracted
/// as a `String` and its UTF-8 bytes are returned.
///
/// # Errors
///
/// Returns the `String` extraction error when both attempts fail.
fn bytes_or_utf8(value: &Bound<'_, PyAny>) -> PyResult<Vec<u8>> {
    match value.extract::<Vec<u8>>() {
        Ok(bytes) => Ok(bytes),
        Err(_) => value.extract::<String>().map(String::into_bytes),
    }
}
|
||||
|
||||
fn atom_to_python(py: Python<'_>, atom: &CoreAtom) -> PyResult<Py<PyAny>> {
|
||||
match atom {
|
||||
CoreAtom::Boolean(value) => Ok(value.into_pyobject(py)?.to_owned().unbind().into_any()),
|
||||
CoreAtom::Guid(value) => Ok(PyBytes::new(py, value).unbind().into_any()),
|
||||
CoreAtom::Byte(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Short(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Int(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Long(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Real(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Float(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Char(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Symbol(value) => Ok(PyBytes::new(py, value).unbind().into_any()),
|
||||
CoreAtom::Timestamp(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Month(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Date(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Datetime(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Timespan(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Minute(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Second(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
CoreAtom::Time(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||
}
|
||||
}
|
||||
|
||||
fn atom_from_python(qtype: &Type, value: &Bound<'_, PyAny>) -> PyResult<CoreAtom> {
|
||||
ensure_shape(qtype, Shape::Atom)?;
|
||||
let primitive = qtype
|
||||
.primitive_value()
|
||||
.ok_or_else(|| PyValueError::new_err("atom qtype requires a primitive"))?;
|
||||
match primitive {
|
||||
Primitive::Boolean => Ok(CoreAtom::Boolean(value.extract()?)),
|
||||
Primitive::Guid => {
|
||||
let bytes = value.extract::<Vec<u8>>()?;
|
||||
let guid: [u8; 16] = bytes.try_into().map_err(|_| {
|
||||
PyValueError::new_err("guid atoms must be backed by exactly 16 bytes")
|
||||
})?;
|
||||
Ok(CoreAtom::Guid(guid))
|
||||
}
|
||||
Primitive::Byte => Ok(CoreAtom::Byte(value.extract()?)),
|
||||
Primitive::Short => Ok(CoreAtom::Short(value.extract()?)),
|
||||
Primitive::Int => Ok(CoreAtom::Int(value.extract()?)),
|
||||
Primitive::Long => Ok(CoreAtom::Long(value.extract()?)),
|
||||
Primitive::Real => Ok(CoreAtom::Real(value.extract()?)),
|
||||
Primitive::Float => Ok(CoreAtom::Float(value.extract()?)),
|
||||
Primitive::Char => Ok(CoreAtom::Char(extract_char_like(value)?)),
|
||||
Primitive::Symbol => Ok(CoreAtom::Symbol(bytes::Bytes::from(bytes_or_utf8(value)?))),
|
||||
Primitive::Timestamp => Ok(CoreAtom::Timestamp(value.extract()?)),
|
||||
Primitive::Month => Ok(CoreAtom::Month(value.extract()?)),
|
||||
Primitive::Date => Ok(CoreAtom::Date(value.extract()?)),
|
||||
Primitive::Datetime => Ok(CoreAtom::Datetime(value.extract()?)),
|
||||
Primitive::Timespan => Ok(CoreAtom::Timespan(value.extract()?)),
|
||||
Primitive::Minute => Ok(CoreAtom::Minute(value.extract()?)),
|
||||
Primitive::Second => Ok(CoreAtom::Second(value.extract()?)),
|
||||
Primitive::Time => Ok(CoreAtom::Time(value.extract()?)),
|
||||
Primitive::Mixed => Err(PyValueError::new_err("mixed atoms are not valid q values")),
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_char_like(value: &Bound<'_, PyAny>) -> PyResult<u8> {
|
||||
if let Ok(byte) = value.extract::<u8>() {
|
||||
return Ok(byte);
|
||||
}
|
||||
let bytes = value.extract::<Vec<u8>>()?;
|
||||
let [byte] = <[u8; 1]>::try_from(bytes.as_slice())
|
||||
.map_err(|_| PyValueError::new_err("char values must be a single byte or integer"))?;
|
||||
Ok(byte)
|
||||
}
|
||||
|
||||
fn vector_from_python(qtype: &Type, values: &Bound<'_, PyAny>) -> PyResult<CoreVector> {
|
||||
ensure_shape(qtype, Shape::Vector)?;
|
||||
let primitive = qtype
|
||||
.primitive_value()
|
||||
.ok_or_else(|| PyValueError::new_err("vector qtype requires a primitive"))?;
|
||||
let list = values
|
||||
.cast::<PyList>()
|
||||
.map_err(|_| PyValueError::new_err("vector payloads must be Python lists"))?;
|
||||
let attribute = qtype.attribute_value().unwrap_or(Attribute::None).into();
|
||||
let data = match primitive {
|
||||
Primitive::Boolean => {
|
||||
let bools: Vec<bool> = extract_list(list, |item| item.extract())?;
|
||||
let bytes: Vec<u8> = bools.into_iter().map(|b| if b { 1 } else { 0 }).collect();
|
||||
VectorData::Boolean(bytes::Bytes::from(bytes))
|
||||
}
|
||||
Primitive::Guid => VectorData::from_guids(&extract_list(list, |item| {
|
||||
let bytes = item.extract::<Vec<u8>>()?;
|
||||
bytes
|
||||
.try_into()
|
||||
.map_err(|_| PyValueError::new_err("guid vector elements must be exactly 16 bytes"))
|
||||
})?),
|
||||
Primitive::Byte => VectorData::Byte(bytes::Bytes::from(extract_list(list, |item| {
|
||||
item.extract::<u8>()
|
||||
})?)),
|
||||
Primitive::Short => VectorData::from_i16s(&extract_list(list, |item| item.extract())?),
|
||||
Primitive::Int => VectorData::from_i32s(&extract_list(list, |item| item.extract())?),
|
||||
Primitive::Long => VectorData::from_i64s(&extract_list(list, |item| item.extract())?),
|
||||
Primitive::Real => VectorData::from_f32s(&extract_list(list, |item| item.extract())?),
|
||||
Primitive::Float => VectorData::from_f64s(&extract_list(list, |item| item.extract())?),
|
||||
Primitive::Char => {
|
||||
VectorData::Char(bytes::Bytes::from(extract_list(list, extract_char_like)?))
|
||||
}
|
||||
Primitive::Symbol => VectorData::Symbol(
|
||||
extract_list(list, bytes_or_utf8)?
|
||||
.into_iter()
|
||||
.map(bytes::Bytes::from)
|
||||
.collect(),
|
||||
),
|
||||
Primitive::Timestamp => {
|
||||
VectorData::from_timestamps(&extract_list(list, |item| item.extract())?)
|
||||
}
|
||||
Primitive::Month => VectorData::from_months(&extract_list(list, |item| item.extract())?),
|
||||
Primitive::Date => VectorData::from_dates(&extract_list(list, |item| item.extract())?),
|
||||
Primitive::Datetime => {
|
||||
VectorData::from_datetimes(&extract_list(list, |item| item.extract())?)
|
||||
}
|
||||
Primitive::Timespan => {
|
||||
VectorData::from_timespans(&extract_list(list, |item| item.extract())?)
|
||||
}
|
||||
Primitive::Minute => VectorData::from_minutes(&extract_list(list, |item| item.extract())?),
|
||||
Primitive::Second => VectorData::from_seconds(&extract_list(list, |item| item.extract())?),
|
||||
Primitive::Time => VectorData::from_times(&extract_list(list, |item| item.extract())?),
|
||||
Primitive::Mixed => {
|
||||
return Err(PyValueError::new_err(
|
||||
"mixed vectors must use List rather than Vector",
|
||||
));
|
||||
}
|
||||
};
|
||||
Ok(CoreVector::new(attribute, data))
|
||||
}
|
||||
|
||||
fn list_from_python(qtype: &Type, values: &Bound<'_, PyAny>) -> PyResult<CoreList> {
|
||||
ensure_shape(qtype, Shape::List)?;
|
||||
let list = values
|
||||
.cast::<PyList>()
|
||||
.map_err(|_| PyValueError::new_err("list payloads must be Python lists"))?;
|
||||
let attribute = qtype.attribute_value().unwrap_or(Attribute::None).into();
|
||||
let mut inner = Vec::with_capacity(list.len());
|
||||
for item in list.iter() {
|
||||
inner.push(python_to_core_value(&item)?);
|
||||
}
|
||||
Ok(CoreList::new(attribute, inner))
|
||||
}
|
||||
|
||||
fn dictionary_from_python(
|
||||
qtype: &Type,
|
||||
keys: &Bound<'_, PyAny>,
|
||||
values: &Bound<'_, PyAny>,
|
||||
) -> PyResult<CoreDictionary> {
|
||||
ensure_shape(qtype, Shape::Dictionary)?;
|
||||
let sorted = qtype.sorted_value().unwrap_or(false);
|
||||
let dictionary = CoreDictionary::new(
|
||||
sorted,
|
||||
python_to_core_value(keys)?,
|
||||
python_to_core_value(values)?,
|
||||
);
|
||||
dictionary
|
||||
.validate()
|
||||
.map_err(|error| PyValueError::new_err(error.to_string()))?;
|
||||
Ok(dictionary)
|
||||
}
|
||||
|
||||
fn table_from_python(qtype: &Type, columns: &Bound<'_, PyAny>) -> PyResult<CoreTable> {
|
||||
ensure_shape(qtype, Shape::Table)?;
|
||||
let columns = columns
|
||||
.cast::<PyDict>()
|
||||
.map_err(|_| PyValueError::new_err("table payloads must be Python dicts"))?;
|
||||
let attribute = qtype.attribute_value().unwrap_or(Attribute::None).into();
|
||||
let mut names = Vec::with_capacity(columns.len());
|
||||
let mut values = Vec::with_capacity(columns.len());
|
||||
for (name, column) in columns.iter() {
|
||||
names.push(bytes::Bytes::from(name.extract::<String>()?.into_bytes()));
|
||||
values.push(python_to_core_value(&column)?);
|
||||
}
|
||||
let table = CoreTable::new(attribute, names, values);
|
||||
table
|
||||
.validate()
|
||||
.map_err(|error| PyValueError::new_err(error.to_string()))?;
|
||||
Ok(table)
|
||||
}
|
||||
|
||||
fn ensure_shape(qtype: &Type, expected: Shape) -> PyResult<()> {
|
||||
if qtype.shape_value() != expected {
|
||||
return Err(PyValueError::new_err(format!(
|
||||
"qtype shape {:?} does not match {:?}",
|
||||
qtype.shape_value(),
|
||||
expected
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extract_list<T, F>(items: &Bound<'_, PyList>, convert: F) -> PyResult<Vec<T>>
|
||||
where
|
||||
F: Fn(&Bound<'_, PyAny>) -> PyResult<T>,
|
||||
{
|
||||
let mut values = Vec::with_capacity(items.len());
|
||||
for item in items.iter() {
|
||||
values.push(convert(&item)?);
|
||||
}
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
fn vector_to_pylist(py: Python<'_>, vector: &CoreVector) -> PyResult<Py<PyList>> {
|
||||
let len = vector.len();
|
||||
let mut values = Vec::with_capacity(len);
|
||||
for index in 0..len {
|
||||
values.push(vector_item_to_python(py, vector, index)?);
|
||||
}
|
||||
Ok(PyList::new(py, values)?.unbind())
|
||||
}
|
||||
|
||||
fn vector_item_to_python(py: Python<'_>, vector: &CoreVector, index: usize) -> PyResult<Py<PyAny>> {
|
||||
let data = vector.data();
|
||||
match data {
|
||||
VectorData::Boolean(values) => Ok((values[index] != 0)
|
||||
.into_pyobject(py)?
|
||||
.to_owned()
|
||||
.unbind()
|
||||
.into_any()),
|
||||
VectorData::Guid(values) => {
|
||||
let chunk = &values[index * 16..(index + 1) * 16];
|
||||
Ok(PyBytes::new(py, chunk).unbind().into_any())
|
||||
}
|
||||
VectorData::Byte(values) => Ok(values[index].into_pyobject(py)?.unbind().into_any()),
|
||||
VectorData::Short(_) => Ok(data.as_i16_slice()[index]
|
||||
.into_pyobject(py)?
|
||||
.unbind()
|
||||
.into_any()),
|
||||
VectorData::Int(_)
|
||||
| VectorData::Month(_)
|
||||
| VectorData::Date(_)
|
||||
| VectorData::Minute(_)
|
||||
| VectorData::Second(_)
|
||||
| VectorData::Time(_) => Ok(data.as_i32_slice()[index]
|
||||
.into_pyobject(py)?
|
||||
.unbind()
|
||||
.into_any()),
|
||||
VectorData::Long(_) | VectorData::Timestamp(_) | VectorData::Timespan(_) => Ok(data
|
||||
.as_i64_slice()[index]
|
||||
.into_pyobject(py)?
|
||||
.unbind()
|
||||
.into_any()),
|
||||
VectorData::Real(_) => Ok(data.as_f32_slice()[index]
|
||||
.into_pyobject(py)?
|
||||
.unbind()
|
||||
.into_any()),
|
||||
VectorData::Float(_) | VectorData::Datetime(_) => Ok(data.as_f64_slice()[index]
|
||||
.into_pyobject(py)?
|
||||
.unbind()
|
||||
.into_any()),
|
||||
VectorData::Char(values) => Ok(values[index].into_pyobject(py)?.unbind().into_any()),
|
||||
VectorData::Symbol(values) => Ok(PyBytes::new(py, &values[index]).unbind().into_any()),
|
||||
}
|
||||
}
|
||||
|
||||
fn map_ingestion_error(e: IngestionError) -> PyErr {
|
||||
PyValueError::new_err(e.to_string())
|
||||
}
|
||||
|
||||
pub fn python_to_core_value(value: &Bound<'_, PyAny>) -> PyResult<CoreValue> {
|
||||
// Try qroissant Value first (it also implements Arrow protocols, so must come first).
|
||||
if let Ok(q_value) = value.extract::<PyRef<'_, Value>>() {
|
||||
return Ok(q_value.inner().clone());
|
||||
}
|
||||
|
||||
// Check Arrow stream protocol (record batches → table).
|
||||
if value.hasattr("__arrow_c_stream__")? {
|
||||
let capsule_obj = value.getattr("__arrow_c_stream__")?.call0()?;
|
||||
let stream_capsule = capsule_obj.downcast::<PyCapsule>().map_err(PyErr::from)?;
|
||||
let reader =
|
||||
pyo3_arrow::PyRecordBatchReader::from_arrow_pycapsule(stream_capsule)?.into_reader()?;
|
||||
let schema = reader.schema();
|
||||
let value = ingest_record_batch_reader(schema, reader).map_err(map_ingestion_error)?;
|
||||
return Ok(value);
|
||||
}
|
||||
|
||||
// Check Arrow array protocol (single array or record batch).
|
||||
if value.hasattr("__arrow_c_array__")? {
|
||||
// Try extracting as a record batch first.
|
||||
if let Ok(record_batch) = value.extract::<pyo3_arrow::PyRecordBatch>() {
|
||||
let batch = record_batch.into_inner();
|
||||
let value = ingest_record_batch(batch).map_err(map_ingestion_error)?;
|
||||
return Ok(value);
|
||||
}
|
||||
// Fall back to plain array.
|
||||
let array: pyo3_arrow::PyArray = value.extract()?;
|
||||
let (array, field) = array.into_inner();
|
||||
let value = ingest_array(array, field.as_ref()).map_err(map_ingestion_error)?;
|
||||
return Ok(value);
|
||||
}
|
||||
|
||||
Err(PyNotImplementedError::new_err(
|
||||
"encoding non-qroissant values is not implemented yet; \
|
||||
pass a qroissant Value or an object implementing the Arrow protocol",
|
||||
))
|
||||
}
|
||||
|
||||
pub fn core_value_to_python(py: Python<'_>, value: CoreValue) -> PyResult<Py<PyAny>> {
|
||||
core_value_to_python_with_opts(py, value, Arc::new(ProjectionOptions::default()))
|
||||
}
|
||||
|
||||
pub fn core_value_to_python_with_opts(
|
||||
py: Python<'_>,
|
||||
value: CoreValue,
|
||||
opts: Arc<ProjectionOptions>,
|
||||
) -> PyResult<Py<PyAny>> {
|
||||
match value {
|
||||
CoreValue::Atom(atom) => Ok(Py::new(
|
||||
py,
|
||||
(Atom, Value::new_with_opts(CoreValue::Atom(atom), opts)),
|
||||
)?
|
||||
.into_any()),
|
||||
CoreValue::Vector(vector) => Ok(Py::new(
|
||||
py,
|
||||
(
|
||||
Vector,
|
||||
Value::new_with_opts(CoreValue::Vector(vector), opts),
|
||||
),
|
||||
)?
|
||||
.into_any()),
|
||||
CoreValue::List(list) => Ok(Py::new(
|
||||
py,
|
||||
(List, Value::new_with_opts(CoreValue::List(list), opts)),
|
||||
)?
|
||||
.into_any()),
|
||||
CoreValue::Dictionary(dictionary) => Ok(Py::new(
|
||||
py,
|
||||
(
|
||||
Dictionary,
|
||||
Value::new_with_opts(CoreValue::Dictionary(dictionary), opts),
|
||||
),
|
||||
)?
|
||||
.into_any()),
|
||||
CoreValue::Table(table) => Ok(Py::new(
|
||||
py,
|
||||
(Table, Value::new_with_opts(CoreValue::Table(table), opts)),
|
||||
)?
|
||||
.into_any()),
|
||||
CoreValue::UnaryPrimitive { opcode } => {
|
||||
Ok(Py::new(py, Value::new(CoreValue::UnaryPrimitive { opcode }))?.into_any())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn register(module: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
module.add_class::<Value>()?;
|
||||
module.add_class::<Atom>()?;
|
||||
module.add_class::<Vector>()?;
|
||||
module.add_class::<List>()?;
|
||||
module.add_class::<Dictionary>()?;
|
||||
module.add_class::<Table>()?;
|
||||
Ok(())
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue