Vendor qroissant 0.3.0 baseline
This commit is contained in:
commit
53ac90fe84
56 changed files with 18309 additions and 0 deletions
31
.gitignore
vendored
Normal file
31
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
# Rust
|
||||||
|
/target/
|
||||||
|
**/target/
|
||||||
|
**/*.rs.bk
|
||||||
|
Cargo.lock.bak
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.egg-info/
|
||||||
|
.eggs/
|
||||||
|
build/
|
||||||
|
dist/
|
||||||
|
.venv/
|
||||||
|
venv/
|
||||||
|
|
||||||
|
# Maturin
|
||||||
|
*.so
|
||||||
|
*.pyd
|
||||||
|
*.dylib
|
||||||
|
|
||||||
|
# Editors / OS
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
*.swp
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Local consumer code dropped in for repro, not part of qroissant
|
||||||
|
/document.py
|
||||||
1403
Cargo.lock
generated
Normal file
1403
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
26
Cargo.toml
Normal file
26
Cargo.toml
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
[workspace]
|
||||||
|
members = ["crates/qroissant-core", "crates/qroissant-kernels", "crates/qroissant-arrow", "crates/qroissant-transport", "crates/qroissant-python"]
|
||||||
|
resolver = "3"
|
||||||
|
|
||||||
|
[workspace.package]
|
||||||
|
version = "0.3.0"
|
||||||
|
edition = "2024"
|
||||||
|
license = "Apache-2.0"
|
||||||
|
repository = "https://github.com/qroissant/qroissant"
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
lto = "fat"
|
||||||
|
codegen-units = 1
|
||||||
|
opt-level = 3
|
||||||
|
|
||||||
|
[workspace.dependencies]
|
||||||
|
pyo3 = "0.28.2"
|
||||||
|
tokio = { version = "1.48.0", features = [
|
||||||
|
"io-util",
|
||||||
|
"net",
|
||||||
|
"rt-multi-thread",
|
||||||
|
"sync",
|
||||||
|
"time",
|
||||||
|
"macros",
|
||||||
|
] }
|
||||||
|
futures = "0.3"
|
||||||
246
PKG-INFO
Normal file
246
PKG-INFO
Normal file
|
|
@ -0,0 +1,246 @@
|
||||||
|
Metadata-Version: 2.4
|
||||||
|
Name: qroissant
|
||||||
|
Version: 0.3.0
|
||||||
|
Classifier: Development Status :: 3 - Alpha
|
||||||
|
Classifier: License :: OSI Approved :: Apache Software License
|
||||||
|
Classifier: Programming Language :: Python :: 3
|
||||||
|
Classifier: Programming Language :: Python :: 3 :: Only
|
||||||
|
Classifier: Programming Language :: Python :: 3.10
|
||||||
|
Classifier: Programming Language :: Python :: 3.11
|
||||||
|
Classifier: Programming Language :: Python :: 3.12
|
||||||
|
Classifier: Programming Language :: Python :: 3.13
|
||||||
|
Classifier: Programming Language :: Rust
|
||||||
|
Summary: q/kdb+ IPC client library with Arrow-native Python interoperability
|
||||||
|
Keywords: kdb,q,ipc,arrow,pyo3
|
||||||
|
Author: qroissant contributors
|
||||||
|
License-Expression: Apache-2.0
|
||||||
|
Requires-Python: >=3.10
|
||||||
|
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
||||||
|
|
||||||
|
# qroissant
|
||||||
|
|
||||||
|
qroissant is a minimal q/kdb+ IPC client library with first-class support for the Apache Arrow ecosystem.
|
||||||
|
|
||||||
|
- **Lightweight** — qroissant is a minimal library weighing in at less than 4 MiB with no required dependencies.
|
||||||
|
- **Fast** — qroissant is written in Rust, a safe and high-performance systems programming language. Moreover, qroissant uses your system resources to the best extent possible by leveraging zero-copy, multithreading, and vectorization techniques such as SIMD.
|
||||||
|
- **Modular** — qroissant relies heavily on the [Apache Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) for communicating with other libraries from the Apache Arrow ecosystem with zero-copy. This includes pyarrow, polars, duckdb, pandas, datafusion, and more.
|
||||||
|
- **Type hints** — qroissant provides type annotations for all of its functionality.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install qroissant
|
||||||
|
```
|
||||||
|
|
||||||
|
Requires Python 3.10+. Wheels are available for Linux (x86\_64, aarch64), macOS (universal2), and Windows (x86\_64).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick start
|
||||||
|
|
||||||
|
### Connect and query
|
||||||
|
|
||||||
|
```python
|
||||||
|
import qroissant as q
|
||||||
|
|
||||||
|
endpoint = q.Endpoint.tcp("localhost", 5000)
|
||||||
|
|
||||||
|
with q.Connection(endpoint) as conn:
|
||||||
|
result = conn.query("select from trade where date = .z.d")
|
||||||
|
print(result) # Table
|
||||||
|
```
|
||||||
|
|
||||||
|
### To Arrow / Polars / PyArrow
|
||||||
|
|
||||||
|
Decoded values implement the Arrow PyCapsule protocol — pass them straight to any Arrow-aware library:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import polars as pl
|
||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
|
with q.Connection(endpoint) as conn:
|
||||||
|
table = conn.query("select from trade")
|
||||||
|
|
||||||
|
# zero-copy — no intermediate Python objects
|
||||||
|
df = pl.from_arrow(table)
|
||||||
|
pa_table = pa.table(table)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Async
|
||||||
|
|
||||||
|
```python
|
||||||
|
import asyncio
|
||||||
|
import qroissant as q
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
endpoint = q.Endpoint.tcp("localhost", 5000)
|
||||||
|
async with q.AsyncConnection(endpoint) as conn:
|
||||||
|
result = await conn.query("1 + 1")
|
||||||
|
print(result) # Atom → 2
|
||||||
|
|
||||||
|
asyncio.run(main())
|
||||||
|
```
|
||||||
|
|
||||||
|
### Connection pool
|
||||||
|
|
||||||
|
```python
|
||||||
|
pool_opts = q.PoolOptions(
|
||||||
|
max_size=10,
|
||||||
|
min_idle=2,
|
||||||
|
checkout_timeout_ms=5_000,
|
||||||
|
test_on_checkout=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
with q.Pool(endpoint, pool=pool_opts) as pool:
|
||||||
|
pool.prewarm() # open idle connections eagerly
|
||||||
|
result = pool.query("count trade") # checked out and returned automatically
|
||||||
|
print(pool.metrics()) # PoolMetrics(connections=2, idle=2, …)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Streaming raw response
|
||||||
|
|
||||||
|
For large results you can stream the raw IPC bytes before decoding:
|
||||||
|
|
||||||
|
```python
|
||||||
|
with q.Connection(endpoint) as conn:
|
||||||
|
with conn.query("select from trade", raw=True) as resp:
|
||||||
|
print(resp.header) # MessageHeader(size=…, compression=…)
|
||||||
|
value = resp.decode() # decode on demand
|
||||||
|
```
|
||||||
|
|
||||||
|
### Standalone encode / decode
|
||||||
|
|
||||||
|
```python
|
||||||
|
# decode an IPC payload you already have
|
||||||
|
payload: bytes = ...
|
||||||
|
value = q.decode(payload)
|
||||||
|
|
||||||
|
# encode a value back to IPC bytes
|
||||||
|
frame = q.encode(value, message_type=q.MessageType.SYNCHRONOUS)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Value types
|
||||||
|
|
||||||
|
Every `conn.query()` call returns a `Value` subclass:
|
||||||
|
|
||||||
|
| q type | Python type | Arrow export |
|
||||||
|
|--------|------------|--------------|
|
||||||
|
| scalar (atom) | `Atom` | `__arrow_c_array__` |
|
||||||
|
| typed list | `Vector` | `__arrow_c_array__` |
|
||||||
|
| mixed list | `List` | `__arrow_c_array__` |
|
||||||
|
| dictionary | `Dictionary` | `__arrow_c_array__` (StructArray) |
|
||||||
|
| table | `Table` | `__arrow_c_stream__` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Decode options
|
||||||
|
|
||||||
|
Control how IPC data is projected into Arrow:
|
||||||
|
|
||||||
|
```python
|
||||||
|
opts = (
|
||||||
|
q.DecodeOptions.builder()
|
||||||
|
.with_symbol_interpretation(q.SymbolInterpretation.DICTIONARY) # dict-encode symbols
|
||||||
|
.with_temporal_nulls(True) # map q null sentinels → None
|
||||||
|
.with_treat_infinity_as_null(True) # map ±∞ → None
|
||||||
|
.with_parallel(True) # decode table columns in parallel
|
||||||
|
.build()
|
||||||
|
)
|
||||||
|
|
||||||
|
with q.Connection(endpoint, options=opts) as conn:
|
||||||
|
result = conn.query("select from trade")
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Endpoints
|
||||||
|
|
||||||
|
```python
|
||||||
|
# TCP
|
||||||
|
endpoint = q.Endpoint.tcp(
|
||||||
|
"localhost", 5000,
|
||||||
|
username="user",
|
||||||
|
password="pass",
|
||||||
|
timeout_ms=3_000,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Unix domain socket
|
||||||
|
endpoint = q.Endpoint.unix(
|
||||||
|
"/tmp/qroissant.sock",
|
||||||
|
username="user",
|
||||||
|
password="pass",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Error handling
|
||||||
|
|
||||||
|
```python
|
||||||
|
from qroissant import (
|
||||||
|
QroissantError, # base class
|
||||||
|
DecodeError, # malformed IPC payload
|
||||||
|
ProtocolError, # bad frame header
|
||||||
|
TransportError, # socket / IO failure
|
||||||
|
QRuntimeError, # q process returned an error
|
||||||
|
PoolError, # pool management failure
|
||||||
|
PoolClosedError, # operation on a closed pool
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = conn.query("invalid expression")
|
||||||
|
except q.QRuntimeError as e:
|
||||||
|
print(f"q error: {e}")
|
||||||
|
except q.TransportError as e:
|
||||||
|
print(f"connection lost: {e}")
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
qroissant is organized as a Rust workspace with strict crate boundaries:
|
||||||
|
|
||||||
|
```
|
||||||
|
crates/
|
||||||
|
├── qroissant-core # q protocol, value types, encode/decode
|
||||||
|
├── qroissant-transport # sync & async TCP/Unix socket connections
|
||||||
|
├── qroissant-arrow # zero-copy Arrow projection
|
||||||
|
├── qroissant-kernels # SIMD / nightly-sensitive hot paths
|
||||||
|
└── qroissant-python # PyO3 bindings (the _native extension module)
|
||||||
|
```
|
||||||
|
|
||||||
|
The Python package at `python/qroissant/` re-exports everything from the compiled `_native` extension. The `.pyi` stub files in that directory define the public API contract.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install Python dependencies
|
||||||
|
uv sync --group dev --group docs
|
||||||
|
|
||||||
|
# Build the Rust extension (required before running Python tests)
|
||||||
|
uv run maturin develop
|
||||||
|
|
||||||
|
# Run tests
|
||||||
|
uv run pytest
|
||||||
|
cargo test --workspace
|
||||||
|
|
||||||
|
# Lint and format
|
||||||
|
uv run ruff check python/ tests/
|
||||||
|
cargo fmt --all
|
||||||
|
```
|
||||||
|
|
||||||
|
Transport integration tests require a q binary. Set `Q_BIN` to the path of your q executable before running `pytest`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
Apache 2.0 — see [LICENSE](LICENSE).
|
||||||
|
|
||||||
226
README.md
Normal file
226
README.md
Normal file
|
|
@ -0,0 +1,226 @@
|
||||||
|
# qroissant
|
||||||
|
|
||||||
|
qroissant is a minimal q/kdb+ IPC client library with first-class support for the Apache Arrow ecosystem.
|
||||||
|
|
||||||
|
- **Lightweight** — qroissant is a minimal library weighing in at less than 4 MiB with no required dependencies.
|
||||||
|
- **Fast** — qroissant is written in Rust, a safe and high-performance systems programming language. Moreover, qroissant uses your system resources to the best extent possible by leveraging zero-copy, multithreading, and vectorization techniques such as SIMD.
|
||||||
|
- **Modular** — qroissant relies heavily on the [Apache Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) for communicating with other libraries from the Apache Arrow ecosystem with zero-copy. This includes pyarrow, polars, duckdb, pandas, datafusion, and more.
|
||||||
|
- **Type hints** — qroissant provides type annotations for all of its functionality.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install qroissant
|
||||||
|
```
|
||||||
|
|
||||||
|
Requires Python 3.10+. Wheels are available for Linux (x86\_64, aarch64), macOS (universal2), and Windows (x86\_64).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick start
|
||||||
|
|
||||||
|
### Connect and query
|
||||||
|
|
||||||
|
```python
|
||||||
|
import qroissant as q
|
||||||
|
|
||||||
|
endpoint = q.Endpoint.tcp("localhost", 5000)
|
||||||
|
|
||||||
|
with q.Connection(endpoint) as conn:
|
||||||
|
result = conn.query("select from trade where date = .z.d")
|
||||||
|
print(result) # Table
|
||||||
|
```
|
||||||
|
|
||||||
|
### To Arrow / Polars / PyArrow
|
||||||
|
|
||||||
|
Decoded values implement the Arrow PyCapsule protocol — pass them straight to any Arrow-aware library:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import polars as pl
|
||||||
|
import pyarrow as pa
|
||||||
|
|
||||||
|
with q.Connection(endpoint) as conn:
|
||||||
|
table = conn.query("select from trade")
|
||||||
|
|
||||||
|
# zero-copy — no intermediate Python objects
|
||||||
|
df = pl.from_arrow(table)
|
||||||
|
pa_table = pa.table(table)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Async
|
||||||
|
|
||||||
|
```python
|
||||||
|
import asyncio
|
||||||
|
import qroissant as q
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
endpoint = q.Endpoint.tcp("localhost", 5000)
|
||||||
|
async with q.AsyncConnection(endpoint) as conn:
|
||||||
|
result = await conn.query("1 + 1")
|
||||||
|
print(result) # Atom → 2
|
||||||
|
|
||||||
|
asyncio.run(main())
|
||||||
|
```
|
||||||
|
|
||||||
|
### Connection pool
|
||||||
|
|
||||||
|
```python
|
||||||
|
pool_opts = q.PoolOptions(
|
||||||
|
max_size=10,
|
||||||
|
min_idle=2,
|
||||||
|
checkout_timeout_ms=5_000,
|
||||||
|
test_on_checkout=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
with q.Pool(endpoint, pool=pool_opts) as pool:
|
||||||
|
pool.prewarm() # open idle connections eagerly
|
||||||
|
result = pool.query("count trade") # checked out and returned automatically
|
||||||
|
print(pool.metrics()) # PoolMetrics(connections=2, idle=2, …)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Streaming raw response
|
||||||
|
|
||||||
|
For large results you can stream the raw IPC bytes before decoding:
|
||||||
|
|
||||||
|
```python
|
||||||
|
with q.Connection(endpoint) as conn:
|
||||||
|
with conn.query("select from trade", raw=True) as resp:
|
||||||
|
print(resp.header) # MessageHeader(size=…, compression=…)
|
||||||
|
value = resp.decode() # decode on demand
|
||||||
|
```
|
||||||
|
|
||||||
|
### Standalone encode / decode
|
||||||
|
|
||||||
|
```python
|
||||||
|
# decode an IPC payload you already have
|
||||||
|
payload: bytes = ...
|
||||||
|
value = q.decode(payload)
|
||||||
|
|
||||||
|
# encode a value back to IPC bytes
|
||||||
|
frame = q.encode(value, message_type=q.MessageType.SYNCHRONOUS)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Value types
|
||||||
|
|
||||||
|
Every `conn.query()` call returns a `Value` subclass:
|
||||||
|
|
||||||
|
| q type | Python type | Arrow export |
|
||||||
|
|--------|------------|--------------|
|
||||||
|
| scalar (atom) | `Atom` | `__arrow_c_array__` |
|
||||||
|
| typed list | `Vector` | `__arrow_c_array__` |
|
||||||
|
| mixed list | `List` | `__arrow_c_array__` |
|
||||||
|
| dictionary | `Dictionary` | `__arrow_c_array__` (StructArray) |
|
||||||
|
| table | `Table` | `__arrow_c_stream__` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Decode options
|
||||||
|
|
||||||
|
Control how IPC data is projected into Arrow:
|
||||||
|
|
||||||
|
```python
|
||||||
|
opts = (
|
||||||
|
q.DecodeOptions.builder()
|
||||||
|
.with_symbol_interpretation(q.SymbolInterpretation.DICTIONARY) # dict-encode symbols
|
||||||
|
.with_temporal_nulls(True) # map q null sentinels → None
|
||||||
|
.with_treat_infinity_as_null(True) # map ±∞ → None
|
||||||
|
.with_parallel(True) # decode table columns in parallel
|
||||||
|
.build()
|
||||||
|
)
|
||||||
|
|
||||||
|
with q.Connection(endpoint, options=opts) as conn:
|
||||||
|
result = conn.query("select from trade")
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Endpoints
|
||||||
|
|
||||||
|
```python
|
||||||
|
# TCP
|
||||||
|
endpoint = q.Endpoint.tcp(
|
||||||
|
"localhost", 5000,
|
||||||
|
username="user",
|
||||||
|
password="pass",
|
||||||
|
timeout_ms=3_000,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Unix domain socket
|
||||||
|
endpoint = q.Endpoint.unix(
|
||||||
|
"/tmp/qroissant.sock",
|
||||||
|
username="user",
|
||||||
|
password="pass",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Error handling
|
||||||
|
|
||||||
|
```python
|
||||||
|
from qroissant import (
|
||||||
|
QroissantError, # base class
|
||||||
|
DecodeError, # malformed IPC payload
|
||||||
|
ProtocolError, # bad frame header
|
||||||
|
TransportError, # socket / IO failure
|
||||||
|
QRuntimeError, # q process returned an error
|
||||||
|
PoolError, # pool management failure
|
||||||
|
PoolClosedError, # operation on a closed pool
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = conn.query("invalid expression")
|
||||||
|
except q.QRuntimeError as e:
|
||||||
|
print(f"q error: {e}")
|
||||||
|
except q.TransportError as e:
|
||||||
|
print(f"connection lost: {e}")
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
qroissant is organized as a Rust workspace with strict crate boundaries:
|
||||||
|
|
||||||
|
```
|
||||||
|
crates/
|
||||||
|
├── qroissant-core # q protocol, value types, encode/decode
|
||||||
|
├── qroissant-transport # sync & async TCP/Unix socket connections
|
||||||
|
├── qroissant-arrow # zero-copy Arrow projection
|
||||||
|
├── qroissant-kernels # SIMD / nightly-sensitive hot paths
|
||||||
|
└── qroissant-python # PyO3 bindings (the _native extension module)
|
||||||
|
```
|
||||||
|
|
||||||
|
The Python package at `python/qroissant/` re-exports everything from the compiled `_native` extension. The `.pyi` stub files in that directory define the public API contract.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install Python dependencies
|
||||||
|
uv sync --group dev --group docs
|
||||||
|
|
||||||
|
# Build the Rust extension (required before running Python tests)
|
||||||
|
uv run maturin develop
|
||||||
|
|
||||||
|
# Run tests
|
||||||
|
uv run pytest
|
||||||
|
cargo test --workspace
|
||||||
|
|
||||||
|
# Lint and format
|
||||||
|
uv run ruff check python/ tests/
|
||||||
|
cargo fmt --all
|
||||||
|
```
|
||||||
|
|
||||||
|
Transport integration tests require a q binary. Set `Q_BIN` to the path of your q executable before running `pytest`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
Apache 2.0 — see [LICENSE](LICENSE).
|
||||||
24
crates/qroissant-arrow/Cargo.toml
Normal file
24
crates/qroissant-arrow/Cargo.toml
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
[package]
|
||||||
|
name = "qroissant-arrow"
|
||||||
|
version.workspace = true
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "qroissant_arrow"
|
||||||
|
path = "src/lib.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
arrow-array = "58.0.0"
|
||||||
|
arrow-buffer = "58.0.0"
|
||||||
|
arrow-schema = "58.0.0"
|
||||||
|
arrow-select = "58.0.0"
|
||||||
|
bytemuck = { version = "1", features = ["derive", "extern_crate_alloc"] }
|
||||||
|
bytes = "1.11.1"
|
||||||
|
chrono = "0.4.44"
|
||||||
|
qroissant-core = { path = "../qroissant-core" }
|
||||||
|
qroissant-kernels = { path = "../qroissant-kernels" }
|
||||||
|
rayon = "1.10"
|
||||||
|
thiserror = "2.0.18"
|
||||||
|
|
||||||
23
crates/qroissant-arrow/src/error.rs
Normal file
23
crates/qroissant-arrow/src/error.rs
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
#[derive(Debug, Error)]
|
||||||
|
pub enum ProjectionError {
|
||||||
|
#[error("Arrow projection is not supported for {0}")]
|
||||||
|
Unsupported(String),
|
||||||
|
|
||||||
|
#[error("Arrow projection failed: {0}")]
|
||||||
|
Arrow(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type ProjectionResult<T> = Result<T, ProjectionError>;
|
||||||
|
|
||||||
|
#[derive(Debug, Error)]
|
||||||
|
pub enum IngestionError {
|
||||||
|
#[error("Arrow ingestion is not supported: {0}")]
|
||||||
|
Unsupported(String),
|
||||||
|
|
||||||
|
#[error("Arrow ingestion failed: {0}")]
|
||||||
|
Arrow(#[from] arrow_schema::ArrowError),
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type IngestionResult<T> = Result<T, IngestionError>;
|
||||||
1576
crates/qroissant-arrow/src/ingestion.rs
Normal file
1576
crates/qroissant-arrow/src/ingestion.rs
Normal file
File diff suppressed because it is too large
Load diff
26
crates/qroissant-arrow/src/lib.rs
Normal file
26
crates/qroissant-arrow/src/lib.rs
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
//! Arrow interop layer for qroissant.
|
||||||
|
//!
|
||||||
|
//! Converts decoded q `Value` trees (from `qroissant-core`) into Apache Arrow
|
||||||
|
//! arrays and record batches. PyO3 and PyCapsule handling live in
|
||||||
|
//! `qroissant-python`; this crate is intentionally free of Python dependencies.
|
||||||
|
|
||||||
|
pub mod error;
|
||||||
|
pub mod ingestion;
|
||||||
|
pub mod metadata;
|
||||||
|
pub mod options;
|
||||||
|
pub mod projection;
|
||||||
|
|
||||||
|
pub use error::IngestionError;
|
||||||
|
pub use ingestion::ingest_array;
|
||||||
|
pub use ingestion::ingest_record_batch;
|
||||||
|
pub use ingestion::ingest_record_batch_reader;
|
||||||
|
pub use options::ListProjection;
|
||||||
|
pub use options::ProjectionOptions;
|
||||||
|
pub use options::StringProjection;
|
||||||
|
pub use options::SymbolProjection;
|
||||||
|
pub use options::UnionMode;
|
||||||
|
pub use projection::ArrayExport;
|
||||||
|
pub use projection::BatchExport;
|
||||||
|
pub use projection::project;
|
||||||
|
pub use projection::project_table;
|
||||||
|
pub use qroissant_core::HEADER_LEN as QIPC_HEADER_LEN;
|
||||||
90
crates/qroissant-arrow/src/metadata.rs
Normal file
90
crates/qroissant-arrow/src/metadata.rs
Normal file
|
|
@ -0,0 +1,90 @@
|
||||||
|
//! Arrow field metadata helpers for preserving q type semantics.
|
||||||
|
//!
|
||||||
|
//! Every Arrow field produced by qroissant carries metadata that round-trips
|
||||||
|
//! the original q shape, primitive type, and attribute information so that
|
||||||
|
//! downstream consumers can reconstruct exact q semantics from an Arrow schema.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use arrow_schema::Field;
|
||||||
|
use arrow_schema::FieldRef;
|
||||||
|
use qroissant_core::Attribute;
|
||||||
|
use qroissant_core::Primitive;
|
||||||
|
|
||||||
|
/// Metadata key under which `q_field` stores the q shape label.
pub const SHAPE_KEY: &str = "qroissant.shape";

/// Metadata key under which `q_field` stores the q primitive label, when one is given.
pub const PRIMITIVE_KEY: &str = "qroissant.primitive";

/// Metadata key under which `q_field` stores the q attribute label, when one is given.
pub const ATTRIBUTE_KEY: &str = "qroissant.attribute";

/// Metadata key under which `q_field` stores the stringified `sorted` flag, when one is given.
pub const SORTED_KEY: &str = "qroissant.sorted";
|
||||||
|
|
||||||
|
/// Return the metadata label for a q shape name.
///
/// The label is the input itself. The previous implementation matched on
/// `"atom"`, `"vector"`, `"list"`, `"dictionary"`, `"table"` and
/// `"unary_primitive"`, but every arm (including the fallback) returned its
/// own input, so the match was a no-op and has been removed.
fn shape_to_str(shape: &str) -> &str {
    shape
}
|
||||||
|
|
||||||
|
fn primitive_str(p: Primitive) -> &'static str {
|
||||||
|
match p {
|
||||||
|
Primitive::Boolean => "boolean",
|
||||||
|
Primitive::Guid => "guid",
|
||||||
|
Primitive::Byte => "byte",
|
||||||
|
Primitive::Short => "short",
|
||||||
|
Primitive::Int => "int",
|
||||||
|
Primitive::Long => "long",
|
||||||
|
Primitive::Real => "real",
|
||||||
|
Primitive::Float => "float",
|
||||||
|
Primitive::Char => "char",
|
||||||
|
Primitive::Symbol => "symbol",
|
||||||
|
Primitive::Timestamp => "timestamp",
|
||||||
|
Primitive::Month => "month",
|
||||||
|
Primitive::Date => "date",
|
||||||
|
Primitive::Datetime => "datetime",
|
||||||
|
Primitive::Timespan => "timespan",
|
||||||
|
Primitive::Minute => "minute",
|
||||||
|
Primitive::Second => "second",
|
||||||
|
Primitive::Time => "time",
|
||||||
|
Primitive::Mixed => "mixed",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn attribute_str(a: Attribute) -> &'static str {
|
||||||
|
match a {
|
||||||
|
Attribute::None => "none",
|
||||||
|
Attribute::Sorted => "sorted",
|
||||||
|
Attribute::Unique => "unique",
|
||||||
|
Attribute::Parted => "parted",
|
||||||
|
Attribute::Grouped => "grouped",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build an Arrow field for a q atom or vector column.
|
||||||
|
///
|
||||||
|
/// The field name is left empty (`""`); callers that embed the field in a
|
||||||
|
/// schema or struct field should rename it via [`arrow_schema::Field::with_name`].
|
||||||
|
pub fn q_field(
|
||||||
|
data_type: arrow_schema::DataType,
|
||||||
|
nullable: bool,
|
||||||
|
shape: &str,
|
||||||
|
primitive: Option<Primitive>,
|
||||||
|
attribute: Option<Attribute>,
|
||||||
|
sorted: Option<bool>,
|
||||||
|
) -> FieldRef {
|
||||||
|
let mut meta = HashMap::new();
|
||||||
|
meta.insert(SHAPE_KEY.to_string(), shape_to_str(shape).to_string());
|
||||||
|
if let Some(p) = primitive {
|
||||||
|
meta.insert(PRIMITIVE_KEY.to_string(), primitive_str(p).to_string());
|
||||||
|
}
|
||||||
|
if let Some(a) = attribute {
|
||||||
|
meta.insert(ATTRIBUTE_KEY.to_string(), attribute_str(a).to_string());
|
||||||
|
}
|
||||||
|
if let Some(s) = sorted {
|
||||||
|
meta.insert(SORTED_KEY.to_string(), s.to_string());
|
||||||
|
}
|
||||||
|
Arc::new(Field::new("", data_type, nullable).with_metadata(meta))
|
||||||
|
}
|
||||||
85
crates/qroissant-arrow/src/options.rs
Normal file
85
crates/qroissant-arrow/src/options.rs
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
//! Projection-level configuration for `qroissant-arrow`.
|
||||||
|
//!
|
||||||
|
//! This module is intentionally free of PyO3 so the arrow crate can remain
|
||||||
|
//! Python-agnostic. The Python crate converts `DecodeOptions` into a
|
||||||
|
//! `ProjectionOptions` at decode time and stores it alongside the value.
|
||||||
|
|
||||||
|
/// Arrow representation chosen for q symbol (byte-string) values.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum SymbolProjection {
    /// Arrow `Utf8` (`StringArray`); bytes that are not valid UTF-8 are
    /// converted lossily. This is the default.
    #[default]
    Utf8,
    /// Arrow `LargeUtf8` (`LargeStringArray`).
    LargeUtf8,
    /// Arrow `Utf8View` (`StringViewArray`).
    Utf8View,
    /// Arrow `Dictionary<Int32, Utf8>`.
    Dictionary,
    /// Arrow `Binary` (`BinaryArray`): the raw bytes, with no UTF-8 coercion.
    RawBytes,
}
|
||||||
|
|
||||||
|
/// Arrow representation chosen for q char-vector (byte string) values.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum StringProjection {
    /// Arrow `Utf8` (`StringArray`), decoded as UTF-8 on a best-effort basis.
    /// This is the default.
    #[default]
    Utf8,
    /// Arrow `Binary` (`BinaryArray`): the raw bytes.
    Binary,
}
|
||||||
|
|
||||||
|
/// Arrow wrapper type used when projecting homogeneous q lists.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum ListProjection {
    /// Arrow `List<i32>` (`ListArray`).
    List,
    /// Arrow `LargeList<i64>` (`LargeListArray`). This is the default.
    #[default]
    LargeList,
    /// Arrow `ListView<i32>`. Not yet supported; falls back to `List`.
    ListView,
}
|
||||||
|
|
||||||
|
/// Arrow union encoding used when projecting heterogeneous q lists.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum UnionMode {
    /// Dense union (compact offsets). This is the default.
    #[default]
    Dense,
    /// Sparse union (one slot per item per type).
    Sparse,
}
|
||||||
|
|
||||||
|
/// Combined projection options threaded through `project()` / `project_table()`.
|
||||||
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
|
pub struct ProjectionOptions {
|
||||||
|
pub symbol: SymbolProjection,
|
||||||
|
pub string: StringProjection,
|
||||||
|
pub list: ListProjection,
|
||||||
|
pub union_mode: UnionMode,
|
||||||
|
/// When `true`, q infinity sentinels (e.g. `0Wi`, `0Wj`, `0w`) are mapped
|
||||||
|
/// to Arrow nulls alongside the standard null sentinels. Default: `false`.
|
||||||
|
pub treat_infinity_as_null: bool,
|
||||||
|
/// When `true` and the table has at least 4 columns, column projection
|
||||||
|
/// is performed in parallel using rayon. Default: `true`.
|
||||||
|
pub parallel: bool,
|
||||||
|
/// When `true`, symbol bytes are assumed to be valid UTF-8 and are
|
||||||
|
/// reinterpreted without validation or allocation. Default: `true`.
|
||||||
|
pub assume_symbol_utf8: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for ProjectionOptions {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
symbol: SymbolProjection::default(),
|
||||||
|
string: StringProjection::default(),
|
||||||
|
list: ListProjection::default(),
|
||||||
|
union_mode: UnionMode::default(),
|
||||||
|
treat_infinity_as_null: false,
|
||||||
|
parallel: true,
|
||||||
|
assume_symbol_utf8: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
1160
crates/qroissant-arrow/src/projection.rs
Normal file
1160
crates/qroissant-arrow/src/projection.rs
Normal file
File diff suppressed because it is too large
Load diff
19
crates/qroissant-core/Cargo.toml
Normal file
19
crates/qroissant-core/Cargo.toml
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
[package]
|
||||||
|
name = "qroissant-core"
|
||||||
|
version.workspace = true
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "qroissant_core"
|
||||||
|
path = "src/lib.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
bytemuck = { version = "1", features = ["derive"] }
|
||||||
|
bytes = "1.11.1"
|
||||||
|
memchr = "2"
|
||||||
|
rayon = "1.10"
|
||||||
|
tokio = { workspace = true, features = ["io-util"] }
|
||||||
|
futures = { workspace = true }
|
||||||
|
|
||||||
907
crates/qroissant-core/src/decode.rs
Normal file
907
crates/qroissant-core/src/decode.rs
Normal file
|
|
@ -0,0 +1,907 @@
|
||||||
|
use rayon::prelude::*;
|
||||||
|
|
||||||
|
use crate::error::CoreError;
|
||||||
|
use crate::error::CoreResult;
|
||||||
|
use crate::extent::value_byte_extent;
|
||||||
|
use crate::frame::Compression;
|
||||||
|
use crate::frame::Encoding;
|
||||||
|
use crate::frame::Frame;
|
||||||
|
use crate::frame::MessageHeader;
|
||||||
|
use crate::frame::decompress_ipc_body;
|
||||||
|
use crate::protocol::Attribute;
|
||||||
|
use crate::protocol::Primitive;
|
||||||
|
use crate::protocol::Shape;
|
||||||
|
use crate::protocol::TypeCode;
|
||||||
|
use crate::protocol::ValueType;
|
||||||
|
use crate::value::Atom;
|
||||||
|
use crate::value::Dictionary;
|
||||||
|
use crate::value::List;
|
||||||
|
use crate::value::Table;
|
||||||
|
use crate::value::Value;
|
||||||
|
use crate::value::Vector;
|
||||||
|
use crate::value::VectorData;
|
||||||
|
|
||||||
|
/// Fully decoded q IPC message.
|
||||||
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
pub struct DecodedMessage {
|
||||||
|
header: MessageHeader,
|
||||||
|
value: Value,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DecodedMessage {
|
||||||
|
pub fn new(header: MessageHeader, value: Value) -> Self {
|
||||||
|
Self { header, value }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn header(&self) -> MessageHeader {
|
||||||
|
self.header
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn value(&self) -> &Value {
|
||||||
|
&self.value
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn qtype(&self) -> ValueType {
|
||||||
|
self.value.qtype()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_parts(self) -> (MessageHeader, Value) {
|
||||||
|
(self.header, self.value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tunables for decoding q IPC message bodies.
#[derive(Clone, Debug)]
pub struct DecodeOptions {
    /// Enables the parallel table path: when set and the top-level value is
    /// a table with at least `parallel_column_threshold` columns, its columns
    /// are decoded concurrently on rayon's thread pool.
    pub parallel: bool,
    /// Smallest column count for which the parallel path is taken; tables
    /// with fewer columns are decoded sequentially.
    pub parallel_column_threshold: usize,
}
|
||||||
|
|
||||||
|
impl Default for DecodeOptions {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
parallel: true,
|
||||||
|
parallel_column_threshold: 4,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct BodyReader {
|
||||||
|
bytes: bytes::Bytes,
|
||||||
|
offset: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BodyReader {
|
||||||
|
fn new(bytes: bytes::Bytes) -> Self {
|
||||||
|
Self { bytes, offset: 0 }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn remaining(&self) -> usize {
|
||||||
|
self.bytes.len().saturating_sub(self.offset)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_exact<const N: usize>(&mut self) -> CoreResult<[u8; N]> {
|
||||||
|
let end = self
|
||||||
|
.offset
|
||||||
|
.checked_add(N)
|
||||||
|
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
||||||
|
let slice = self
|
||||||
|
.bytes
|
||||||
|
.get(self.offset..end)
|
||||||
|
.ok_or_else(|| std::io::Error::from(std::io::ErrorKind::UnexpectedEof))?;
|
||||||
|
self.offset = end;
|
||||||
|
Ok(slice.try_into().expect("fixed-size slice length checked"))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a borrowed slice of `len` bytes and advances the offset.
|
||||||
|
fn read_slice(&mut self, len: usize) -> CoreResult<&[u8]> {
|
||||||
|
let end = self
|
||||||
|
.offset
|
||||||
|
.checked_add(len)
|
||||||
|
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
||||||
|
let slice = self
|
||||||
|
.bytes
|
||||||
|
.get(self.offset..end)
|
||||||
|
.ok_or_else(|| std::io::Error::from(std::io::ErrorKind::UnexpectedEof))?;
|
||||||
|
self.offset = end;
|
||||||
|
Ok(slice)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a zero-copy Bytes wrapper of `len` bytes and advances the offset.
|
||||||
|
fn read_bytes(&mut self, len: usize) -> CoreResult<bytes::Bytes> {
|
||||||
|
let end = self
|
||||||
|
.offset
|
||||||
|
.checked_add(len)
|
||||||
|
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
||||||
|
if end > self.bytes.len() {
|
||||||
|
return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
|
||||||
|
}
|
||||||
|
let slice = self.bytes.slice(self.offset..end);
|
||||||
|
self.offset = end;
|
||||||
|
Ok(slice)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a `Bytes` wrapper of `count * size_of::<T>()` bytes, aligned for `T`.
|
||||||
|
///
|
||||||
|
/// If the current offset is already aligned for `T`, this is zero-copy
|
||||||
|
/// (a `Bytes::slice`). Otherwise it copies into a new aligned allocation.
|
||||||
|
fn read_bytes_aligned<T: bytemuck::Pod>(&mut self, count: usize) -> CoreResult<bytes::Bytes> {
|
||||||
|
let byte_len = count
|
||||||
|
.checked_mul(std::mem::size_of::<T>())
|
||||||
|
.ok_or(CoreError::LengthOverflow(count))?;
|
||||||
|
let end = self
|
||||||
|
.offset
|
||||||
|
.checked_add(byte_len)
|
||||||
|
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
||||||
|
if end > self.bytes.len() {
|
||||||
|
return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
|
||||||
|
}
|
||||||
|
let ptr = self.bytes[self.offset..].as_ptr();
|
||||||
|
let align = std::mem::align_of::<T>();
|
||||||
|
let result = if (ptr as usize) % align == 0 {
|
||||||
|
// Already aligned — zero-copy slice.
|
||||||
|
self.bytes.slice(self.offset..end)
|
||||||
|
} else {
|
||||||
|
// Misaligned — must copy into an aligned allocation.
|
||||||
|
bytes::Bytes::copy_from_slice(&self.bytes[self.offset..end])
|
||||||
|
};
|
||||||
|
self.offset = end;
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_u8(&mut self) -> CoreResult<u8> {
|
||||||
|
Ok(self.read_exact::<1>()?[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_i8(&mut self) -> CoreResult<i8> {
|
||||||
|
Ok(self.read_u8()? as i8)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_i16(&mut self) -> CoreResult<i16> {
|
||||||
|
Ok(i16::from_le_bytes(self.read_exact::<2>()?))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_i32(&mut self) -> CoreResult<i32> {
|
||||||
|
Ok(i32::from_le_bytes(self.read_exact::<4>()?))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_i64(&mut self) -> CoreResult<i64> {
|
||||||
|
Ok(i64::from_le_bytes(self.read_exact::<8>()?))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_f32(&mut self) -> CoreResult<f32> {
|
||||||
|
Ok(f32::from_le_bytes(self.read_exact::<4>()?))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_f64(&mut self) -> CoreResult<f64> {
|
||||||
|
Ok(f64::from_le_bytes(self.read_exact::<8>()?))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_guid(&mut self) -> CoreResult<[u8; 16]> {
|
||||||
|
self.read_exact::<16>()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_length(&mut self) -> CoreResult<usize> {
|
||||||
|
let length = self.read_i32()?;
|
||||||
|
usize::try_from(length).map_err(|_| CoreError::InvalidCollectionLength(length))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_symbol(&mut self) -> CoreResult<bytes::Bytes> {
|
||||||
|
let remaining = &self.bytes[self.offset..];
|
||||||
|
match memchr::memchr(0, remaining) {
|
||||||
|
Some(pos) => {
|
||||||
|
let symbol = self.bytes.slice(self.offset..self.offset + pos);
|
||||||
|
self.offset += pos + 1;
|
||||||
|
Ok(symbol)
|
||||||
|
}
|
||||||
|
None => Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads `count` elements of a fixed-width type as a bulk memcpy.
|
||||||
|
///
|
||||||
|
/// The wire bytes are reinterpreted directly into the target `Vec<T>` via
|
||||||
|
/// `bytemuck::cast_slice_mut`, avoiding per-element parsing. This is valid
|
||||||
|
/// because we only support little-endian payloads and all target platforms
|
||||||
|
/// are little-endian.
|
||||||
|
fn read_vec<T: bytemuck::Pod + bytemuck::AnyBitPattern>(
|
||||||
|
&mut self,
|
||||||
|
count: usize,
|
||||||
|
) -> CoreResult<Vec<T>> {
|
||||||
|
let byte_len = count
|
||||||
|
.checked_mul(std::mem::size_of::<T>())
|
||||||
|
.ok_or(CoreError::LengthOverflow(count))?;
|
||||||
|
let bytes = self.read_slice(byte_len)?;
|
||||||
|
let mut values = vec![T::zeroed(); count];
|
||||||
|
let dst: &mut [u8] = bytemuck::cast_slice_mut(&mut values);
|
||||||
|
dst.copy_from_slice(bytes);
|
||||||
|
Ok(values)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode_atom(reader: &mut BodyReader, primitive: Primitive) -> CoreResult<Atom> {
|
||||||
|
Ok(match primitive {
|
||||||
|
Primitive::Boolean => Atom::Boolean(reader.read_u8()? != 0),
|
||||||
|
Primitive::Guid => Atom::Guid(reader.read_guid()?),
|
||||||
|
Primitive::Byte => Atom::Byte(reader.read_u8()?),
|
||||||
|
Primitive::Short => Atom::Short(reader.read_i16()?),
|
||||||
|
Primitive::Int => Atom::Int(reader.read_i32()?),
|
||||||
|
Primitive::Long => Atom::Long(reader.read_i64()?),
|
||||||
|
Primitive::Real => Atom::Real(reader.read_f32()?),
|
||||||
|
Primitive::Float => Atom::Float(reader.read_f64()?),
|
||||||
|
Primitive::Char => Atom::Char(reader.read_u8()?),
|
||||||
|
Primitive::Symbol => Atom::Symbol(reader.read_symbol()?),
|
||||||
|
Primitive::Timestamp => Atom::Timestamp(reader.read_i64()?),
|
||||||
|
Primitive::Month => Atom::Month(reader.read_i32()?),
|
||||||
|
Primitive::Date => Atom::Date(reader.read_i32()?),
|
||||||
|
Primitive::Datetime => Atom::Datetime(reader.read_f64()?),
|
||||||
|
Primitive::Timespan => Atom::Timespan(reader.read_i64()?),
|
||||||
|
Primitive::Minute => Atom::Minute(reader.read_i32()?),
|
||||||
|
Primitive::Second => Atom::Second(reader.read_i32()?),
|
||||||
|
Primitive::Time => Atom::Time(reader.read_i32()?),
|
||||||
|
Primitive::Mixed => unreachable!("mixed values are not encoded as atoms"),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode_vector(
|
||||||
|
reader: &mut BodyReader,
|
||||||
|
primitive: Primitive,
|
||||||
|
attribute: Attribute,
|
||||||
|
length: usize,
|
||||||
|
) -> CoreResult<Vector> {
|
||||||
|
let data = match primitive {
|
||||||
|
Primitive::Boolean => VectorData::Boolean(reader.read_bytes(length)?),
|
||||||
|
Primitive::Guid => VectorData::Guid(
|
||||||
|
reader.read_bytes(
|
||||||
|
length
|
||||||
|
.checked_mul(16)
|
||||||
|
.ok_or(CoreError::LengthOverflow(length))?,
|
||||||
|
)?,
|
||||||
|
),
|
||||||
|
Primitive::Byte => VectorData::Byte(reader.read_bytes(length)?),
|
||||||
|
Primitive::Short => VectorData::Short(reader.read_bytes_aligned::<i16>(length)?),
|
||||||
|
Primitive::Int => VectorData::Int(reader.read_bytes_aligned::<i32>(length)?),
|
||||||
|
Primitive::Long => VectorData::Long(reader.read_bytes_aligned::<i64>(length)?),
|
||||||
|
Primitive::Real => VectorData::Real(reader.read_bytes_aligned::<f32>(length)?),
|
||||||
|
Primitive::Float => VectorData::Float(reader.read_bytes_aligned::<f64>(length)?),
|
||||||
|
Primitive::Char => VectorData::Char(reader.read_bytes(length)?),
|
||||||
|
Primitive::Symbol => {
|
||||||
|
let mut values = Vec::with_capacity(length);
|
||||||
|
for _ in 0..length {
|
||||||
|
values.push(reader.read_symbol()?);
|
||||||
|
}
|
||||||
|
VectorData::Symbol(values)
|
||||||
|
}
|
||||||
|
Primitive::Timestamp => VectorData::Timestamp(reader.read_bytes_aligned::<i64>(length)?),
|
||||||
|
Primitive::Month => VectorData::Month(reader.read_bytes_aligned::<i32>(length)?),
|
||||||
|
Primitive::Date => VectorData::Date(reader.read_bytes_aligned::<i32>(length)?),
|
||||||
|
Primitive::Datetime => VectorData::Datetime(reader.read_bytes_aligned::<f64>(length)?),
|
||||||
|
Primitive::Timespan => VectorData::Timespan(reader.read_bytes_aligned::<i64>(length)?),
|
||||||
|
Primitive::Minute => VectorData::Minute(reader.read_bytes_aligned::<i32>(length)?),
|
||||||
|
Primitive::Second => VectorData::Second(reader.read_bytes_aligned::<i32>(length)?),
|
||||||
|
Primitive::Time => VectorData::Time(reader.read_bytes_aligned::<i32>(length)?),
|
||||||
|
Primitive::Mixed => unreachable!("mixed values are not encoded as vectors"),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Vector::new(attribute, data))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn extract_symbol_names(value: &Value) -> CoreResult<Vec<bytes::Bytes>> {
|
||||||
|
match value {
|
||||||
|
Value::Vector(vector) => match vector.data() {
|
||||||
|
VectorData::Symbol(values) => Ok(values.clone()),
|
||||||
|
_ => Err(CoreError::InvalidStructure(
|
||||||
|
"q table column names must be a symbol vector".to_string(),
|
||||||
|
)),
|
||||||
|
},
|
||||||
|
_ => Err(CoreError::InvalidStructure(
|
||||||
|
"q table column names must be encoded as a symbol vector".to_string(),
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn extract_columns(value: &Value) -> CoreResult<Vec<Value>> {
|
||||||
|
match value {
|
||||||
|
Value::List(list) => Ok(list.values().to_vec()),
|
||||||
|
_ => Err(CoreError::InvalidStructure(
|
||||||
|
"q table columns must be encoded as a general list".to_string(),
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode_inner(reader: &mut BodyReader) -> CoreResult<Value> {
|
||||||
|
let type_code = TypeCode::try_from(reader.read_i8()?)?;
|
||||||
|
match type_code.shape() {
|
||||||
|
Shape::Atom => Ok(Value::Atom(decode_atom(
|
||||||
|
reader,
|
||||||
|
type_code
|
||||||
|
.primitive()
|
||||||
|
.expect("atom types always have a primitive"),
|
||||||
|
)?)),
|
||||||
|
Shape::Vector => {
|
||||||
|
let attribute = Attribute::try_from(reader.read_i8()?)?;
|
||||||
|
let length = reader.read_length()?;
|
||||||
|
Ok(Value::Vector(decode_vector(
|
||||||
|
reader,
|
||||||
|
type_code
|
||||||
|
.primitive()
|
||||||
|
.expect("vector types always have a primitive"),
|
||||||
|
attribute,
|
||||||
|
length,
|
||||||
|
)?))
|
||||||
|
}
|
||||||
|
Shape::List => {
|
||||||
|
let attribute = Attribute::try_from(reader.read_i8()?)?;
|
||||||
|
let length = reader.read_length()?;
|
||||||
|
let mut values = Vec::with_capacity(length);
|
||||||
|
for _ in 0..length {
|
||||||
|
values.push(decode_inner(reader)?);
|
||||||
|
}
|
||||||
|
Ok(Value::List(List::new(attribute, values)))
|
||||||
|
}
|
||||||
|
Shape::Dictionary => {
|
||||||
|
let sorted = matches!(type_code, TypeCode::SortedDictionary);
|
||||||
|
let keys = decode_inner(reader)?;
|
||||||
|
let values = decode_inner(reader)?;
|
||||||
|
let dictionary = Dictionary::new(sorted, keys, values);
|
||||||
|
dictionary.validate()?;
|
||||||
|
Ok(Value::Dictionary(dictionary))
|
||||||
|
}
|
||||||
|
Shape::Table => {
|
||||||
|
let attribute = Attribute::try_from(reader.read_i8()?)?;
|
||||||
|
let encoded_dictionary = decode_inner(reader)?;
|
||||||
|
let Value::Dictionary(dictionary) = encoded_dictionary else {
|
||||||
|
return Err(CoreError::InvalidStructure(
|
||||||
|
"q table payload must contain a dictionary body".to_string(),
|
||||||
|
));
|
||||||
|
};
|
||||||
|
let column_names = extract_symbol_names(dictionary.keys())?;
|
||||||
|
let columns = extract_columns(dictionary.values())?;
|
||||||
|
let table = Table::new(attribute, column_names, columns);
|
||||||
|
table.validate()?;
|
||||||
|
Ok(Value::Table(table))
|
||||||
|
}
|
||||||
|
Shape::UnaryPrimitive => Ok(Value::UnaryPrimitive {
|
||||||
|
opcode: reader.read_i8()?,
|
||||||
|
}),
|
||||||
|
Shape::Error => {
|
||||||
|
let error_msg = reader.read_symbol()?;
|
||||||
|
Err(CoreError::QRuntime(
|
||||||
|
String::from_utf8_lossy(&error_msg).into(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parsed table preamble: everything before the column data.
|
||||||
|
struct TablePreamble {
|
||||||
|
attribute: Attribute,
|
||||||
|
column_names: Vec<bytes::Bytes>,
|
||||||
|
/// Byte offset within the body where column values start (past the
|
||||||
|
/// general-list header).
|
||||||
|
columns_start: usize,
|
||||||
|
num_columns: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses the table header, dictionary keys (column names), and list header.
|
||||||
|
///
|
||||||
|
/// Shared by both the sequential and parallel table decode paths.
|
||||||
|
fn parse_table_preamble(body: &bytes::Bytes) -> CoreResult<TablePreamble> {
|
||||||
|
let mut reader = BodyReader::new(body.clone());
|
||||||
|
|
||||||
|
// Table: type(1) + attribute(1)
|
||||||
|
let _type_code = reader.read_i8()?; // 98 = Table
|
||||||
|
let attribute = Attribute::try_from(reader.read_i8()?)?;
|
||||||
|
|
||||||
|
// Dictionary: type(1) + keys + values
|
||||||
|
let dict_type = TypeCode::try_from(reader.read_i8()?)?;
|
||||||
|
if !matches!(dict_type, TypeCode::Dictionary | TypeCode::SortedDictionary) {
|
||||||
|
return Err(CoreError::InvalidStructure(
|
||||||
|
"q table payload must contain a dictionary body".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keys = symbol vector (column names)
|
||||||
|
let keys = decode_inner(&mut reader)?;
|
||||||
|
let column_names = extract_symbol_names(&keys)?;
|
||||||
|
|
||||||
|
// Values = general list: type(1) + attr(1) + length(4) + column values
|
||||||
|
let list_type = reader.read_i8()?;
|
||||||
|
if list_type != 0 {
|
||||||
|
return Err(CoreError::InvalidStructure(
|
||||||
|
"q table columns must be encoded as a general list".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let _list_attr = reader.read_i8()?;
|
||||||
|
let num_columns = reader.read_length()?;
|
||||||
|
|
||||||
|
if num_columns != column_names.len() {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"table has {} column names but {} column values",
|
||||||
|
column_names.len(),
|
||||||
|
num_columns
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(TablePreamble {
|
||||||
|
attribute,
|
||||||
|
column_names,
|
||||||
|
columns_start: reader.offset,
|
||||||
|
num_columns,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Attempts parallel table decode. Returns `None` if the column count is
|
||||||
|
/// below the threshold, allowing the caller to fall back to sequential.
|
||||||
|
fn try_decode_table_parallel(body: bytes::Bytes, threshold: usize) -> CoreResult<Option<Value>> {
|
||||||
|
let preamble = parse_table_preamble(&body)?;
|
||||||
|
|
||||||
|
if preamble.num_columns < threshold {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use value_byte_extent to find each column's byte range without parsing
|
||||||
|
let mut column_ranges: Vec<(usize, usize)> = Vec::with_capacity(preamble.num_columns);
|
||||||
|
let mut scan = preamble.columns_start;
|
||||||
|
for _ in 0..preamble.num_columns {
|
||||||
|
let extent = value_byte_extent(&body, scan)?;
|
||||||
|
column_ranges.push((scan, scan + extent));
|
||||||
|
scan += extent;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parallel decode: each column gets its own byte slice
|
||||||
|
let columns: Vec<CoreResult<Value>> = column_ranges
|
||||||
|
.par_iter()
|
||||||
|
.map(|&(start, end)| {
|
||||||
|
let mut col_reader = BodyReader::new(body.slice(start..end));
|
||||||
|
let value = decode_inner(&mut col_reader)?;
|
||||||
|
if col_reader.remaining() != 0 {
|
||||||
|
return Err(CoreError::TrailingBodyBytes(col_reader.remaining()));
|
||||||
|
}
|
||||||
|
Ok(value)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let columns: Vec<Value> = columns.into_iter().collect::<CoreResult<Vec<_>>>()?;
|
||||||
|
|
||||||
|
let table = Table::new(preamble.attribute, preamble.column_names, columns);
|
||||||
|
table.validate()?;
|
||||||
|
Ok(Some(Value::Table(table)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decodes one q value body from a little-endian byte slice.
|
||||||
|
///
|
||||||
|
/// Returns `UnsupportedEndianness` for big-endian payloads.
|
||||||
|
pub fn decode_value(body: bytes::Bytes, encoding: Encoding) -> CoreResult<Value> {
|
||||||
|
decode_value_with_options(body, encoding, &DecodeOptions::default())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decodes one q value body with configurable options.
|
||||||
|
///
|
||||||
|
/// When `options.parallel` is `true` and the body contains a table with
|
||||||
|
/// enough columns, columns are decoded in parallel using rayon.
|
||||||
|
pub fn decode_value_with_options(
|
||||||
|
body: bytes::Bytes,
|
||||||
|
encoding: Encoding,
|
||||||
|
options: &DecodeOptions,
|
||||||
|
) -> CoreResult<Value> {
|
||||||
|
if encoding != Encoding::LittleEndian {
|
||||||
|
return Err(CoreError::UnsupportedEndianness(encoding));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fast path: parallel table decode
|
||||||
|
if options.parallel && body.first() == Some(&98) {
|
||||||
|
if let Some(table) =
|
||||||
|
try_decode_table_parallel(body.clone(), options.parallel_column_threshold)?
|
||||||
|
{
|
||||||
|
return Ok(table);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut reader = BodyReader::new(body);
|
||||||
|
let value = decode_inner(&mut reader)?;
|
||||||
|
if reader.remaining() != 0 {
|
||||||
|
return Err(CoreError::TrailingBodyBytes(reader.remaining()));
|
||||||
|
}
|
||||||
|
Ok(value)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decodes a full q IPC frame into its header and value.
|
||||||
|
///
|
||||||
|
/// Returns `UnsupportedEndianness` for big-endian payloads.
|
||||||
|
pub fn decode_message(frame_bytes: bytes::Bytes) -> CoreResult<DecodedMessage> {
|
||||||
|
decode_message_with_options(frame_bytes, &DecodeOptions::default())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decodes a full q IPC frame with configurable options.
|
||||||
|
pub fn decode_message_with_options(
|
||||||
|
frame_bytes: bytes::Bytes,
|
||||||
|
options: &DecodeOptions,
|
||||||
|
) -> CoreResult<DecodedMessage> {
|
||||||
|
let frame = Frame::parse(&frame_bytes)?;
|
||||||
|
let header = frame.header();
|
||||||
|
|
||||||
|
if header.encoding() != Encoding::LittleEndian {
|
||||||
|
return Err(CoreError::UnsupportedEndianness(header.encoding()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if header.compression() != Compression::Uncompressed {
|
||||||
|
let decompressed = decompress_ipc_body(frame.body(), header.encoding())?;
|
||||||
|
let value = decode_value_with_options(
|
||||||
|
bytes::Bytes::from(decompressed),
|
||||||
|
header.encoding(),
|
||||||
|
options,
|
||||||
|
)?;
|
||||||
|
return Ok(DecodedMessage::new(header, value));
|
||||||
|
}
|
||||||
|
|
||||||
|
let value = decode_value_with_options(
|
||||||
|
frame_bytes.slice(crate::frame::HEADER_LEN..),
|
||||||
|
header.encoding(),
|
||||||
|
options,
|
||||||
|
)?;
|
||||||
|
Ok(DecodedMessage::new(header, value))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::encode::encode_value;
    use crate::protocol::Attribute;

    /// Wraps a static byte string as a symbol.
    fn sym(name: &'static [u8]) -> bytes::Bytes {
        bytes::Bytes::from_static(name)
    }

    /// Builds a vector value carrying no attribute.
    fn plain_vector(data: VectorData) -> Value {
        Value::Vector(Vector::new(Attribute::None, data))
    }

    /// Builds a table value carrying no attribute.
    fn plain_table(names: Vec<bytes::Bytes>, columns: Vec<Value>) -> Value {
        Value::Table(Table::new(Attribute::None, names, columns))
    }

    /// Decodes a static little-endian body with default options.
    fn decode_le(raw: &'static [u8]) -> CoreResult<Value> {
        decode_value(bytes::Bytes::from_static(raw), Encoding::LittleEndian)
    }

    #[test]
    fn decode_int_atom_body() {
        let raw = vec![i8::from(TypeCode::IntAtom) as u8, 42, 0, 0, 0];
        let value = decode_value(bytes::Bytes::from(raw), Encoding::LittleEndian).unwrap();

        assert_eq!(value, Value::Atom(Atom::Int(42)));
        assert_eq!(value.qtype(), ValueType::atom(Primitive::Int));
    }

    #[test]
    fn decode_int_vector_body() {
        let value = decode_le(&[6_u8, 1, 3, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0]).unwrap();

        let expected = Value::Vector(Vector::new(
            Attribute::Sorted,
            VectorData::from_i32s(&[1, 2, 3]),
        ));
        assert_eq!(value, expected);
    }

    #[test]
    fn decode_symbol_atom_body() {
        let value = decode_le(&[245_u8, b'a', b'b', 0]).unwrap();

        assert_eq!(value, Value::Atom(Atom::Symbol(sym(b"ab"))));
    }

    #[test]
    fn decode_list_body() {
        let value =
            decode_le(&[0_u8, 0, 2, 0, 0, 0, 250, 42, 0, 0, 0, 245, b'a', b'b', 0]).unwrap();

        let expected = Value::List(List::new(
            Attribute::None,
            vec![
                Value::Atom(Atom::Int(42)),
                Value::Atom(Atom::Symbol(sym(b"ab"))),
            ],
        ));
        assert_eq!(value, expected);
    }

    #[test]
    fn decode_dictionary_body() {
        let value = decode_le(&[
            99_u8, 11, 0, 2, 0, 0, 0, b'a', 0, b'b', 0, 6, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0,
            0,
        ])
        .unwrap();

        let expected = Value::Dictionary(Dictionary::new(
            false,
            plain_vector(VectorData::Symbol(vec![sym(b"a"), sym(b"b")])),
            plain_vector(VectorData::from_i32s(&[1, 2])),
        ));
        assert_eq!(value, expected);
    }

    #[test]
    fn decode_table_body() {
        let value = decode_le(&[
            98_u8, 0, 99, 11, 0, 2, 0, 0, 0, b's', b'y', b'm', 0, b'p', b'x', 0, 0, 0, 2, 0, 0,
            0, 11, 0, 2, 0, 0, 0, b'a', 0, b'b', 0, 6, 0, 2, 0, 0, 0, 10, 0, 0, 0, 20, 0, 0, 0,
        ])
        .unwrap();

        let expected = plain_table(
            vec![sym(b"sym"), sym(b"px")],
            vec![
                plain_vector(VectorData::Symbol(vec![sym(b"a"), sym(b"b")])),
                plain_vector(VectorData::from_i32s(&[10, 20])),
            ],
        );
        assert_eq!(value, expected);
    }

    #[test]
    fn decode_unary_primitive_body() {
        let value = decode_le(&[101_u8, 0]).unwrap();

        assert_eq!(value, Value::UnaryPrimitive { opcode: 0 });
    }

    #[test]
    fn decode_rejects_trailing_bytes() {
        let outcome = decode_le(&[250_u8, 42, 0, 0, 0, 99]);

        assert!(matches!(outcome, Err(CoreError::TrailingBodyBytes(1))));
    }

    #[test]
    fn decode_rejects_malformed_table_structure() {
        let err = decode_le(&[
            98_u8, 0, 99, 11, 0, 1, 0, 0, 0, b'x', 0, 250, 42, 0, 0, 0,
        ])
        .unwrap_err();

        assert!(matches!(err, CoreError::InvalidStructure(_)));
    }

    #[test]
    fn decode_rejects_big_endian() {
        let outcome = decode_value(
            bytes::Bytes::from_static(&[250_u8, 0, 0, 0, 42]),
            Encoding::BigEndian,
        );

        assert!(matches!(
            outcome,
            Err(CoreError::UnsupportedEndianness(Encoding::BigEndian))
        ));
    }

    // -- Parallel decode tests --

    /// Helper: encodes `table`, decodes it once sequentially and once with
    /// the parallel path forced on, and checks that both results agree with
    /// each other and with the original value.
    fn assert_parallel_matches_sequential(table: &Value) {
        let body = encode_value(table, Encoding::LittleEndian).unwrap();

        let seq_opts = DecodeOptions {
            parallel: false,
            ..Default::default()
        };
        let par_opts = DecodeOptions {
            parallel: true,
            parallel_column_threshold: 1, // force parallel even for small tables
        };

        let decode_with = |opts: &DecodeOptions| {
            decode_value_with_options(
                bytes::Bytes::from(body.clone()),
                Encoding::LittleEndian,
                opts,
            )
            .unwrap()
        };
        let seq = decode_with(&seq_opts);
        let par = decode_with(&par_opts);

        assert_eq!(seq, par, "parallel decode must match sequential decode");
        assert_eq!(&seq, table, "decoded value must match original");
    }

    #[test]
    fn parallel_decode_multi_column_table() {
        let names = vec![sym(b"a"), sym(b"b"), sym(b"c"), sym(b"d")];
        let columns = vec![
            plain_vector(VectorData::from_i32s(&[1, 2, 3])),
            plain_vector(VectorData::Symbol(vec![sym(b"x"), sym(b"y"), sym(b"z")])),
            plain_vector(VectorData::from_f64s(&[1.0, 2.0, 3.0])),
            plain_vector(VectorData::from_i64s(&[100, 200, 300])),
        ];

        assert_parallel_matches_sequential(&plain_table(names, columns));
    }

    #[test]
    fn parallel_decode_mixed_type_columns() {
        let names = vec![
            sym(b"bools"),
            sym(b"guids"),
            sym(b"chars"),
            sym(b"times"),
            sym(b"dates"),
        ];
        let columns = vec![
            plain_vector(VectorData::Boolean(bytes::Bytes::from_static(&[1, 0]))),
            plain_vector(VectorData::from_guids(&[[0u8; 16], [1u8; 16]])),
            plain_vector(VectorData::Char(bytes::Bytes::from_static(b"ab"))),
            plain_vector(VectorData::from_times(&[1000, 2000])),
            plain_vector(VectorData::from_dates(&[100, 200])),
        ];

        assert_parallel_matches_sequential(&plain_table(names, columns));
    }

    #[test]
    fn parallel_decode_below_threshold_falls_back_to_sequential() {
        // 2 columns, threshold 4 → should use sequential path
        let table = plain_table(
            vec![sym(b"a"), sym(b"b")],
            vec![
                plain_vector(VectorData::from_i32s(&[1, 2])),
                plain_vector(VectorData::from_i32s(&[3, 4])),
            ],
        );
        let body = encode_value(&table, Encoding::LittleEndian).unwrap();
        let opts = DecodeOptions {
            parallel: true,
            parallel_column_threshold: 4,
        };

        let decoded = decode_value_with_options(
            bytes::Bytes::from(body.clone()),
            Encoding::LittleEndian,
            &opts,
        )
        .unwrap();

        assert_eq!(decoded, table);
    }

    #[test]
    fn parallel_decode_non_table_ignores_parallel_flag() {
        // Non-table values should decode normally regardless of parallel flag
        let value = Value::Atom(Atom::Int(42));
        let body = encode_value(&value, Encoding::LittleEndian).unwrap();
        let opts = DecodeOptions {
            parallel: true,
            parallel_column_threshold: 1,
        };

        let decoded = decode_value_with_options(
            bytes::Bytes::from(body.clone()),
            Encoding::LittleEndian,
            &opts,
        )
        .unwrap();

        assert_eq!(decoded, value);
    }

    #[test]
    fn parse_table_preamble_correct() {
        let names = vec![sym(b"a"), sym(b"b"), sym(b"c"), sym(b"d"), sym(b"e")];
        let columns = vec![
            plain_vector(VectorData::from_i32s(&[1])),
            plain_vector(VectorData::from_i32s(&[2])),
            plain_vector(VectorData::from_i32s(&[3])),
            plain_vector(VectorData::from_i32s(&[4])),
            plain_vector(VectorData::from_i32s(&[5])),
        ];
        let table = plain_table(names, columns);
        let body = encode_value(&table, Encoding::LittleEndian).unwrap();

        let preamble = parse_table_preamble(&bytes::Bytes::from(body)).unwrap();

        assert_eq!(preamble.num_columns, 5);
        assert_eq!(preamble.column_names.len(), 5);
        assert_eq!(&preamble.column_names[0][..], b"a");
        assert_eq!(&preamble.column_names[4][..], b"e");
    }
}
|
||||||
385
crates/qroissant-core/src/encode.rs
Normal file
385
crates/qroissant-core/src/encode.rs
Normal file
|
|
@ -0,0 +1,385 @@
|
||||||
|
use crate::error::CoreError;
|
||||||
|
use crate::error::CoreResult;
|
||||||
|
use crate::frame::Compression;
|
||||||
|
use crate::frame::Encoding;
|
||||||
|
use crate::frame::MessageType;
|
||||||
|
use crate::frame::serialize_body_as_message;
|
||||||
|
use crate::protocol::TypeCode;
|
||||||
|
use crate::value::Atom;
|
||||||
|
use crate::value::List;
|
||||||
|
use crate::value::Table;
|
||||||
|
use crate::value::Value;
|
||||||
|
use crate::value::Vector;
|
||||||
|
use crate::value::VectorData;
|
||||||
|
|
||||||
|
/// Appends `value` to `buffer` as two little-endian bytes.
fn push_i16(buffer: &mut Vec<u8>, value: i16) {
    buffer.extend(value.to_le_bytes());
}
|
||||||
|
|
||||||
|
/// Appends `value` to `buffer` as four little-endian bytes.
fn push_i32(buffer: &mut Vec<u8>, value: i32) {
    buffer.extend(value.to_le_bytes());
}
|
||||||
|
|
||||||
|
/// Appends `value` to `buffer` as eight little-endian bytes.
fn push_i64(buffer: &mut Vec<u8>, value: i64) {
    buffer.extend(value.to_le_bytes());
}
|
||||||
|
|
||||||
|
/// Appends the little-endian byte representation of `value` to `buffer`.
fn push_f32(buffer: &mut Vec<u8>, value: f32) {
    buffer.extend(value.to_le_bytes());
}
|
||||||
|
|
||||||
|
/// Appends the little-endian byte representation of `value` to `buffer`.
fn push_f64(buffer: &mut Vec<u8>, value: f64) {
    buffer.extend(value.to_le_bytes());
}
|
||||||
|
|
||||||
|
fn push_length(buffer: &mut Vec<u8>, value: usize) {
|
||||||
|
let value = i32::try_from(value).expect("supported q vectors fit in 32-bit length");
|
||||||
|
push_i32(buffer, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encode_atom(atom: &Atom, buffer: &mut Vec<u8>) {
|
||||||
|
match atom {
|
||||||
|
Atom::Boolean(value) => {
|
||||||
|
buffer.push(TypeCode::BooleanAtom as i8 as u8);
|
||||||
|
buffer.push(u8::from(*value));
|
||||||
|
}
|
||||||
|
Atom::Guid(value) => {
|
||||||
|
buffer.push(TypeCode::GuidAtom as i8 as u8);
|
||||||
|
buffer.extend_from_slice(value);
|
||||||
|
}
|
||||||
|
Atom::Byte(value) => {
|
||||||
|
buffer.push(TypeCode::ByteAtom as i8 as u8);
|
||||||
|
buffer.push(*value);
|
||||||
|
}
|
||||||
|
Atom::Short(value) => {
|
||||||
|
buffer.push(TypeCode::ShortAtom as i8 as u8);
|
||||||
|
push_i16(buffer, *value);
|
||||||
|
}
|
||||||
|
Atom::Int(value) => {
|
||||||
|
buffer.push(TypeCode::IntAtom as i8 as u8);
|
||||||
|
push_i32(buffer, *value);
|
||||||
|
}
|
||||||
|
Atom::Long(value) => {
|
||||||
|
buffer.push(TypeCode::LongAtom as i8 as u8);
|
||||||
|
push_i64(buffer, *value);
|
||||||
|
}
|
||||||
|
Atom::Real(value) => {
|
||||||
|
buffer.push(TypeCode::RealAtom as i8 as u8);
|
||||||
|
push_f32(buffer, *value);
|
||||||
|
}
|
||||||
|
Atom::Float(value) => {
|
||||||
|
buffer.push(TypeCode::FloatAtom as i8 as u8);
|
||||||
|
push_f64(buffer, *value);
|
||||||
|
}
|
||||||
|
Atom::Char(value) => {
|
||||||
|
buffer.push(TypeCode::CharAtom as i8 as u8);
|
||||||
|
buffer.push(*value);
|
||||||
|
}
|
||||||
|
Atom::Symbol(value) => {
|
||||||
|
buffer.push(TypeCode::SymbolAtom as i8 as u8);
|
||||||
|
buffer.extend_from_slice(value);
|
||||||
|
buffer.push(0);
|
||||||
|
}
|
||||||
|
Atom::Timestamp(value) => {
|
||||||
|
buffer.push(TypeCode::TimestampAtom as i8 as u8);
|
||||||
|
push_i64(buffer, *value);
|
||||||
|
}
|
||||||
|
Atom::Month(value) => {
|
||||||
|
buffer.push(TypeCode::MonthAtom as i8 as u8);
|
||||||
|
push_i32(buffer, *value);
|
||||||
|
}
|
||||||
|
Atom::Date(value) => {
|
||||||
|
buffer.push(TypeCode::DateAtom as i8 as u8);
|
||||||
|
push_i32(buffer, *value);
|
||||||
|
}
|
||||||
|
Atom::Datetime(value) => {
|
||||||
|
buffer.push(TypeCode::DatetimeAtom as i8 as u8);
|
||||||
|
push_f64(buffer, *value);
|
||||||
|
}
|
||||||
|
Atom::Timespan(value) => {
|
||||||
|
buffer.push(TypeCode::TimespanAtom as i8 as u8);
|
||||||
|
push_i64(buffer, *value);
|
||||||
|
}
|
||||||
|
Atom::Minute(value) => {
|
||||||
|
buffer.push(TypeCode::MinuteAtom as i8 as u8);
|
||||||
|
push_i32(buffer, *value);
|
||||||
|
}
|
||||||
|
Atom::Second(value) => {
|
||||||
|
buffer.push(TypeCode::SecondAtom as i8 as u8);
|
||||||
|
push_i32(buffer, *value);
|
||||||
|
}
|
||||||
|
Atom::Time(value) => {
|
||||||
|
buffer.push(TypeCode::TimeAtom as i8 as u8);
|
||||||
|
push_i32(buffer, *value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encode_vector(vector: &Vector, buffer: &mut Vec<u8>) {
|
||||||
|
let attribute = i8::from(vector.attribute()) as u8;
|
||||||
|
let data = vector.data();
|
||||||
|
let len = data.len();
|
||||||
|
|
||||||
|
// All non-Symbol variants store raw Bytes; pick the type code, write header + raw bytes.
|
||||||
|
let (type_code, raw) = match data {
|
||||||
|
VectorData::Boolean(b) => (TypeCode::BooleanVector, Some(b)),
|
||||||
|
VectorData::Guid(b) => (TypeCode::GuidVector, Some(b)),
|
||||||
|
VectorData::Byte(b) => (TypeCode::ByteVector, Some(b)),
|
||||||
|
VectorData::Short(b) => (TypeCode::ShortVector, Some(b)),
|
||||||
|
VectorData::Int(b) => (TypeCode::IntVector, Some(b)),
|
||||||
|
VectorData::Long(b) => (TypeCode::LongVector, Some(b)),
|
||||||
|
VectorData::Real(b) => (TypeCode::RealVector, Some(b)),
|
||||||
|
VectorData::Float(b) => (TypeCode::FloatVector, Some(b)),
|
||||||
|
VectorData::Char(b) => (TypeCode::CharVector, Some(b)),
|
||||||
|
VectorData::Timestamp(b) => (TypeCode::TimestampVector, Some(b)),
|
||||||
|
VectorData::Month(b) => (TypeCode::MonthVector, Some(b)),
|
||||||
|
VectorData::Date(b) => (TypeCode::DateVector, Some(b)),
|
||||||
|
VectorData::Datetime(b) => (TypeCode::DatetimeVector, Some(b)),
|
||||||
|
VectorData::Timespan(b) => (TypeCode::TimespanVector, Some(b)),
|
||||||
|
VectorData::Minute(b) => (TypeCode::MinuteVector, Some(b)),
|
||||||
|
VectorData::Second(b) => (TypeCode::SecondVector, Some(b)),
|
||||||
|
VectorData::Time(b) => (TypeCode::TimeVector, Some(b)),
|
||||||
|
VectorData::Symbol(_) => (TypeCode::SymbolVector, None),
|
||||||
|
};
|
||||||
|
|
||||||
|
buffer.push(type_code as i8 as u8);
|
||||||
|
buffer.push(attribute);
|
||||||
|
push_length(buffer, len);
|
||||||
|
|
||||||
|
if let Some(raw) = raw {
|
||||||
|
buffer.extend_from_slice(raw);
|
||||||
|
} else if let VectorData::Symbol(values) = data {
|
||||||
|
for value in values {
|
||||||
|
buffer.extend_from_slice(value);
|
||||||
|
buffer.push(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encode_table(table: &Table, buffer: &mut Vec<u8>) -> CoreResult<()> {
|
||||||
|
buffer.push(TypeCode::Table as i8 as u8);
|
||||||
|
buffer.push(i8::from(table.attribute()) as u8);
|
||||||
|
|
||||||
|
buffer.push(TypeCode::Dictionary as i8 as u8);
|
||||||
|
buffer.push(TypeCode::SymbolVector as i8 as u8);
|
||||||
|
buffer.push(0);
|
||||||
|
push_length(buffer, table.column_names().len());
|
||||||
|
for name in table.column_names() {
|
||||||
|
buffer.extend_from_slice(name);
|
||||||
|
buffer.push(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer.push(TypeCode::GeneralList as i8 as u8);
|
||||||
|
buffer.push(0);
|
||||||
|
push_length(buffer, table.columns().len());
|
||||||
|
for column in table.columns() {
|
||||||
|
encode_value_into(column, buffer)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encode_list(list: &List, buffer: &mut Vec<u8>) -> CoreResult<()> {
|
||||||
|
buffer.push(TypeCode::GeneralList as i8 as u8);
|
||||||
|
buffer.push(i8::from(list.attribute()) as u8);
|
||||||
|
push_length(buffer, list.len());
|
||||||
|
for value in list.values() {
|
||||||
|
encode_value_into(value, buffer)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encode_value_into(value: &Value, buffer: &mut Vec<u8>) -> CoreResult<()> {
|
||||||
|
match value {
|
||||||
|
Value::Atom(atom) => encode_atom(atom, buffer),
|
||||||
|
Value::Vector(vector) => encode_vector(vector, buffer),
|
||||||
|
Value::List(list) => encode_list(list, buffer)?,
|
||||||
|
Value::Dictionary(dictionary) => {
|
||||||
|
dictionary.validate()?;
|
||||||
|
buffer.push(if dictionary.sorted() {
|
||||||
|
TypeCode::SortedDictionary as i8 as u8
|
||||||
|
} else {
|
||||||
|
TypeCode::Dictionary as i8 as u8
|
||||||
|
});
|
||||||
|
encode_value_into(dictionary.keys(), buffer)?;
|
||||||
|
encode_value_into(dictionary.values(), buffer)?;
|
||||||
|
}
|
||||||
|
Value::Table(table) => {
|
||||||
|
table.validate()?;
|
||||||
|
encode_table(table, buffer)?;
|
||||||
|
}
|
||||||
|
Value::UnaryPrimitive { opcode } => {
|
||||||
|
buffer.push(TypeCode::UnaryPrimitive as i8 as u8);
|
||||||
|
buffer.push(*opcode as u8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Encodes a supported q value as a little-endian q IPC body.
|
||||||
|
///
|
||||||
|
/// Returns `UnsupportedEndianness` for big-endian encoding.
|
||||||
|
pub fn encode_value(value: &Value, encoding: Encoding) -> CoreResult<Vec<u8>> {
|
||||||
|
if encoding != Encoding::LittleEndian {
|
||||||
|
return Err(CoreError::UnsupportedEndianness(encoding));
|
||||||
|
}
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
encode_value_into(value, &mut buffer)?;
|
||||||
|
Ok(buffer)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Encodes a supported q value as a full q IPC message.
|
||||||
|
///
|
||||||
|
/// Returns `UnsupportedEndianness` for big-endian encoding.
|
||||||
|
pub fn encode_message(
|
||||||
|
value: &Value,
|
||||||
|
encoding: Encoding,
|
||||||
|
message_type: MessageType,
|
||||||
|
compression: Compression,
|
||||||
|
) -> CoreResult<Vec<u8>> {
|
||||||
|
let body = encode_value(value, encoding)?;
|
||||||
|
serialize_body_as_message(&body, encoding, message_type, compression)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::decode::decode_value;
    use crate::protocol::Attribute;
    use crate::value::Dictionary;
    use crate::value::List;
    use crate::value::Table;

    /// Encodes `value` as a little-endian body, panicking on failure.
    fn encode_le(value: &Value) -> Vec<u8> {
        encode_value(value, Encoding::LittleEndian).unwrap()
    }

    /// Decodes `body` as little-endian and asserts it equals `expected`.
    fn assert_decodes_to(body: Vec<u8>, expected: Value) {
        let decoded = decode_value(bytes::Bytes::from(body), Encoding::LittleEndian).unwrap();
        assert_eq!(decoded, expected);
    }

    /// Builds a symbol vector value with no attribute from static names.
    fn symbols(names: &[&'static [u8]]) -> Value {
        let items = names
            .iter()
            .map(|name| bytes::Bytes::from_static(name))
            .collect();
        Value::Vector(Vector::new(Attribute::None, VectorData::Symbol(items)))
    }

    /// Builds an int vector value with no attribute.
    fn ints(values: &[i32]) -> Value {
        Value::Vector(Vector::new(Attribute::None, VectorData::from_i32s(values)))
    }

    #[test]
    fn encode_int_atom_body() {
        let value = Value::Atom(Atom::Int(42));
        let body = encode_le(&value);

        // Type code byte 250 followed by the little-endian payload.
        assert_eq!(body, vec![250, 42, 0, 0, 0]);
        assert_decodes_to(body, value);
    }

    #[test]
    fn encode_rejects_big_endian() {
        let value = Value::Vector(Vector::new(
            Attribute::Sorted,
            VectorData::from_i32s(&[1, 2, 3]),
        ));

        let outcome = encode_value(&value, Encoding::BigEndian);
        assert!(matches!(
            outcome,
            Err(CoreError::UnsupportedEndianness(Encoding::BigEndian))
        ));
    }

    #[test]
    fn encode_symbol_vector_body() {
        let value = symbols(&[b"alpha", b"beta"]);
        let body = encode_le(&value);

        assert_eq!(
            body,
            bytes::Bytes::from_static(b"\x0b\x00\x02\0\0\0alpha\0beta\0")
        );
        assert_decodes_to(body, value);
    }

    #[test]
    fn encode_list_body() {
        let items = vec![
            Value::Atom(Atom::Int(42)),
            Value::Atom(Atom::Symbol(bytes::Bytes::from_static(b"ab"))),
        ];
        let value = Value::List(List::new(Attribute::None, items));

        let body = encode_le(&value);
        assert_decodes_to(body, value);
    }

    #[test]
    fn encode_dictionary_body() {
        let value = Value::Dictionary(Dictionary::new(
            false,
            symbols(&[b"a", b"b"]),
            ints(&[1, 2]),
        ));

        let body = encode_le(&value);
        assert_decodes_to(body, value);
    }

    #[test]
    fn encode_table_body() {
        let names = vec![
            bytes::Bytes::from_static(b"sym"),
            bytes::Bytes::from_static(b"px"),
        ];
        let columns = vec![symbols(&[b"a", b"b"]), ints(&[10, 20])];
        let value = Value::Table(Table::new(Attribute::None, names, columns));

        let body = encode_le(&value);
        assert_decodes_to(body, value);
    }

    #[test]
    fn encode_rejects_malformed_table_structure() {
        // Two column names but only one column.
        let names = vec![
            bytes::Bytes::from_static(b"sym"),
            bytes::Bytes::from_static(b"px"),
        ];
        let value = Value::Table(Table::new(Attribute::None, names, vec![symbols(&[b"a"])]));

        let err = encode_value(&value, Encoding::LittleEndian).unwrap_err();
        assert!(matches!(err, CoreError::InvalidStructure(_)));
    }
}
|
||||||
112
crates/qroissant-core/src/error.rs
Normal file
112
crates/qroissant-core/src/error.rs
Normal file
|
|
@ -0,0 +1,112 @@
|
||||||
|
use std::error::Error;
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
|
use crate::frame::Compression;
|
||||||
|
use crate::frame::Encoding;
|
||||||
|
|
||||||
|
/// Core result type used across the qroissant core crate.
|
||||||
|
pub type CoreResult<T> = Result<T, CoreError>;
|
||||||
|
|
||||||
|
/// Errors produced by low-level q IPC frame handling.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum CoreError {
|
||||||
|
InvalidEncoding(u8),
|
||||||
|
InvalidMessageType(u8),
|
||||||
|
InvalidCompression(u8),
|
||||||
|
InvalidAttribute(i8),
|
||||||
|
InvalidTypeCode(i8),
|
||||||
|
InvalidMessageLength(usize),
|
||||||
|
InvalidCollectionLength(i32),
|
||||||
|
InvalidStructure(String),
|
||||||
|
TruncatedHeader { actual: usize },
|
||||||
|
FrameLengthMismatch { declared: usize, actual: usize },
|
||||||
|
TrailingBodyBytes(usize),
|
||||||
|
UnsupportedEndianness(Encoding),
|
||||||
|
UnsupportedCompression(Compression),
|
||||||
|
UnsupportedTypeCode(i8),
|
||||||
|
LengthOverflow(usize),
|
||||||
|
Io(std::io::Error),
|
||||||
|
QRuntime(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for CoreError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::InvalidEncoding(value) => write!(
|
||||||
|
f,
|
||||||
|
"invalid q IPC encoding value {value}; expected 0 (big-endian) or 1 (little-endian)"
|
||||||
|
),
|
||||||
|
Self::InvalidMessageType(value) => write!(
|
||||||
|
f,
|
||||||
|
"invalid q IPC message type value {value}; expected 0 (asynchronous), 1 (synchronous), or 2 (response)"
|
||||||
|
),
|
||||||
|
Self::InvalidCompression(value) => write!(
|
||||||
|
f,
|
||||||
|
"invalid q IPC compression value {value}; expected 0 (uncompressed), 1 (compressed), or 2 (compressed large)"
|
||||||
|
),
|
||||||
|
Self::InvalidAttribute(value) => write!(
|
||||||
|
f,
|
||||||
|
"invalid q attribute value {value}; expected 0 (none), 1 (sorted), 2 (unique), 3 (parted), or 4 (grouped)"
|
||||||
|
),
|
||||||
|
Self::InvalidTypeCode(value) => write!(f, "invalid q IPC type code {value}"),
|
||||||
|
Self::InvalidMessageLength(length) => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"invalid q IPC message length {length}; minimum is 8 bytes"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
Self::InvalidCollectionLength(length) => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"invalid q collection length {length}; length must be non-negative"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
Self::InvalidStructure(message) => write!(f, "{message}"),
|
||||||
|
Self::TruncatedHeader { actual } => write!(
|
||||||
|
f,
|
||||||
|
"truncated q IPC header: expected 8 bytes, received {actual}"
|
||||||
|
),
|
||||||
|
Self::FrameLengthMismatch { declared, actual } => write!(
|
||||||
|
f,
|
||||||
|
"q IPC header declares {declared} bytes, but frame contains {actual}"
|
||||||
|
),
|
||||||
|
Self::TrailingBodyBytes(remaining) => write!(
|
||||||
|
f,
|
||||||
|
"q IPC body contains {remaining} trailing bytes after the decoded value"
|
||||||
|
),
|
||||||
|
Self::UnsupportedEndianness(encoding) => write!(
|
||||||
|
f,
|
||||||
|
"serialization currently supports only little-endian q IPC frames, got {encoding:?}"
|
||||||
|
),
|
||||||
|
Self::UnsupportedCompression(compression) => write!(
|
||||||
|
f,
|
||||||
|
"serialization currently supports only uncompressed q IPC frames, got {compression:?}"
|
||||||
|
),
|
||||||
|
Self::UnsupportedTypeCode(value) => write!(
|
||||||
|
f,
|
||||||
|
"q IPC type code {value} is valid but not implemented yet in the current decoder"
|
||||||
|
),
|
||||||
|
Self::LengthOverflow(length) => write!(
|
||||||
|
f,
|
||||||
|
"q IPC frame length {length} exceeds 32-bit header capacity"
|
||||||
|
),
|
||||||
|
Self::Io(error) => error.fmt(f),
|
||||||
|
Self::QRuntime(message) => write!(f, "q runtime error: {message}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Error for CoreError {
|
||||||
|
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||||
|
match self {
|
||||||
|
Self::Io(error) => Some(error),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<std::io::Error> for CoreError {
|
||||||
|
fn from(value: std::io::Error) -> Self {
|
||||||
|
Self::Io(value)
|
||||||
|
}
|
||||||
|
}
|
||||||
518
crates/qroissant-core/src/extent.rs
Normal file
518
crates/qroissant-core/src/extent.rs
Normal file
|
|
@ -0,0 +1,518 @@
|
||||||
|
//! Zero-allocation byte extent calculator for serialized q IPC values.
|
||||||
|
//!
|
||||||
|
//! Given a byte slice and an offset pointing to the start of a serialized q
|
||||||
|
//! value, [`value_byte_extent`] returns how many bytes that value occupies
|
||||||
|
//! without allocating memory or constructing a [`Value`]. This is used by
|
||||||
|
//! the parallel column decoder to split a table's column data into
|
||||||
|
//! independent sub-slices before dispatching them to worker threads.
|
||||||
|
|
||||||
|
use crate::error::CoreError;
|
||||||
|
use crate::error::CoreResult;
|
||||||
|
use crate::protocol::Primitive;
|
||||||
|
use crate::protocol::Shape;
|
||||||
|
use crate::protocol::TypeCode;
|
||||||
|
|
||||||
|
/// Returns the byte extent of a serialized q value starting at `bytes[offset..]`.
|
||||||
|
///
|
||||||
|
/// The function reads only type codes, attributes, and lengths — it never
|
||||||
|
/// allocates or constructs a `Value`. For fixed-width vectors this is O(1);
|
||||||
|
/// for symbol vectors and nested structures it scans forward.
|
||||||
|
pub fn value_byte_extent(bytes: &[u8], offset: usize) -> CoreResult<usize> {
|
||||||
|
if offset >= bytes.len() {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"extent: offset {offset} beyond buffer length {}",
|
||||||
|
bytes.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let type_code = TypeCode::try_from(bytes[offset] as i8)?;
|
||||||
|
let shape = type_code.shape();
|
||||||
|
|
||||||
|
match shape {
|
||||||
|
Shape::Atom => atom_extent(bytes, offset, type_code),
|
||||||
|
Shape::Vector => vector_extent(bytes, offset, type_code),
|
||||||
|
Shape::List => list_extent(bytes, offset),
|
||||||
|
Shape::Dictionary => dictionary_extent(bytes, offset),
|
||||||
|
Shape::Table => table_extent(bytes, offset),
|
||||||
|
Shape::UnaryPrimitive => {
|
||||||
|
// type byte + opcode byte
|
||||||
|
check_available(bytes, offset, 2)?;
|
||||||
|
Ok(2)
|
||||||
|
}
|
||||||
|
Shape::Error => {
|
||||||
|
check_available(bytes, offset, 1)?;
|
||||||
|
let data_start = offset + 1;
|
||||||
|
let pos = bytes[data_start..]
|
||||||
|
.iter()
|
||||||
|
.position(|&b| b == 0)
|
||||||
|
.ok_or_else(|| {
|
||||||
|
CoreError::InvalidStructure(format!(
|
||||||
|
"extent: unterminated error string at offset {offset}"
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
Ok(1 + pos + 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks that at least `need` bytes are available from `offset`.
|
||||||
|
#[inline]
|
||||||
|
fn check_available(bytes: &[u8], offset: usize, need: usize) -> CoreResult<()> {
|
||||||
|
if offset + need > bytes.len() {
|
||||||
|
Err(CoreError::InvalidStructure(format!(
|
||||||
|
"extent: need {need} bytes at offset {offset}, but buffer length is {}",
|
||||||
|
bytes.len()
|
||||||
|
)))
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads an i32 length field at `bytes[offset..offset+4]` (little-endian).
|
||||||
|
#[inline]
|
||||||
|
fn read_len(bytes: &[u8], offset: usize) -> CoreResult<usize> {
|
||||||
|
check_available(bytes, offset, 4)?;
|
||||||
|
let len = i32::from_le_bytes(bytes[offset..offset + 4].try_into().unwrap());
|
||||||
|
if len < 0 {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"extent: negative length {len} at offset {offset}"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
Ok(len as usize)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn atom_extent(bytes: &[u8], offset: usize, type_code: TypeCode) -> CoreResult<usize> {
|
||||||
|
// 1 byte for type code + data bytes
|
||||||
|
let primitive = type_code
|
||||||
|
.primitive()
|
||||||
|
.ok_or(CoreError::InvalidTypeCode(type_code as i8))?;
|
||||||
|
|
||||||
|
if let Some(width) = primitive.width() {
|
||||||
|
check_available(bytes, offset, 1 + width)?;
|
||||||
|
Ok(1 + width)
|
||||||
|
} else {
|
||||||
|
// Symbol atom: scan for null terminator
|
||||||
|
debug_assert_eq!(primitive, Primitive::Symbol);
|
||||||
|
let data_start = offset + 1;
|
||||||
|
let pos = bytes[data_start..]
|
||||||
|
.iter()
|
||||||
|
.position(|&b| b == 0)
|
||||||
|
.ok_or_else(|| {
|
||||||
|
CoreError::InvalidStructure(format!(
|
||||||
|
"extent: unterminated symbol atom at offset {offset}"
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
// type byte + symbol bytes + null terminator
|
||||||
|
Ok(1 + pos + 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn vector_extent(bytes: &[u8], offset: usize, type_code: TypeCode) -> CoreResult<usize> {
|
||||||
|
// Header: 1 (type) + 1 (attribute) + 4 (length) = 6 bytes
|
||||||
|
const HEADER: usize = 6;
|
||||||
|
check_available(bytes, offset, HEADER)?;
|
||||||
|
let length = read_len(bytes, offset + 2)?;
|
||||||
|
|
||||||
|
let primitive = type_code
|
||||||
|
.primitive()
|
||||||
|
.ok_or(CoreError::InvalidTypeCode(type_code as i8))?;
|
||||||
|
|
||||||
|
if let Some(width) = primitive.width() {
|
||||||
|
let data_bytes = length
|
||||||
|
.checked_mul(width)
|
||||||
|
.ok_or(CoreError::LengthOverflow(length))?;
|
||||||
|
check_available(bytes, offset, HEADER + data_bytes)?;
|
||||||
|
Ok(HEADER + data_bytes)
|
||||||
|
} else {
|
||||||
|
// Symbol vector: scan through `length` null-terminated strings
|
||||||
|
debug_assert_eq!(primitive, Primitive::Symbol);
|
||||||
|
let mut scan = offset + HEADER;
|
||||||
|
for _ in 0..length {
|
||||||
|
let pos = bytes[scan..].iter().position(|&b| b == 0).ok_or_else(|| {
|
||||||
|
CoreError::InvalidStructure(format!(
|
||||||
|
"extent: unterminated symbol in vector at offset {scan}"
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
scan += pos + 1; // skip past the null terminator
|
||||||
|
}
|
||||||
|
Ok(scan - offset)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn list_extent(bytes: &[u8], offset: usize) -> CoreResult<usize> {
|
||||||
|
// Header: 1 (type) + 1 (attribute) + 4 (length) = 6 bytes
|
||||||
|
const HEADER: usize = 6;
|
||||||
|
check_available(bytes, offset, HEADER)?;
|
||||||
|
let length = read_len(bytes, offset + 2)?;
|
||||||
|
|
||||||
|
let mut scan = offset + HEADER;
|
||||||
|
for _ in 0..length {
|
||||||
|
let child_extent = value_byte_extent(bytes, scan)?;
|
||||||
|
scan += child_extent;
|
||||||
|
}
|
||||||
|
Ok(scan - offset)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dictionary_extent(bytes: &[u8], offset: usize) -> CoreResult<usize> {
|
||||||
|
// 1 byte for type code (99 or 127), then keys value, then values value
|
||||||
|
check_available(bytes, offset, 1)?;
|
||||||
|
let keys_extent = value_byte_extent(bytes, offset + 1)?;
|
||||||
|
let values_extent = value_byte_extent(bytes, offset + 1 + keys_extent)?;
|
||||||
|
Ok(1 + keys_extent + values_extent)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn table_extent(bytes: &[u8], offset: usize) -> CoreResult<usize> {
|
||||||
|
// 1 byte type code + 1 byte attribute + inner dictionary
|
||||||
|
check_available(bytes, offset, 2)?;
|
||||||
|
let dict_extent = value_byte_extent(bytes, offset + 2)?;
|
||||||
|
Ok(2 + dict_extent)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use crate::decode::decode_value;
|
||||||
|
use crate::encode::encode_value;
|
||||||
|
use crate::frame::Encoding;
|
||||||
|
use crate::value::*;
|
||||||
|
|
||||||
|
/// Helper: encode a value, then verify extent equals encoded body length.
|
||||||
|
fn assert_extent_matches(value: &Value) {
|
||||||
|
let body = encode_value(value, Encoding::LittleEndian).unwrap();
|
||||||
|
let extent = value_byte_extent(&body, 0).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
extent,
|
||||||
|
body.len(),
|
||||||
|
"extent mismatch for {value:?}: expected {}, got {extent}",
|
||||||
|
body.len()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -- Atoms --
|
||||||
|
|
||||||
|
/// The extent of an encoded boolean atom equals its encoded length.
#[test]
fn extent_boolean_atom() {
    let atom = Value::Atom(Atom::Boolean(true));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded byte atom equals its encoded length.
#[test]
fn extent_byte_atom() {
    let atom = Value::Atom(Atom::Byte(0x42));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded short atom equals its encoded length.
#[test]
fn extent_short_atom() {
    let atom = Value::Atom(Atom::Short(42));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded int atom equals its encoded length.
#[test]
fn extent_int_atom() {
    let atom = Value::Atom(Atom::Int(42));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded long atom equals its encoded length.
#[test]
fn extent_long_atom() {
    let atom = Value::Atom(Atom::Long(42));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded real atom equals its encoded length.
#[test]
fn extent_real_atom() {
    let atom = Value::Atom(Atom::Real(1.5));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded float atom equals its encoded length.
#[test]
fn extent_float_atom() {
    let atom = Value::Atom(Atom::Float(1.5));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded char atom equals its encoded length.
#[test]
fn extent_char_atom() {
    let atom = Value::Atom(Atom::Char(b'c'));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded non-empty symbol atom equals its encoded length.
#[test]
fn extent_symbol_atom() {
    let symbol = bytes::Bytes::from_static(b"hello");
    assert_extent_matches(&Value::Atom(Atom::Symbol(symbol)));
}
|
||||||
|
|
||||||
|
/// The extent of an encoded empty symbol atom equals its encoded length.
#[test]
fn extent_empty_symbol_atom() {
    let symbol = bytes::Bytes::from_static(b"");
    assert_extent_matches(&Value::Atom(Atom::Symbol(symbol)));
}
|
||||||
|
|
||||||
|
/// The extent of an encoded GUID atom equals its encoded length.
#[test]
fn extent_guid_atom() {
    let atom = Value::Atom(Atom::Guid([0u8; 16]));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded timestamp atom equals its encoded length.
#[test]
fn extent_timestamp_atom() {
    let atom = Value::Atom(Atom::Timestamp(1));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded month atom equals its encoded length.
#[test]
fn extent_month_atom() {
    let atom = Value::Atom(Atom::Month(1));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded date atom equals its encoded length.
#[test]
fn extent_date_atom() {
    let atom = Value::Atom(Atom::Date(1));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded datetime atom equals its encoded length.
#[test]
fn extent_datetime_atom() {
    let atom = Value::Atom(Atom::Datetime(1.5));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded timespan atom equals its encoded length.
#[test]
fn extent_timespan_atom() {
    let atom = Value::Atom(Atom::Timespan(1));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded minute atom equals its encoded length.
#[test]
fn extent_minute_atom() {
    let atom = Value::Atom(Atom::Minute(1));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded second atom equals its encoded length.
#[test]
fn extent_second_atom() {
    let atom = Value::Atom(Atom::Second(1));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
/// The extent of an encoded time atom equals its encoded length.
#[test]
fn extent_time_atom() {
    let atom = Value::Atom(Atom::Time(1));
    assert_extent_matches(&atom);
}
|
||||||
|
|
||||||
|
// -- Vectors --
|
||||||
|
|
||||||
|
/// The extent of an encoded three-element int vector equals its encoded length.
#[test]
fn extent_int_vector() {
    use crate::protocol::Attribute;

    let data = VectorData::from_i32s(&[1, 2, 3]);
    assert_extent_matches(&Value::Vector(Vector::new(Attribute::None, data)));
}
|
||||||
|
|
||||||
|
/// The extent of an encoded empty int vector equals its encoded length.
#[test]
fn extent_empty_int_vector() {
    use crate::protocol::Attribute;

    let data = VectorData::from_i32s(&[]);
    assert_extent_matches(&Value::Vector(Vector::new(Attribute::None, data)));
}
|
||||||
|
|
||||||
|
/// The extent of an encoded two-symbol vector equals its encoded length.
#[test]
fn extent_symbol_vector() {
    use crate::protocol::Attribute;

    let symbols = vec![
        bytes::Bytes::from_static(b"alpha"),
        bytes::Bytes::from_static(b"beta"),
    ];
    let vector = Vector::new(Attribute::None, VectorData::Symbol(symbols));
    assert_extent_matches(&Value::Vector(vector));
}
|
||||||
|
|
||||||
|
/// The extent of an encoded empty symbol vector equals its encoded length.
#[test]
fn extent_empty_symbol_vector() {
    use crate::protocol::Attribute;

    let vector = Vector::new(Attribute::None, VectorData::Symbol(vec![]));
    assert_extent_matches(&Value::Vector(vector));
}
|
||||||
|
|
||||||
|
/// The extent of an encoded three-element boolean vector equals its encoded length.
#[test]
fn extent_boolean_vector() {
    use crate::protocol::Attribute;

    let data = VectorData::Boolean(bytes::Bytes::from_static(&[1, 0, 1]));
    assert_extent_matches(&Value::Vector(Vector::new(Attribute::None, data)));
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extent_guid_vector() {
|
||||||
|
assert_extent_matches(&Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::from_guids(&[[0u8; 16], [1u8; 16]]),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extent_long_vector() {
|
||||||
|
assert_extent_matches(&Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::from_i64s(&[1, 2, 3]),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extent_float_vector() {
|
||||||
|
assert_extent_matches(&Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::from_f64s(&[1.0, 2.0]),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extent_char_vector() {
|
||||||
|
assert_extent_matches(&Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::Char(bytes::Bytes::from_static(b"hello")),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extent_byte_vector() {
|
||||||
|
assert_extent_matches(&Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::Byte(bytes::Bytes::from(vec![1, 2, 3])),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// -- Composites --
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extent_general_list() {
|
||||||
|
assert_extent_matches(&Value::List(List::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
vec![
|
||||||
|
Value::Atom(Atom::Int(42)),
|
||||||
|
Value::Atom(Atom::Symbol(bytes::Bytes::from_static(b"ab"))),
|
||||||
|
],
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extent_empty_list() {
|
||||||
|
assert_extent_matches(&Value::List(List::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
vec![],
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extent_dictionary() {
|
||||||
|
assert_extent_matches(&Value::Dictionary(Dictionary::new(
|
||||||
|
false,
|
||||||
|
Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::Symbol(vec![
|
||||||
|
bytes::Bytes::from_static(b"a"),
|
||||||
|
bytes::Bytes::from_static(b"b"),
|
||||||
|
]),
|
||||||
|
)),
|
||||||
|
Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::from_i32s(&[1, 2]),
|
||||||
|
)),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extent_sorted_dictionary() {
|
||||||
|
assert_extent_matches(&Value::Dictionary(Dictionary::new(
|
||||||
|
true,
|
||||||
|
Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::Sorted,
|
||||||
|
VectorData::Symbol(vec![
|
||||||
|
bytes::Bytes::from_static(b"a"),
|
||||||
|
bytes::Bytes::from_static(b"b"),
|
||||||
|
]),
|
||||||
|
)),
|
||||||
|
Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::from_i32s(&[1, 2]),
|
||||||
|
)),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extent_table() {
|
||||||
|
assert_extent_matches(&Value::Table(Table::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
vec![
|
||||||
|
bytes::Bytes::from_static(b"sym"),
|
||||||
|
bytes::Bytes::from_static(b"px"),
|
||||||
|
],
|
||||||
|
vec![
|
||||||
|
Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::Symbol(vec![
|
||||||
|
bytes::Bytes::from_static(b"a"),
|
||||||
|
bytes::Bytes::from_static(b"b"),
|
||||||
|
]),
|
||||||
|
)),
|
||||||
|
Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::from_i32s(&[10, 20]),
|
||||||
|
)),
|
||||||
|
],
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extent_nested_list() {
|
||||||
|
assert_extent_matches(&Value::List(List::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
vec![
|
||||||
|
Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::from_i32s(&[1, 2, 3]),
|
||||||
|
)),
|
||||||
|
Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::from_i32s(&[4, 5]),
|
||||||
|
)),
|
||||||
|
],
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn extent_unary_primitive() {
|
||||||
|
let value = Value::UnaryPrimitive { opcode: 42 };
|
||||||
|
assert_extent_matches(&value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Verify extent matches for every value encoded in a real roundtrip body.
|
||||||
|
#[test]
|
||||||
|
fn extent_matches_decode_consumption() {
|
||||||
|
// Encode a table, get the body, verify extent == body.len()
|
||||||
|
let table = Value::Table(Table::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
vec![
|
||||||
|
bytes::Bytes::from_static(b"a"),
|
||||||
|
bytes::Bytes::from_static(b"b"),
|
||||||
|
bytes::Bytes::from_static(b"c"),
|
||||||
|
],
|
||||||
|
vec![
|
||||||
|
Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::from_i32s(&[1, 2, 3]),
|
||||||
|
)),
|
||||||
|
Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::Symbol(vec![
|
||||||
|
bytes::Bytes::from_static(b"x"),
|
||||||
|
bytes::Bytes::from_static(b"y"),
|
||||||
|
bytes::Bytes::from_static(b"z"),
|
||||||
|
]),
|
||||||
|
)),
|
||||||
|
Value::Vector(Vector::new(
|
||||||
|
crate::protocol::Attribute::None,
|
||||||
|
VectorData::from_f64s(&[1.0, 2.0, 3.0]),
|
||||||
|
)),
|
||||||
|
],
|
||||||
|
));
|
||||||
|
let body = encode_value(&table, Encoding::LittleEndian).unwrap();
|
||||||
|
let extent = value_byte_extent(&body, 0).unwrap();
|
||||||
|
assert_eq!(extent, body.len());
|
||||||
|
|
||||||
|
// Also verify roundtrip
|
||||||
|
let decoded =
|
||||||
|
decode_value(bytes::Bytes::from(body.clone()), Encoding::LittleEndian).unwrap();
|
||||||
|
assert_eq!(decoded, table);
|
||||||
|
}
|
||||||
|
}
|
||||||
826
crates/qroissant-core/src/frame.rs
Normal file
826
crates/qroissant-core/src/frame.rs
Normal file
|
|
@ -0,0 +1,826 @@
|
||||||
|
use std::io::Read;
|
||||||
|
|
||||||
|
use crate::error::CoreError;
|
||||||
|
use crate::error::CoreResult;
|
||||||
|
|
||||||
|
/// Size in bytes of a q IPC message header.
///
/// Every header handled in this module is exactly this long.
pub const HEADER_LEN: usize = 8;
|
||||||
|
|
||||||
|
/// Byte order of a q IPC message, taken from the first header byte.
///
/// `LittleEndian` is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Encoding {
    /// Multi-byte integers are stored most-significant byte first.
    BigEndian,
    /// Multi-byte integers are stored least-significant byte first.
    #[default]
    LittleEndian,
}
|
||||||
|
|
||||||
|
impl Encoding {
|
||||||
|
fn decode_u32(self, bytes: [u8; 4]) -> u32 {
|
||||||
|
match self {
|
||||||
|
Self::BigEndian => u32::from_be_bytes(bytes),
|
||||||
|
Self::LittleEndian => u32::from_le_bytes(bytes),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encode_u32(self, value: u32) -> [u8; 4] {
|
||||||
|
match self {
|
||||||
|
Self::BigEndian => value.to_be_bytes(),
|
||||||
|
Self::LittleEndian => value.to_le_bytes(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<Encoding> for u8 {
|
||||||
|
fn from(value: Encoding) -> Self {
|
||||||
|
match value {
|
||||||
|
Encoding::BigEndian => 0,
|
||||||
|
Encoding::LittleEndian => 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<u8> for Encoding {
|
||||||
|
type Error = CoreError;
|
||||||
|
|
||||||
|
fn try_from(value: u8) -> CoreResult<Self> {
|
||||||
|
match value {
|
||||||
|
0 => Ok(Self::BigEndian),
|
||||||
|
1 => Ok(Self::LittleEndian),
|
||||||
|
_ => Err(CoreError::InvalidEncoding(value)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Kind of q IPC message, taken from the second header byte.
///
/// `Asynchronous` is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum MessageType {
    /// Header byte value 0.
    #[default]
    Asynchronous,
    /// Header byte value 1.
    Synchronous,
    /// Header byte value 2.
    Response,
}
|
||||||
|
|
||||||
|
impl From<MessageType> for u8 {
|
||||||
|
fn from(value: MessageType) -> Self {
|
||||||
|
match value {
|
||||||
|
MessageType::Asynchronous => 0,
|
||||||
|
MessageType::Synchronous => 1,
|
||||||
|
MessageType::Response => 2,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<u8> for MessageType {
|
||||||
|
type Error = CoreError;
|
||||||
|
|
||||||
|
fn try_from(value: u8) -> CoreResult<Self> {
|
||||||
|
match value {
|
||||||
|
0 => Ok(Self::Asynchronous),
|
||||||
|
1 => Ok(Self::Synchronous),
|
||||||
|
2 => Ok(Self::Response),
|
||||||
|
_ => Err(CoreError::InvalidMessageType(value)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compression marker of a q IPC message, taken from the third header byte.
///
/// `Uncompressed` is the default variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Compression {
    /// Header byte value 0.
    #[default]
    Uncompressed,
    /// Header byte value 1.
    Compressed,
    /// Header byte value 2.
    CompressedLarge,
}
|
||||||
|
|
||||||
|
impl From<Compression> for u8 {
|
||||||
|
fn from(value: Compression) -> Self {
|
||||||
|
match value {
|
||||||
|
Compression::Uncompressed => 0,
|
||||||
|
Compression::Compressed => 1,
|
||||||
|
Compression::CompressedLarge => 2,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<u8> for Compression {
|
||||||
|
type Error = CoreError;
|
||||||
|
|
||||||
|
fn try_from(value: u8) -> CoreResult<Self> {
|
||||||
|
match value {
|
||||||
|
0 => Ok(Self::Uncompressed),
|
||||||
|
1 => Ok(Self::Compressed),
|
||||||
|
2 => Ok(Self::CompressedLarge),
|
||||||
|
_ => Err(CoreError::InvalidCompression(value)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decoded q IPC message header.
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
|
pub struct MessageHeader {
|
||||||
|
encoding: Encoding,
|
||||||
|
message_type: MessageType,
|
||||||
|
compression: Compression,
|
||||||
|
size: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MessageHeader {
|
||||||
|
/// Builds a validated message header.
|
||||||
|
pub fn new(
|
||||||
|
encoding: Encoding,
|
||||||
|
message_type: MessageType,
|
||||||
|
compression: Compression,
|
||||||
|
size: usize,
|
||||||
|
) -> CoreResult<Self> {
|
||||||
|
if size < HEADER_LEN {
|
||||||
|
return Err(CoreError::InvalidMessageLength(size));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
encoding,
|
||||||
|
message_type,
|
||||||
|
compression,
|
||||||
|
size,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a message header from an exact 8-byte array.
|
||||||
|
pub fn from_bytes(bytes: [u8; HEADER_LEN]) -> CoreResult<Self> {
|
||||||
|
let encoding = Encoding::try_from(bytes[0])?;
|
||||||
|
let message_type = MessageType::try_from(bytes[1])?;
|
||||||
|
let compression = Compression::try_from(bytes[2])?;
|
||||||
|
let size = encoding.decode_u32(bytes[4..8].try_into().expect("fixed-size slice")) as usize;
|
||||||
|
Self::new(encoding, message_type, compression, size)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a message header from a byte slice.
|
||||||
|
pub fn parse(bytes: &[u8]) -> CoreResult<Self> {
|
||||||
|
let header: [u8; HEADER_LEN] = bytes
|
||||||
|
.get(..HEADER_LEN)
|
||||||
|
.ok_or(CoreError::TruncatedHeader {
|
||||||
|
actual: bytes.len(),
|
||||||
|
})?
|
||||||
|
.try_into()
|
||||||
|
.expect("header slice length already checked");
|
||||||
|
Self::from_bytes(header)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serializes the header back to its q IPC byte representation.
|
||||||
|
pub fn to_bytes(self) -> CoreResult<[u8; HEADER_LEN]> {
|
||||||
|
let size = u32::try_from(self.size).map_err(|_| CoreError::LengthOverflow(self.size))?;
|
||||||
|
let mut bytes = [0_u8; HEADER_LEN];
|
||||||
|
bytes[0] = self.encoding.into();
|
||||||
|
bytes[1] = self.message_type.into();
|
||||||
|
bytes[2] = self.compression.into();
|
||||||
|
bytes[4..8].copy_from_slice(&self.encoding.encode_u32(size));
|
||||||
|
Ok(bytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn encoding(self) -> Encoding {
|
||||||
|
self.encoding
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn message_type(self) -> MessageType {
|
||||||
|
self.message_type
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn compression(self) -> Compression {
|
||||||
|
self.compression
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn size(self) -> usize {
|
||||||
|
self.size
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn body_len(self) -> usize {
|
||||||
|
self.size - HEADER_LEN
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Borrowed validated q IPC frame.
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
|
pub struct Frame<'a> {
|
||||||
|
header: MessageHeader,
|
||||||
|
body: &'a [u8],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Frame<'a> {
|
||||||
|
/// Validates a full q IPC frame and returns borrowed header/body views.
|
||||||
|
pub fn parse(bytes: &'a [u8]) -> CoreResult<Self> {
|
||||||
|
let header = MessageHeader::parse(bytes)?;
|
||||||
|
if bytes.len() != header.size() {
|
||||||
|
return Err(CoreError::FrameLengthMismatch {
|
||||||
|
declared: header.size(),
|
||||||
|
actual: bytes.len(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
header,
|
||||||
|
body: &bytes[HEADER_LEN..],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn header(self) -> MessageHeader {
|
||||||
|
self.header
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn body(self) -> &'a [u8] {
|
||||||
|
self.body
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decompresses a q IPC compressed body (follows the 8-byte header).
|
||||||
|
///
|
||||||
|
/// The first 4 bytes of the compressed body are a size prefix encoding the
|
||||||
|
/// total decompressed message length including the 8-byte header. The
|
||||||
|
/// remaining bytes are the compressed payload using q's LZW-style algorithm:
|
||||||
|
/// a flag byte drives 8 decisions — bit clear emits a literal byte, bit set
|
||||||
|
/// emits a back-reference (2 fixed bytes + n extra bytes) via a 256-entry
|
||||||
|
/// XOR-keyed lookup table.
|
||||||
|
pub fn decompress_ipc_body(compressed: &[u8], encoding: Encoding) -> CoreResult<Vec<u8>> {
|
||||||
|
if compressed.len() < 4 {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"compressed body must be at least 4 bytes for size prefix, got {}",
|
||||||
|
compressed.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let size_with_header = match encoding {
|
||||||
|
Encoding::LittleEndian => {
|
||||||
|
i32::from_le_bytes(compressed[..4].try_into().expect("validated length"))
|
||||||
|
}
|
||||||
|
Encoding::BigEndian => {
|
||||||
|
i32::from_be_bytes(compressed[..4].try_into().expect("validated length"))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if size_with_header < 8 {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"compressed size prefix {size_with_header} is less than minimum header size 8"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
let size = (size_with_header - 8) as usize;
|
||||||
|
|
||||||
|
let mut decompressed = vec![0_u8; size];
|
||||||
|
let mut aa = [0_i32; 256];
|
||||||
|
let mut n = 0_usize;
|
||||||
|
let mut f = 0_usize;
|
||||||
|
let mut s = 0_usize;
|
||||||
|
let mut p = 0_usize;
|
||||||
|
let mut i = 0_usize;
|
||||||
|
let mut d = 4_usize; // skip the 4-byte size prefix
|
||||||
|
|
||||||
|
while s < size {
|
||||||
|
if i == 0 {
|
||||||
|
if d >= compressed.len() {
|
||||||
|
return Err(CoreError::InvalidStructure(
|
||||||
|
"unexpected end of compressed data while reading flag byte".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
f = compressed[d] as usize;
|
||||||
|
d += 1;
|
||||||
|
i = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (f & i) != 0 {
|
||||||
|
// Back-reference: lookup key byte + extra count byte
|
||||||
|
if d + 2 > compressed.len() {
|
||||||
|
return Err(CoreError::InvalidStructure(
|
||||||
|
"insufficient data for back-reference (need 2 bytes)".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let mut r = aa[compressed[d] as usize] as usize;
|
||||||
|
d += 1;
|
||||||
|
|
||||||
|
if r >= size {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"back-reference start {r} exceeds decompressed buffer size {size}"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
if s >= size {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"write index {s} exceeds decompressed buffer size {size}"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
decompressed[s] = decompressed[r];
|
||||||
|
s += 1;
|
||||||
|
r += 1;
|
||||||
|
|
||||||
|
if r >= size {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"back-reference position {r} exceeds decompressed buffer size {size}"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
if s >= size {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"write index {s} exceeds decompressed buffer size {size}"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
decompressed[s] = decompressed[r];
|
||||||
|
s += 1;
|
||||||
|
r += 1;
|
||||||
|
|
||||||
|
n = compressed[d] as usize;
|
||||||
|
d += 1;
|
||||||
|
|
||||||
|
if r + n > size {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"back-reference range {r}..{} exceeds decompressed buffer size {size}",
|
||||||
|
r + n
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
if s + n > size {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"write range {s}..{} exceeds decompressed buffer size {size}",
|
||||||
|
s + n
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
for m in 0..n {
|
||||||
|
decompressed[s + m] = decompressed[r + m];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Literal byte
|
||||||
|
if d >= compressed.len() {
|
||||||
|
return Err(CoreError::InvalidStructure(
|
||||||
|
"unexpected end of compressed data while reading literal byte".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
decompressed[s] = compressed[d];
|
||||||
|
s += 1;
|
||||||
|
d += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the XOR lookup table with newly emitted bytes
|
||||||
|
while p < s.saturating_sub(1) {
|
||||||
|
aa[(decompressed[p] ^ decompressed[p + 1]) as usize] = p as i32;
|
||||||
|
p += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (f & i) != 0 {
|
||||||
|
s += n;
|
||||||
|
p = s;
|
||||||
|
}
|
||||||
|
|
||||||
|
i *= 2;
|
||||||
|
if i == 256 {
|
||||||
|
i = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(decompressed)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serializes a q-encoded body as a complete q IPC message.
|
||||||
|
///
|
||||||
|
/// This mirrors the current rewrite contract: qroissant only emits
|
||||||
|
/// little-endian, uncompressed frames for now.
|
||||||
|
pub fn serialize_body_as_message(
|
||||||
|
body: &[u8],
|
||||||
|
encoding: Encoding,
|
||||||
|
message_type: MessageType,
|
||||||
|
compression: Compression,
|
||||||
|
) -> CoreResult<Vec<u8>> {
|
||||||
|
if encoding != Encoding::LittleEndian {
|
||||||
|
return Err(CoreError::UnsupportedEndianness(encoding));
|
||||||
|
}
|
||||||
|
if compression != Compression::Uncompressed {
|
||||||
|
return Err(CoreError::UnsupportedCompression(compression));
|
||||||
|
}
|
||||||
|
|
||||||
|
let size = HEADER_LEN
|
||||||
|
.checked_add(body.len())
|
||||||
|
.ok_or(CoreError::LengthOverflow(usize::MAX))?;
|
||||||
|
let header = MessageHeader::new(encoding, message_type, compression, size)?;
|
||||||
|
let mut payload = Vec::with_capacity(size);
|
||||||
|
payload.extend_from_slice(&header.to_bytes()?);
|
||||||
|
payload.extend_from_slice(body);
|
||||||
|
Ok(payload)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads the total q IPC frame length from an 8-byte header.
|
||||||
|
pub fn read_message_length(header: &[u8; HEADER_LEN]) -> CoreResult<usize> {
|
||||||
|
Ok(MessageHeader::from_bytes(*header)?.size())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads one complete q IPC frame from an IO stream.
|
||||||
|
pub fn read_frame<R: Read>(reader: &mut R) -> CoreResult<Vec<u8>> {
|
||||||
|
let mut header = [0_u8; HEADER_LEN];
|
||||||
|
reader.read_exact(&mut header)?;
|
||||||
|
let frame_len = read_message_length(&header)?;
|
||||||
|
let mut frame = vec![0_u8; frame_len];
|
||||||
|
frame[..HEADER_LEN].copy_from_slice(&header);
|
||||||
|
reader.read_exact(&mut frame[HEADER_LEN..])?;
|
||||||
|
Ok(frame)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Incremental decompressor for compressed q IPC bodies.
///
/// Compressed bytes can be handed over in arbitrary chunks as they arrive.
/// Input is consumed strictly front to back, and back-references point into
/// the *output* buffer, so there is no need to hold the whole compressed
/// payload before starting.
///
/// # Usage
///
/// ```ignore
/// let mut dec = StreamingDecompressor::new(size_prefix, Encoding::LittleEndian)?;
/// while !dec.is_complete() {
///     let chunk = read_from_network()?;
///     dec.feed(&chunk)?;
/// }
/// let body = dec.finish()?;
/// ```
pub struct StreamingDecompressor {
    /// Output buffer, allocated at its final length up front.
    decompressed: Vec<u8>,
    /// XOR of two adjacent output bytes -> position of the first byte.
    aa: [i32; 256],
    /// Input bytes received but not yet consumed.
    compressed_buf: Vec<u8>,
    /// Read index into `compressed_buf`.
    d: usize,
    /// Number of output bytes produced so far.
    s: usize,
    /// Next output position to enter into `aa`.
    p: usize,
    /// Current flag byte.
    f: usize,
    /// Bit mask of the current item within `f`; 0 means a new flag byte
    /// must be read.
    i: usize,
    /// Total decompressed length.
    size: usize,
    /// Number of output bytes the caller has already consumed.
    read_ptr: usize,
}
|
||||||
|
|
||||||
|
impl StreamingDecompressor {
|
||||||
|
/// Creates a new streaming decompressor from the 4-byte size prefix
|
||||||
|
/// (the first 4 bytes of the compressed body after the 8-byte header).
|
||||||
|
pub fn new(size_prefix: [u8; 4], encoding: Encoding) -> CoreResult<Self> {
|
||||||
|
let size_with_header = match encoding {
|
||||||
|
Encoding::LittleEndian => i32::from_le_bytes(size_prefix),
|
||||||
|
Encoding::BigEndian => i32::from_be_bytes(size_prefix),
|
||||||
|
};
|
||||||
|
if size_with_header < 8 {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"compressed size prefix {size_with_header} is less than minimum header size 8"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
let size = (size_with_header - 8) as usize;
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
decompressed: vec![0_u8; size],
|
||||||
|
aa: [0_i32; 256],
|
||||||
|
compressed_buf: Vec::new(),
|
||||||
|
d: 0,
|
||||||
|
s: 0,
|
||||||
|
p: 0,
|
||||||
|
f: 0,
|
||||||
|
i: 0,
|
||||||
|
size,
|
||||||
|
read_ptr: 0,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn feed(&mut self, chunk: &[u8]) -> CoreResult<usize> {
|
||||||
|
self.compressed_buf.extend_from_slice(chunk);
|
||||||
|
let prev_s = self.s;
|
||||||
|
|
||||||
|
while self.s < self.size {
|
||||||
|
if self.i == 0 {
|
||||||
|
if self.d >= self.compressed_buf.len() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
self.f = self.compressed_buf[self.d] as usize;
|
||||||
|
self.d += 1;
|
||||||
|
self.i = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let is_backref = (self.f & self.i) != 0;
|
||||||
|
let mut n = 0;
|
||||||
|
|
||||||
|
if is_backref {
|
||||||
|
if self.d + 2 > self.compressed_buf.len() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let mut r = self.aa[self.compressed_buf[self.d] as usize] as usize;
|
||||||
|
self.d += 1;
|
||||||
|
if r >= self.size || self.s + 2 > self.size {
|
||||||
|
return Err(CoreError::InvalidStructure(
|
||||||
|
"backref out of bounds".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
self.decompressed[self.s] = self.decompressed[r];
|
||||||
|
self.s += 1;
|
||||||
|
r += 1;
|
||||||
|
|
||||||
|
if r >= self.size || self.s + 1 > self.size {
|
||||||
|
return Err(CoreError::InvalidStructure(
|
||||||
|
"backref out of bounds".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
self.decompressed[self.s] = self.decompressed[r];
|
||||||
|
self.s += 1;
|
||||||
|
r += 1;
|
||||||
|
|
||||||
|
n = self.compressed_buf[self.d] as usize;
|
||||||
|
self.d += 1;
|
||||||
|
if r + n > self.size || self.s + n > self.size {
|
||||||
|
return Err(CoreError::InvalidStructure(
|
||||||
|
"backref out of bounds".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
for m in 0..n {
|
||||||
|
self.decompressed[self.s + m] = self.decompressed[r + m];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if self.d >= self.compressed_buf.len() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
self.decompressed[self.s] = self.compressed_buf[self.d];
|
||||||
|
self.s += 1;
|
||||||
|
self.d += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sync lookup table
|
||||||
|
while self.p < self.s.saturating_sub(1) {
|
||||||
|
self.aa[(self.decompressed[self.p] ^ self.decompressed[self.p + 1]) as usize] =
|
||||||
|
self.p as i32;
|
||||||
|
self.p += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if is_backref {
|
||||||
|
self.s += n;
|
||||||
|
self.p = self.s;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.i *= 2;
|
||||||
|
if self.i == 256 {
|
||||||
|
self.i = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keep memory usage in check by draining processed bytes
|
||||||
|
if self.d > 0 {
|
||||||
|
self.compressed_buf.drain(0..self.d);
|
||||||
|
self.d = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(self.s - prev_s)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` when decompression is complete.
|
||||||
|
pub fn is_complete(&self) -> bool {
|
||||||
|
self.s >= self.size
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Current number of decompressed bytes available.
|
||||||
|
pub fn decompressed_len(&self) -> usize {
|
||||||
|
self.s
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Number of decompressed bytes that have not yet been read.
|
||||||
|
pub fn unread_len(&self) -> usize {
|
||||||
|
self.s - self.read_ptr
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a slice of the next available decompressed bytes.
|
||||||
|
pub fn next_chunk(&self) -> &[u8] {
|
||||||
|
&self.decompressed[self.read_ptr..self.s]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Advances the read pointer by `len` bytes.
|
||||||
|
pub fn consume(&mut self, len: usize) {
|
||||||
|
self.read_ptr = (self.read_ptr + len).min(self.s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Total expected decompressed size.
|
||||||
|
pub fn total_size(&self) -> usize {
|
||||||
|
self.size
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Borrows the decompressed output produced so far.
|
||||||
|
pub fn decompressed(&self) -> &[u8] {
|
||||||
|
&self.decompressed[..self.s]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Consumes the decompressor and returns the completed output buffer.
|
||||||
|
///
|
||||||
|
/// Returns an error if decompression is not yet complete.
|
||||||
|
pub fn finish(self) -> CoreResult<Vec<u8>> {
|
||||||
|
if !self.is_complete() {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"streaming decompress: incomplete — {}/{} bytes decompressed",
|
||||||
|
self.s, self.size
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
Ok(self.decompressed)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use std::io::Cursor;
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn encoding_round_trips_from_u8() {
|
||||||
|
assert_eq!(Encoding::try_from(0).unwrap(), Encoding::BigEndian);
|
||||||
|
assert_eq!(Encoding::try_from(1).unwrap(), Encoding::LittleEndian);
|
||||||
|
assert!(matches!(
|
||||||
|
Encoding::try_from(9),
|
||||||
|
Err(CoreError::InvalidEncoding(9))
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn compression_supports_compressed_large() {
    // Marker bytes 0, 1 and 2 each decode to their own variant.
    let uncompressed = Compression::try_from(0).unwrap();
    let compressed = Compression::try_from(1).unwrap();
    let compressed_large = Compression::try_from(2).unwrap();

    assert_eq!(uncompressed, Compression::Uncompressed);
    assert_eq!(compressed, Compression::Compressed);
    assert_eq!(compressed_large, Compression::CompressedLarge);
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn header_parses_little_endian_payloads() {
|
||||||
|
let header = MessageHeader::from_bytes([1, 2, 2, 0, 24, 0, 0, 0]).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(header.encoding(), Encoding::LittleEndian);
|
||||||
|
assert_eq!(header.message_type(), MessageType::Response);
|
||||||
|
assert_eq!(header.compression(), Compression::CompressedLarge);
|
||||||
|
assert_eq!(header.size(), 24);
|
||||||
|
assert_eq!(header.body_len(), 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn header_parses_big_endian_lengths() {
|
||||||
|
let header = MessageHeader::from_bytes([0, 1, 0, 0, 0, 0, 0, 16]).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(header.encoding(), Encoding::BigEndian);
|
||||||
|
assert_eq!(header.message_type(), MessageType::Synchronous);
|
||||||
|
assert_eq!(header.size(), 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn header_rejects_lengths_smaller_than_header() {
|
||||||
|
assert!(matches!(
|
||||||
|
MessageHeader::from_bytes([1, 2, 0, 0, 7, 0, 0, 0]),
|
||||||
|
Err(CoreError::InvalidMessageLength(7))
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn header_to_bytes_round_trips() {
|
||||||
|
let header = MessageHeader::new(
|
||||||
|
Encoding::LittleEndian,
|
||||||
|
MessageType::Response,
|
||||||
|
Compression::Compressed,
|
||||||
|
64,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let bytes = header.to_bytes().unwrap();
|
||||||
|
assert_eq!(MessageHeader::from_bytes(bytes).unwrap(), header);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn frame_parse_validates_declared_length() {
|
||||||
|
let frame = [1, 2, 0, 0, 10, 0, 0, 0, 42, 43];
|
||||||
|
let parsed = Frame::parse(&frame).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(parsed.header().size(), 10);
|
||||||
|
assert_eq!(parsed.body(), &[42, 43]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn frame_parse_rejects_length_mismatch() {
|
||||||
|
let frame = [1, 2, 0, 0, 11, 0, 0, 0, 42, 43];
|
||||||
|
assert!(matches!(
|
||||||
|
Frame::parse(&frame),
|
||||||
|
Err(CoreError::FrameLengthMismatch {
|
||||||
|
declared: 11,
|
||||||
|
actual: 10
|
||||||
|
})
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn serialize_body_wraps_uncompressed_little_endian_body() {
|
||||||
|
let payload = serialize_body_as_message(
|
||||||
|
&[10, 20, 30],
|
||||||
|
Encoding::LittleEndian,
|
||||||
|
MessageType::Synchronous,
|
||||||
|
Compression::Uncompressed,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(payload, vec![1, 1, 0, 0, 11, 0, 0, 0, 10, 20, 30]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn serialize_body_rejects_big_endian_for_now() {
|
||||||
|
assert!(matches!(
|
||||||
|
serialize_body_as_message(
|
||||||
|
&[1],
|
||||||
|
Encoding::BigEndian,
|
||||||
|
MessageType::Asynchronous,
|
||||||
|
Compression::Uncompressed,
|
||||||
|
),
|
||||||
|
Err(CoreError::UnsupportedEndianness(Encoding::BigEndian))
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn serialize_body_rejects_compressed_frames_for_now() {
|
||||||
|
assert!(matches!(
|
||||||
|
serialize_body_as_message(
|
||||||
|
&[1],
|
||||||
|
Encoding::LittleEndian,
|
||||||
|
MessageType::Asynchronous,
|
||||||
|
Compression::CompressedLarge,
|
||||||
|
),
|
||||||
|
Err(CoreError::UnsupportedCompression(
|
||||||
|
Compression::CompressedLarge
|
||||||
|
))
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn read_frame_reads_complete_payload() {
|
||||||
|
let mut cursor = Cursor::new(vec![1, 2, 0, 0, 10, 0, 0, 0, 42, 43]);
|
||||||
|
let frame = read_frame(&mut cursor).unwrap();
|
||||||
|
|
||||||
|
assert_eq!(frame, vec![1, 2, 0, 0, 10, 0, 0, 0, 42, 43]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// StreamingDecompressor tests
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Helper: compress a body using the batch decompressor, then verify the
|
||||||
|
/// streaming decompressor produces identical output.
|
||||||
|
///
|
||||||
|
/// Since we don't have an encoder for compression, we test by creating
|
||||||
|
/// compressed data that the batch decompressor can handle and verifying
|
||||||
|
/// the streaming variant matches. We use decompress_ipc_body as the
|
||||||
|
/// reference implementation.
|
||||||
|
fn assert_streaming_matches_batch(compressed_body: &[u8]) {
|
||||||
|
let batch_result = decompress_ipc_body(compressed_body, Encoding::LittleEndian).unwrap();
|
||||||
|
|
||||||
|
// Feed all at once
|
||||||
|
let size_prefix: [u8; 4] = compressed_body[..4].try_into().unwrap();
|
||||||
|
let mut dec = StreamingDecompressor::new(size_prefix, Encoding::LittleEndian).unwrap();
|
||||||
|
dec.feed(&compressed_body[4..]).unwrap();
|
||||||
|
assert!(dec.is_complete());
|
||||||
|
let streaming_result = dec.finish().unwrap();
|
||||||
|
assert_eq!(streaming_result, batch_result, "all-at-once mismatch");
|
||||||
|
|
||||||
|
// Feed byte-by-byte
|
||||||
|
let mut dec = StreamingDecompressor::new(size_prefix, Encoding::LittleEndian).unwrap();
|
||||||
|
for &byte in &compressed_body[4..] {
|
||||||
|
dec.feed(&[byte]).unwrap();
|
||||||
|
}
|
||||||
|
assert!(dec.is_complete());
|
||||||
|
let streaming_result = dec.finish().unwrap();
|
||||||
|
assert_eq!(streaming_result, batch_result, "byte-by-byte mismatch");
|
||||||
|
}
|
||||||
|
|
||||||
|
// A size prefix of 8 leaves zero decompressed bytes once the 8-byte header
// is subtracted, so the decompressor is complete before any data is fed.
#[test]
fn streaming_decompressor_empty_body() {
    let header_only = 8_i32.to_le_bytes();
    let decompressor =
        StreamingDecompressor::new(header_only, Encoding::LittleEndian).unwrap();

    // Nothing is fed: the decompressor must already report completion.
    assert!(decompressor.is_complete());
    assert_eq!(decompressor.decompressed_len(), 0);

    let output = decompressor.finish().unwrap();
    assert!(output.is_empty());
}
|
||||||
|
|
||||||
|
// A size prefix of 4 must be refused at construction time.
#[test]
fn streaming_decompressor_rejects_small_size() {
    let too_small = 4_i32.to_le_bytes();
    let outcome = StreamingDecompressor::new(too_small, Encoding::LittleEndian);
    assert!(outcome.is_err());
}
|
||||||
|
|
||||||
|
// Calling `finish` while output is still outstanding must be an error.
#[test]
fn streaming_decompressor_finish_before_complete() {
    // 24 total - 8 header = 16 decompressed bytes expected, none of them fed.
    let pending =
        StreamingDecompressor::new(24_i32.to_le_bytes(), Encoding::LittleEndian).unwrap();

    assert!(!pending.is_complete());
    assert!(pending.finish().is_err());
}
|
||||||
|
|
||||||
|
// A payload made only of literals (no back-references) must decompress the
// same way through the streaming and the batch paths.
#[test]
fn streaming_decompressor_literal_only() {
    // Three decompressed bytes, so the size prefix is 8 + 3 = 11.
    let size_prefix = 11_i32.to_le_bytes();

    // Flag byte 0x00 marks all 8 following items as literals; only three
    // literals ('A', 'B', 'C') are needed to reach the declared size.
    let stream: [u8; 4] = [0x00, 0x41, 0x42, 0x43];

    let compressed = [size_prefix.as_slice(), stream.as_slice()].concat();

    assert_streaming_matches_batch(&compressed);
}
|
||||||
|
}
|
||||||
61
crates/qroissant-core/src/lib.rs
Normal file
61
crates/qroissant-core/src/lib.rs
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
//! q IPC protocol and value semantics for qroissant.
|
||||||
|
//!
|
||||||
|
//! This crate provides the core building blocks for encoding, decoding, and
|
||||||
|
//! representing q/kdb+ IPC messages:
|
||||||
|
//!
|
||||||
|
//! - **`protocol`** — type codes, primitives, shapes, and attributes that
|
||||||
|
//! define the q wire format.
|
||||||
|
//! - **`value`** — the `Value` enum and its variants (`Atom`, `Vector`,
|
||||||
|
//! `List`, `Dictionary`, `Table`) that model q data in Rust.
|
||||||
|
//! - **`frame`** — message framing, header parsing, compression, and the
|
||||||
|
//! `StreamingDecompressor` for incremental LZW decompression.
|
||||||
|
//! - **`decode`** — synchronous message and value decoding with optional
|
||||||
|
//! parallel column decode via rayon.
|
||||||
|
//! - **`encode`** — serialisation of `Value` trees into q IPC byte frames.
|
||||||
|
//! - **`pipelined`** — asynchronous (`tokio::io::AsyncRead`) value decoder
|
||||||
|
//! for streaming use cases.
|
||||||
|
//! - **`extent`** — zero-allocation byte extent scanning used to locate
|
||||||
|
//! column boundaries for parallel decode.
|
||||||
|
|
||||||
|
pub mod decode;
|
||||||
|
pub mod encode;
|
||||||
|
pub mod error;
|
||||||
|
pub mod extent;
|
||||||
|
pub mod frame;
|
||||||
|
pub mod pipelined;
|
||||||
|
pub mod protocol;
|
||||||
|
pub mod value;
|
||||||
|
|
||||||
|
pub use decode::DecodeOptions;
|
||||||
|
pub use decode::DecodedMessage;
|
||||||
|
pub use decode::decode_message;
|
||||||
|
pub use decode::decode_message_with_options;
|
||||||
|
pub use decode::decode_value;
|
||||||
|
pub use decode::decode_value_with_options;
|
||||||
|
pub use encode::encode_message;
|
||||||
|
pub use encode::encode_value;
|
||||||
|
pub use error::CoreError;
|
||||||
|
pub use error::CoreResult;
|
||||||
|
pub use extent::value_byte_extent;
|
||||||
|
pub use frame::Compression;
|
||||||
|
pub use frame::Encoding;
|
||||||
|
pub use frame::Frame;
|
||||||
|
pub use frame::HEADER_LEN;
|
||||||
|
pub use frame::MessageHeader;
|
||||||
|
pub use frame::MessageType;
|
||||||
|
pub use frame::StreamingDecompressor;
|
||||||
|
pub use frame::read_frame;
|
||||||
|
pub use frame::read_message_length;
|
||||||
|
pub use frame::serialize_body_as_message;
|
||||||
|
pub use protocol::Attribute;
|
||||||
|
pub use protocol::Primitive;
|
||||||
|
pub use protocol::Shape;
|
||||||
|
pub use protocol::TypeCode;
|
||||||
|
pub use protocol::ValueType;
|
||||||
|
pub use value::Atom;
|
||||||
|
pub use value::Dictionary;
|
||||||
|
pub use value::List;
|
||||||
|
pub use value::Table;
|
||||||
|
pub use value::Value;
|
||||||
|
pub use value::Vector;
|
||||||
|
pub use value::VectorData;
|
||||||
390
crates/qroissant-core/src/pipelined.rs
Normal file
390
crates/qroissant-core/src/pipelined.rs
Normal file
|
|
@ -0,0 +1,390 @@
|
||||||
|
use futures::future::BoxFuture;
|
||||||
|
use futures::future::FutureExt;
|
||||||
|
use tokio::io::AsyncRead;
|
||||||
|
use tokio::io::AsyncReadExt;
|
||||||
|
|
||||||
|
use crate::decode::extract_columns;
|
||||||
|
use crate::decode::extract_symbol_names;
|
||||||
|
use crate::error::CoreError;
|
||||||
|
use crate::error::CoreResult;
|
||||||
|
use crate::frame::Encoding;
|
||||||
|
use crate::protocol::Attribute;
|
||||||
|
use crate::protocol::Primitive;
|
||||||
|
use crate::protocol::TypeCode;
|
||||||
|
use crate::value::Atom;
|
||||||
|
use crate::value::Dictionary;
|
||||||
|
use crate::value::List;
|
||||||
|
use crate::value::Table;
|
||||||
|
use crate::value::Value;
|
||||||
|
use crate::value::Vector;
|
||||||
|
use crate::value::VectorData;
|
||||||
|
|
||||||
|
/// Async front-end used by the pipelined decoder to pull q value components
/// out of a byte stream.
///
/// It owns an `AsyncRead` source and exposes `async` helpers for fixed-width
/// primitives, length prefixes, raw byte runs and null-terminated symbols, so
/// decoding can suspend while more input arrives instead of blocking.
///
/// Construction through `PipelinedReader::new` only accepts little-endian
/// payloads, like the rest of qroissant.
pub struct PipelinedReader<R> {
    // The wrapped byte source; every read helper pulls from it.
    reader: R,
}
|
||||||
|
|
||||||
|
impl<R: AsyncRead + Unpin> PipelinedReader<R> {
|
||||||
|
/// Creates a new pipelined reader.
|
||||||
|
///
|
||||||
|
/// Returns `UnsupportedEndianness` for big-endian payloads, matching
|
||||||
|
/// the behaviour of `decode_value()` and `decode_message()`.
|
||||||
|
pub fn new(reader: R, encoding: Encoding) -> CoreResult<Self> {
|
||||||
|
if encoding != Encoding::LittleEndian {
|
||||||
|
return Err(CoreError::UnsupportedEndianness(encoding));
|
||||||
|
}
|
||||||
|
Ok(Self { reader })
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_u8(&mut self) -> CoreResult<u8> {
|
||||||
|
let mut buf = [0_u8; 1];
|
||||||
|
self.reader.read_exact(&mut buf).await?;
|
||||||
|
Ok(buf[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_i8(&mut self) -> CoreResult<i8> {
|
||||||
|
Ok(self.read_u8().await? as i8)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_i16(&mut self) -> CoreResult<i16> {
|
||||||
|
let mut buf = [0_u8; 2];
|
||||||
|
self.reader.read_exact(&mut buf).await?;
|
||||||
|
Ok(i16::from_le_bytes(buf))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_i32(&mut self) -> CoreResult<i32> {
|
||||||
|
let mut buf = [0_u8; 4];
|
||||||
|
self.reader.read_exact(&mut buf).await?;
|
||||||
|
Ok(i32::from_le_bytes(buf))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_i64(&mut self) -> CoreResult<i64> {
|
||||||
|
let mut buf = [0_u8; 8];
|
||||||
|
self.reader.read_exact(&mut buf).await?;
|
||||||
|
Ok(i64::from_le_bytes(buf))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_f32(&mut self) -> CoreResult<f32> {
|
||||||
|
let mut buf = [0_u8; 4];
|
||||||
|
self.reader.read_exact(&mut buf).await?;
|
||||||
|
Ok(f32::from_le_bytes(buf))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_f64(&mut self) -> CoreResult<f64> {
|
||||||
|
let mut buf = [0_u8; 8];
|
||||||
|
self.reader.read_exact(&mut buf).await?;
|
||||||
|
Ok(f64::from_le_bytes(buf))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_guid(&mut self) -> CoreResult<[u8; 16]> {
|
||||||
|
let mut buf = [0_u8; 16];
|
||||||
|
self.reader.read_exact(&mut buf).await?;
|
||||||
|
Ok(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_length(&mut self) -> CoreResult<usize> {
|
||||||
|
let length = self.read_i32().await?;
|
||||||
|
usize::try_from(length).map_err(|_| CoreError::InvalidCollectionLength(length))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_bytes(&mut self, len: usize) -> CoreResult<bytes::Bytes> {
|
||||||
|
let mut buf = vec![0_u8; len];
|
||||||
|
self.reader.read_exact(&mut buf).await?;
|
||||||
|
Ok(bytes::Bytes::from(buf))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads a null-terminated symbol.
|
||||||
|
///
|
||||||
|
/// Reads one byte at a time until a null terminator is found.
|
||||||
|
/// In practice the underlying reader is buffered (e.g. `BufReader`
|
||||||
|
/// or `DecompressingReader` with an 8 KB buffer), so single-byte
|
||||||
|
/// `read_exact` calls are cheap — they copy from the user-space buffer
|
||||||
|
/// without issuing a syscall.
|
||||||
|
pub async fn read_symbol(&mut self) -> CoreResult<bytes::Bytes> {
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
loop {
|
||||||
|
let b = self.read_u8().await?;
|
||||||
|
if b == 0 {
|
||||||
|
return Ok(bytes::Bytes::from(buf));
|
||||||
|
}
|
||||||
|
buf.push(b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_vec<T: bytemuck::Pod + bytemuck::AnyBitPattern>(
|
||||||
|
&mut self,
|
||||||
|
count: usize,
|
||||||
|
) -> CoreResult<Vec<T>> {
|
||||||
|
let _byte_len = count
|
||||||
|
.checked_mul(std::mem::size_of::<T>())
|
||||||
|
.ok_or(CoreError::LengthOverflow(count))?;
|
||||||
|
let mut values = vec![T::zeroed(); count];
|
||||||
|
let dst: &mut [u8] = bytemuck::cast_slice_mut(&mut values);
|
||||||
|
self.reader.read_exact(dst).await?;
|
||||||
|
Ok(values)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn decode_value_async<R: AsyncRead + Unpin + Send>(
|
||||||
|
reader: &mut PipelinedReader<R>,
|
||||||
|
) -> CoreResult<Value> {
|
||||||
|
decode_inner_async(reader).await
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode_inner_async<'a, R: AsyncRead + Unpin + Send>(
|
||||||
|
reader: &'a mut PipelinedReader<R>,
|
||||||
|
) -> BoxFuture<'a, CoreResult<Value>> {
|
||||||
|
async move {
|
||||||
|
let type_code_byte = reader.read_i8().await?;
|
||||||
|
let type_code = TypeCode::try_from(type_code_byte)?;
|
||||||
|
match type_code.shape() {
|
||||||
|
crate::protocol::Shape::Atom => {
|
||||||
|
let primitive = type_code
|
||||||
|
.primitive()
|
||||||
|
.ok_or(CoreError::InvalidTypeCode(type_code.into()))?;
|
||||||
|
Ok(Value::Atom(decode_atom_async(reader, primitive).await?))
|
||||||
|
}
|
||||||
|
crate::protocol::Shape::Vector => {
|
||||||
|
let primitive = type_code
|
||||||
|
.primitive()
|
||||||
|
.ok_or(CoreError::InvalidTypeCode(type_code.into()))?;
|
||||||
|
let attribute = Attribute::try_from(reader.read_i8().await?)?;
|
||||||
|
let length = reader.read_length().await?;
|
||||||
|
Ok(Value::Vector(
|
||||||
|
decode_vector_async(reader, primitive, attribute, length).await?,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
crate::protocol::Shape::List => {
|
||||||
|
let attribute = Attribute::try_from(reader.read_i8().await?)?;
|
||||||
|
let length = reader.read_length().await?;
|
||||||
|
let mut values = Vec::with_capacity(length);
|
||||||
|
for _ in 0..length {
|
||||||
|
values.push(decode_inner_async(reader).await?);
|
||||||
|
}
|
||||||
|
Ok(Value::List(List::new(attribute, values)))
|
||||||
|
}
|
||||||
|
crate::protocol::Shape::Dictionary => {
|
||||||
|
let sorted = type_code == TypeCode::SortedDictionary;
|
||||||
|
let keys = decode_inner_async(reader).await?;
|
||||||
|
let values = decode_inner_async(reader).await?;
|
||||||
|
let dict = Dictionary::new(sorted, keys, values);
|
||||||
|
dict.validate()?;
|
||||||
|
Ok(Value::Dictionary(dict))
|
||||||
|
}
|
||||||
|
crate::protocol::Shape::Table => {
|
||||||
|
let attribute = Attribute::try_from(reader.read_i8().await?)?;
|
||||||
|
let dict_value = decode_inner_async(reader).await?;
|
||||||
|
match dict_value {
|
||||||
|
Value::Dictionary(dict) => {
|
||||||
|
let names = extract_symbol_names(dict.keys())?;
|
||||||
|
let columns = extract_columns(dict.values())?;
|
||||||
|
let table = Table::new(attribute, names, columns);
|
||||||
|
table.validate()?;
|
||||||
|
Ok(Value::Table(table))
|
||||||
|
}
|
||||||
|
_ => Err(CoreError::InvalidStructure(
|
||||||
|
"q table payload must contain a dictionary body".to_string(),
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
crate::protocol::Shape::UnaryPrimitive => Ok(Value::UnaryPrimitive {
|
||||||
|
opcode: reader.read_i8().await?,
|
||||||
|
}),
|
||||||
|
crate::protocol::Shape::Error => {
|
||||||
|
let error_msg = reader.read_symbol().await?;
|
||||||
|
Err(CoreError::QRuntime(
|
||||||
|
String::from_utf8_lossy(&error_msg).into(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
.boxed()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn decode_atom_async<R: AsyncRead + Unpin + Send>(
|
||||||
|
reader: &mut PipelinedReader<R>,
|
||||||
|
primitive: Primitive,
|
||||||
|
) -> CoreResult<Atom> {
|
||||||
|
Ok(match primitive {
|
||||||
|
Primitive::Boolean => Atom::Boolean(reader.read_u8().await? != 0),
|
||||||
|
Primitive::Guid => Atom::Guid(reader.read_guid().await?),
|
||||||
|
Primitive::Byte => Atom::Byte(reader.read_u8().await?),
|
||||||
|
Primitive::Short => Atom::Short(reader.read_i16().await?),
|
||||||
|
Primitive::Int => Atom::Int(reader.read_i32().await?),
|
||||||
|
Primitive::Long => Atom::Long(reader.read_i64().await?),
|
||||||
|
Primitive::Real => Atom::Real(reader.read_f32().await?),
|
||||||
|
Primitive::Float => Atom::Float(reader.read_f64().await?),
|
||||||
|
Primitive::Char => Atom::Char(reader.read_u8().await?),
|
||||||
|
Primitive::Symbol => Atom::Symbol(reader.read_symbol().await?),
|
||||||
|
Primitive::Timestamp => Atom::Timestamp(reader.read_i64().await?),
|
||||||
|
Primitive::Month => Atom::Month(reader.read_i32().await?),
|
||||||
|
Primitive::Date => Atom::Date(reader.read_i32().await?),
|
||||||
|
Primitive::Datetime => Atom::Datetime(reader.read_f64().await?),
|
||||||
|
Primitive::Timespan => Atom::Timespan(reader.read_i64().await?),
|
||||||
|
Primitive::Minute => Atom::Minute(reader.read_i32().await?),
|
||||||
|
Primitive::Second => Atom::Second(reader.read_i32().await?),
|
||||||
|
Primitive::Time => Atom::Time(reader.read_i32().await?),
|
||||||
|
Primitive::Mixed => unreachable!("mixed values are not encoded as atoms"),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn decode_vector_async<R: AsyncRead + Unpin + Send>(
|
||||||
|
reader: &mut PipelinedReader<R>,
|
||||||
|
primitive: Primitive,
|
||||||
|
attribute: Attribute,
|
||||||
|
length: usize,
|
||||||
|
) -> CoreResult<Vector> {
|
||||||
|
let data = match primitive {
|
||||||
|
Primitive::Boolean => VectorData::Boolean(reader.read_bytes(length).await?),
|
||||||
|
Primitive::Guid => {
|
||||||
|
let byte_len = length
|
||||||
|
.checked_mul(16)
|
||||||
|
.ok_or(CoreError::LengthOverflow(length))?;
|
||||||
|
VectorData::Guid(reader.read_bytes(byte_len).await?)
|
||||||
|
}
|
||||||
|
Primitive::Byte => VectorData::Byte(reader.read_bytes(length).await?),
|
||||||
|
Primitive::Short => VectorData::Short(reader.read_bytes(length * 2).await?),
|
||||||
|
Primitive::Int => VectorData::Int(reader.read_bytes(length * 4).await?),
|
||||||
|
Primitive::Long => VectorData::Long(reader.read_bytes(length * 8).await?),
|
||||||
|
Primitive::Real => VectorData::Real(reader.read_bytes(length * 4).await?),
|
||||||
|
Primitive::Float => VectorData::Float(reader.read_bytes(length * 8).await?),
|
||||||
|
Primitive::Char => VectorData::Char(reader.read_bytes(length).await?),
|
||||||
|
Primitive::Symbol => {
|
||||||
|
let mut values = Vec::with_capacity(length);
|
||||||
|
for _ in 0..length {
|
||||||
|
values.push(reader.read_symbol().await?);
|
||||||
|
}
|
||||||
|
VectorData::Symbol(values)
|
||||||
|
}
|
||||||
|
Primitive::Timestamp => VectorData::Timestamp(reader.read_bytes(length * 8).await?),
|
||||||
|
Primitive::Month => VectorData::Month(reader.read_bytes(length * 4).await?),
|
||||||
|
Primitive::Date => VectorData::Date(reader.read_bytes(length * 4).await?),
|
||||||
|
Primitive::Datetime => VectorData::Datetime(reader.read_bytes(length * 8).await?),
|
||||||
|
Primitive::Timespan => VectorData::Timespan(reader.read_bytes(length * 8).await?),
|
||||||
|
Primitive::Minute => VectorData::Minute(reader.read_bytes(length * 4).await?),
|
||||||
|
Primitive::Second => VectorData::Second(reader.read_bytes(length * 4).await?),
|
||||||
|
Primitive::Time => VectorData::Time(reader.read_bytes(length * 4).await?),
|
||||||
|
Primitive::Mixed => unreachable!("mixed values are not encoded as vectors"),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Vector::new(attribute, data))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;

    /// Wraps `payload` in a little-endian `PipelinedReader` over an
    /// in-memory cursor.
    fn little_endian_reader(payload: Vec<u8>) -> PipelinedReader<Cursor<Vec<u8>>> {
        PipelinedReader::new(Cursor::new(payload), Encoding::LittleEndian).unwrap()
    }

    /// Appends each word to `payload` as four little-endian bytes.
    fn push_i32s(payload: &mut Vec<u8>, words: &[i32]) {
        for word in words {
            payload.extend_from_slice(&word.to_le_bytes());
        }
    }

    // An int atom is its type code followed by four value bytes.
    #[tokio::test]
    async fn test_decode_atom_async() -> CoreResult<()> {
        let mut payload = vec![TypeCode::IntAtom as u8];
        push_i32s(&mut payload, &[42]);

        let mut reader = little_endian_reader(payload);
        let value = decode_value_async(&mut reader).await?;

        assert_eq!(value, Value::Atom(Atom::Int(42)));
        Ok(())
    }

    // An int vector is type code, attribute, length, then the elements.
    #[tokio::test]
    async fn test_decode_vector_async() -> CoreResult<()> {
        let mut payload = vec![TypeCode::IntVector as u8, 0_u8]; // attribute None
        push_i32s(&mut payload, &[2, 10, 20]); // length 2, then the elements

        let mut reader = little_endian_reader(payload);
        let value = decode_value_async(&mut reader).await?;

        let Value::Vector(vector) = &value else {
            panic!("Expected Vector, got {:?}", value)
        };
        assert_eq!(vector.data().as_i32_slice(), &[10, 20]);
        Ok(())
    }

    // A table is an attribute byte followed by a dictionary of column names
    // (symbol vector) to columns (general list).
    #[tokio::test]
    async fn test_decode_table_async() -> CoreResult<()> {
        let mut payload = vec![
            TypeCode::Table as u8,
            0_u8, // attribute None
            TypeCode::Dictionary as u8,
        ];

        // Dictionary keys: one column name.
        payload.extend_from_slice(&[TypeCode::SymbolVector as u8, 0_u8]);
        push_i32s(&mut payload, &[1]);
        payload.extend_from_slice(b"col1\0");

        // Dictionary values: a general list holding one column.
        payload.extend_from_slice(&[TypeCode::GeneralList as u8, 0_u8]);
        push_i32s(&mut payload, &[1]);

        // Column 1: int vector [100, 200].
        payload.extend_from_slice(&[TypeCode::IntVector as u8, 0_u8]);
        push_i32s(&mut payload, &[2, 100, 200]);

        let mut reader = little_endian_reader(payload);
        let value = decode_value_async(&mut reader).await?;

        let Value::Table(table) = &value else {
            panic!("Expected Table, got {:?}", value)
        };
        assert_eq!(table.num_columns(), 1);
        assert_eq!(&table.column_names()[0][..], b"col1");

        let Value::Vector(column) = &table.columns()[0] else {
            panic!("Expected Vector")
        };
        assert_eq!(column.data().as_i32_slice(), &[100, 200]);
        Ok(())
    }

    // Big-endian payloads are refused when the reader is constructed.
    #[tokio::test]
    async fn test_rejects_big_endian() {
        let source = Cursor::new(Vec::<u8>::new());
        let result = PipelinedReader::new(source, Encoding::BigEndian);

        assert!(matches!(
            result,
            Err(CoreError::UnsupportedEndianness(Encoding::BigEndian))
        ));
    }

    // A negative length prefix is reported as `InvalidCollectionLength`.
    #[tokio::test]
    async fn test_negative_length_gives_proper_error() -> CoreResult<()> {
        let mut payload = vec![TypeCode::IntVector as u8, 0_u8]; // attribute None
        push_i32s(&mut payload, &[-1]); // negative length

        let mut reader = little_endian_reader(payload);
        let err = decode_value_async(&mut reader).await.unwrap_err();

        assert!(
            matches!(err, CoreError::InvalidCollectionLength(-1)),
            "expected InvalidCollectionLength(-1), got {:?}",
            err
        );
        Ok(())
    }
}
|
||||||
373
crates/qroissant-core/src/protocol.rs
Normal file
373
crates/qroissant-core/src/protocol.rs
Normal file
|
|
@ -0,0 +1,373 @@
|
||||||
|
use crate::error::CoreError;
|
||||||
|
use crate::error::CoreResult;
|
||||||
|
|
||||||
|
/// Attribute flag carried by q vectors, lists, and tables.
///
/// The explicit discriminants equal the codes used by the `i8` conversions
/// for this type; `None` is the default.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum Attribute {
    /// No attribute set.
    #[default]
    None = 0,
    /// Sorted attribute.
    Sorted = 1,
    /// Unique attribute.
    Unique = 2,
    /// Parted attribute.
    Parted = 3,
    /// Grouped attribute.
    Grouped = 4,
}
|
||||||
|
|
||||||
|
impl From<Attribute> for i8 {
|
||||||
|
fn from(value: Attribute) -> Self {
|
||||||
|
match value {
|
||||||
|
Attribute::None => 0,
|
||||||
|
Attribute::Sorted => 1,
|
||||||
|
Attribute::Unique => 2,
|
||||||
|
Attribute::Parted => 3,
|
||||||
|
Attribute::Grouped => 4,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<i8> for Attribute {
|
||||||
|
type Error = CoreError;
|
||||||
|
|
||||||
|
fn try_from(value: i8) -> CoreResult<Self> {
|
||||||
|
match value {
|
||||||
|
0 => Ok(Self::None),
|
||||||
|
1 => Ok(Self::Sorted),
|
||||||
|
2 => Ok(Self::Unique),
|
||||||
|
3 => Ok(Self::Parted),
|
||||||
|
4 => Ok(Self::Grouped),
|
||||||
|
_ => Err(CoreError::InvalidAttribute(value)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Element domain shared by q atoms and homogeneous vectors.
///
/// `Mixed` is the odd one out: it is the domain reported for general lists
/// and has no atom or vector encoding of its own.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Primitive {
    // One-byte and GUID kinds.
    Boolean,
    Guid,
    Byte,

    // Integer and floating-point kinds.
    Short,
    Int,
    Long,
    Real,
    Float,

    // Textual kinds.
    Char,
    Symbol,

    // Temporal kinds.
    Timestamp,
    Month,
    Date,
    Datetime,
    Timespan,
    Minute,
    Second,
    Time,

    // Heterogeneous content (general lists).
    Mixed,
}
|
||||||
|
|
||||||
|
impl Primitive {
|
||||||
|
/// Fixed-width byte width for primitives that have one on the wire.
|
||||||
|
pub fn width(self) -> Option<usize> {
|
||||||
|
match self {
|
||||||
|
Self::Boolean | Self::Byte | Self::Char => Some(1),
|
||||||
|
Self::Short => Some(2),
|
||||||
|
Self::Int
|
||||||
|
| Self::Real
|
||||||
|
| Self::Month
|
||||||
|
| Self::Date
|
||||||
|
| Self::Minute
|
||||||
|
| Self::Second
|
||||||
|
| Self::Time => Some(4),
|
||||||
|
Self::Long | Self::Float | Self::Timestamp | Self::Datetime | Self::Timespan => Some(8),
|
||||||
|
Self::Guid => Some(16),
|
||||||
|
Self::Symbol | Self::Mixed => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Structural category of a q value, independent of its element domain.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Shape {
    /// A single scalar value.
    Atom,
    /// A homogeneous vector of one primitive kind.
    Vector,
    /// A general (heterogeneous) list.
    List,
    /// A dictionary, sorted or not.
    Dictionary,
    /// A table.
    Table,
    /// A unary primitive, identified by an opcode.
    UnaryPrimitive,
    /// An error value.
    Error,
}
|
||||||
|
|
||||||
|
/// Complete q type descriptor for a decoded value.
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
|
pub struct ValueType {
|
||||||
|
pub primitive: Option<Primitive>,
|
||||||
|
pub shape: Shape,
|
||||||
|
pub attribute: Option<Attribute>,
|
||||||
|
pub sorted: Option<bool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ValueType {
|
||||||
|
pub fn atom(primitive: Primitive) -> Self {
|
||||||
|
Self {
|
||||||
|
primitive: Some(primitive),
|
||||||
|
shape: Shape::Atom,
|
||||||
|
attribute: None,
|
||||||
|
sorted: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn vector(primitive: Primitive, attribute: Attribute) -> Self {
|
||||||
|
Self {
|
||||||
|
primitive: Some(primitive),
|
||||||
|
shape: Shape::Vector,
|
||||||
|
attribute: Some(attribute),
|
||||||
|
sorted: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn list(attribute: Attribute) -> Self {
|
||||||
|
Self {
|
||||||
|
primitive: Some(Primitive::Mixed),
|
||||||
|
shape: Shape::List,
|
||||||
|
attribute: Some(attribute),
|
||||||
|
sorted: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn dictionary(sorted: bool) -> Self {
|
||||||
|
Self {
|
||||||
|
primitive: None,
|
||||||
|
shape: Shape::Dictionary,
|
||||||
|
attribute: None,
|
||||||
|
sorted: Some(sorted),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn table(attribute: Attribute) -> Self {
|
||||||
|
Self {
|
||||||
|
primitive: None,
|
||||||
|
shape: Shape::Table,
|
||||||
|
attribute: Some(attribute),
|
||||||
|
sorted: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn unary_primitive() -> Self {
|
||||||
|
Self {
|
||||||
|
primitive: None,
|
||||||
|
shape: Shape::UnaryPrimitive,
|
||||||
|
attribute: None,
|
||||||
|
sorted: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Type-code byte that prefixes every value in a q IPC payload.
///
/// The discriminants are the signed byte values themselves (`#[repr(i8)]`):
/// each vector code is the negation of the matching atom code.
#[repr(i8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TypeCode {
    // General list.
    GeneralList = 0,

    // Homogeneous vectors (positive codes).
    BooleanVector = 1,
    GuidVector = 2,
    ByteVector = 4,
    ShortVector = 5,
    IntVector = 6,
    LongVector = 7,
    RealVector = 8,
    FloatVector = 9,
    CharVector = 10,
    SymbolVector = 11,
    TimestampVector = 12,
    MonthVector = 13,
    DateVector = 14,
    DatetimeVector = 15,
    TimespanVector = 16,
    MinuteVector = 17,
    SecondVector = 18,
    TimeVector = 19,

    // Structural codes.
    Table = 98,
    Dictionary = 99,
    UnaryPrimitive = 101,
    SortedDictionary = 127,

    // Atoms (negative codes).
    BooleanAtom = -1,
    GuidAtom = -2,
    ByteAtom = -4,
    ShortAtom = -5,
    IntAtom = -6,
    LongAtom = -7,
    RealAtom = -8,
    FloatAtom = -9,
    CharAtom = -10,
    SymbolAtom = -11,
    TimestampAtom = -12,
    MonthAtom = -13,
    DateAtom = -14,
    DatetimeAtom = -15,
    TimespanAtom = -16,
    MinuteAtom = -17,
    SecondAtom = -18,
    TimeAtom = -19,

    // Error marker.
    ErrorCode = -128,
}
|
||||||
|
|
||||||
|
impl TypeCode {
|
||||||
|
pub fn primitive(self) -> Option<Primitive> {
|
||||||
|
match self {
|
||||||
|
Self::BooleanAtom | Self::BooleanVector => Some(Primitive::Boolean),
|
||||||
|
Self::GuidAtom | Self::GuidVector => Some(Primitive::Guid),
|
||||||
|
Self::ByteAtom | Self::ByteVector => Some(Primitive::Byte),
|
||||||
|
Self::ShortAtom | Self::ShortVector => Some(Primitive::Short),
|
||||||
|
Self::IntAtom | Self::IntVector => Some(Primitive::Int),
|
||||||
|
Self::LongAtom | Self::LongVector => Some(Primitive::Long),
|
||||||
|
Self::RealAtom | Self::RealVector => Some(Primitive::Real),
|
||||||
|
Self::FloatAtom | Self::FloatVector => Some(Primitive::Float),
|
||||||
|
Self::CharAtom | Self::CharVector => Some(Primitive::Char),
|
||||||
|
Self::SymbolAtom | Self::SymbolVector => Some(Primitive::Symbol),
|
||||||
|
Self::TimestampAtom | Self::TimestampVector => Some(Primitive::Timestamp),
|
||||||
|
Self::MonthAtom | Self::MonthVector => Some(Primitive::Month),
|
||||||
|
Self::DateAtom | Self::DateVector => Some(Primitive::Date),
|
||||||
|
Self::DatetimeAtom | Self::DatetimeVector => Some(Primitive::Datetime),
|
||||||
|
Self::TimespanAtom | Self::TimespanVector => Some(Primitive::Timespan),
|
||||||
|
Self::MinuteAtom | Self::MinuteVector => Some(Primitive::Minute),
|
||||||
|
Self::SecondAtom | Self::SecondVector => Some(Primitive::Second),
|
||||||
|
Self::TimeAtom | Self::TimeVector => Some(Primitive::Time),
|
||||||
|
Self::GeneralList
|
||||||
|
| Self::Table
|
||||||
|
| Self::Dictionary
|
||||||
|
| Self::UnaryPrimitive
|
||||||
|
| Self::SortedDictionary
|
||||||
|
| Self::ErrorCode => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn shape(self) -> Shape {
|
||||||
|
match self {
|
||||||
|
Self::BooleanAtom
|
||||||
|
| Self::GuidAtom
|
||||||
|
| Self::ByteAtom
|
||||||
|
| Self::ShortAtom
|
||||||
|
| Self::IntAtom
|
||||||
|
| Self::LongAtom
|
||||||
|
| Self::RealAtom
|
||||||
|
| Self::FloatAtom
|
||||||
|
| Self::CharAtom
|
||||||
|
| Self::SymbolAtom
|
||||||
|
| Self::TimestampAtom
|
||||||
|
| Self::MonthAtom
|
||||||
|
| Self::DateAtom
|
||||||
|
| Self::DatetimeAtom
|
||||||
|
| Self::TimespanAtom
|
||||||
|
| Self::MinuteAtom
|
||||||
|
| Self::SecondAtom
|
||||||
|
| Self::TimeAtom => Shape::Atom,
|
||||||
|
Self::BooleanVector
|
||||||
|
| Self::GuidVector
|
||||||
|
| Self::ByteVector
|
||||||
|
| Self::ShortVector
|
||||||
|
| Self::IntVector
|
||||||
|
| Self::LongVector
|
||||||
|
| Self::RealVector
|
||||||
|
| Self::FloatVector
|
||||||
|
| Self::CharVector
|
||||||
|
| Self::SymbolVector
|
||||||
|
| Self::TimestampVector
|
||||||
|
| Self::MonthVector
|
||||||
|
| Self::DateVector
|
||||||
|
| Self::DatetimeVector
|
||||||
|
| Self::TimespanVector
|
||||||
|
| Self::MinuteVector
|
||||||
|
| Self::SecondVector
|
||||||
|
| Self::TimeVector => Shape::Vector,
|
||||||
|
Self::GeneralList => Shape::List,
|
||||||
|
Self::Dictionary | Self::SortedDictionary => Shape::Dictionary,
|
||||||
|
Self::Table => Shape::Table,
|
||||||
|
Self::UnaryPrimitive => Shape::UnaryPrimitive,
|
||||||
|
Self::ErrorCode => Shape::Error,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts a [`TypeCode`] to `i8` by casting the enum value.
impl From<TypeCode> for i8 {
    fn from(value: TypeCode) -> Self {
        let raw = value as Self;
        raw
    }
}
|
||||||
|
|
||||||
|
impl TryFrom<i8> for TypeCode {
|
||||||
|
type Error = CoreError;
|
||||||
|
|
||||||
|
fn try_from(value: i8) -> CoreResult<Self> {
|
||||||
|
match value {
|
||||||
|
0 => Ok(Self::GeneralList),
|
||||||
|
1 => Ok(Self::BooleanVector),
|
||||||
|
2 => Ok(Self::GuidVector),
|
||||||
|
4 => Ok(Self::ByteVector),
|
||||||
|
5 => Ok(Self::ShortVector),
|
||||||
|
6 => Ok(Self::IntVector),
|
||||||
|
7 => Ok(Self::LongVector),
|
||||||
|
8 => Ok(Self::RealVector),
|
||||||
|
9 => Ok(Self::FloatVector),
|
||||||
|
10 => Ok(Self::CharVector),
|
||||||
|
11 => Ok(Self::SymbolVector),
|
||||||
|
12 => Ok(Self::TimestampVector),
|
||||||
|
13 => Ok(Self::MonthVector),
|
||||||
|
14 => Ok(Self::DateVector),
|
||||||
|
15 => Ok(Self::DatetimeVector),
|
||||||
|
16 => Ok(Self::TimespanVector),
|
||||||
|
17 => Ok(Self::MinuteVector),
|
||||||
|
18 => Ok(Self::SecondVector),
|
||||||
|
19 => Ok(Self::TimeVector),
|
||||||
|
98 => Ok(Self::Table),
|
||||||
|
99 => Ok(Self::Dictionary),
|
||||||
|
101 => Ok(Self::UnaryPrimitive),
|
||||||
|
127 => Ok(Self::SortedDictionary),
|
||||||
|
-1 => Ok(Self::BooleanAtom),
|
||||||
|
-2 => Ok(Self::GuidAtom),
|
||||||
|
-4 => Ok(Self::ByteAtom),
|
||||||
|
-5 => Ok(Self::ShortAtom),
|
||||||
|
-6 => Ok(Self::IntAtom),
|
||||||
|
-7 => Ok(Self::LongAtom),
|
||||||
|
-8 => Ok(Self::RealAtom),
|
||||||
|
-9 => Ok(Self::FloatAtom),
|
||||||
|
-10 => Ok(Self::CharAtom),
|
||||||
|
-11 => Ok(Self::SymbolAtom),
|
||||||
|
-12 => Ok(Self::TimestampAtom),
|
||||||
|
-13 => Ok(Self::MonthAtom),
|
||||||
|
-14 => Ok(Self::DateAtom),
|
||||||
|
-15 => Ok(Self::DatetimeAtom),
|
||||||
|
-16 => Ok(Self::TimespanAtom),
|
||||||
|
-17 => Ok(Self::MinuteAtom),
|
||||||
|
-18 => Ok(Self::SecondAtom),
|
||||||
|
-19 => Ok(Self::TimeAtom),
|
||||||
|
-128 => Ok(Self::ErrorCode),
|
||||||
|
_ => Err(CoreError::InvalidTypeCode(value)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Known attribute bytes decode to their variant; an unknown byte is
    /// rejected with `InvalidAttribute` carrying that byte.
    #[test]
    fn attribute_round_trips() {
        assert_eq!(Attribute::try_from(0).unwrap(), Attribute::None);
        assert_eq!(Attribute::try_from(4).unwrap(), Attribute::Grouped);
        assert!(matches!(
            Attribute::try_from(9),
            Err(CoreError::InvalidAttribute(9))
        ));
    }

    /// Atom, vector and list codes report the expected shape and primitive.
    #[test]
    fn type_code_maps_to_expected_shape_and_primitive() {
        let atom = TypeCode::IntAtom;
        let vector = TypeCode::SymbolVector;
        let list = TypeCode::GeneralList;

        assert_eq!(atom.shape(), Shape::Atom);
        assert_eq!(atom.primitive(), Some(Primitive::Int));
        assert_eq!(vector.shape(), Shape::Vector);
        assert_eq!(vector.primitive(), Some(Primitive::Symbol));
        assert_eq!(list.shape(), Shape::List);
        assert_eq!(list.primitive(), None);
    }

    /// `TryFrom<i8>` decodes representative atom, vector, container and error
    /// codes, and rejects a byte that has no mapping (3 sits in the gap
    /// between the guid and byte vector codes).
    #[test]
    fn type_code_try_from_decodes_known_codes_and_rejects_unknown() {
        assert!(matches!(TypeCode::try_from(-6i8), Ok(TypeCode::IntAtom)));
        assert!(matches!(
            TypeCode::try_from(11i8),
            Ok(TypeCode::SymbolVector)
        ));
        assert!(matches!(TypeCode::try_from(0i8), Ok(TypeCode::GeneralList)));
        assert!(matches!(
            TypeCode::try_from(127i8),
            Ok(TypeCode::SortedDictionary)
        ));
        assert!(matches!(
            TypeCode::try_from(-128i8),
            Ok(TypeCode::ErrorCode)
        ));
        assert!(matches!(
            TypeCode::try_from(3i8),
            Err(CoreError::InvalidTypeCode(3))
        ));
    }
}
|
||||||
479
crates/qroissant-core/src/value.rs
Normal file
479
crates/qroissant-core/src/value.rs
Normal file
|
|
@ -0,0 +1,479 @@
|
||||||
|
use bytes::Bytes;
|
||||||
|
|
||||||
|
use crate::error::CoreError;
|
||||||
|
use crate::error::CoreResult;
|
||||||
|
use crate::protocol::Attribute;
|
||||||
|
use crate::protocol::Primitive;
|
||||||
|
use crate::protocol::ValueType;
|
||||||
|
|
||||||
|
/// q atom payload.
|
||||||
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
pub enum Atom {
|
||||||
|
Boolean(bool),
|
||||||
|
Guid([u8; 16]),
|
||||||
|
Byte(u8),
|
||||||
|
Short(i16),
|
||||||
|
Int(i32),
|
||||||
|
Long(i64),
|
||||||
|
Real(f32),
|
||||||
|
Float(f64),
|
||||||
|
Char(u8),
|
||||||
|
Symbol(Bytes),
|
||||||
|
Timestamp(i64),
|
||||||
|
Month(i32),
|
||||||
|
Date(i32),
|
||||||
|
Datetime(f64),
|
||||||
|
Timespan(i64),
|
||||||
|
Minute(i32),
|
||||||
|
Second(i32),
|
||||||
|
Time(i32),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Atom {
|
||||||
|
pub fn primitive(&self) -> Primitive {
|
||||||
|
match self {
|
||||||
|
Self::Boolean(_) => Primitive::Boolean,
|
||||||
|
Self::Guid(_) => Primitive::Guid,
|
||||||
|
Self::Byte(_) => Primitive::Byte,
|
||||||
|
Self::Short(_) => Primitive::Short,
|
||||||
|
Self::Int(_) => Primitive::Int,
|
||||||
|
Self::Long(_) => Primitive::Long,
|
||||||
|
Self::Real(_) => Primitive::Real,
|
||||||
|
Self::Float(_) => Primitive::Float,
|
||||||
|
Self::Char(_) => Primitive::Char,
|
||||||
|
Self::Symbol(_) => Primitive::Symbol,
|
||||||
|
Self::Timestamp(_) => Primitive::Timestamp,
|
||||||
|
Self::Month(_) => Primitive::Month,
|
||||||
|
Self::Date(_) => Primitive::Date,
|
||||||
|
Self::Datetime(_) => Primitive::Datetime,
|
||||||
|
Self::Timespan(_) => Primitive::Timespan,
|
||||||
|
Self::Minute(_) => Primitive::Minute,
|
||||||
|
Self::Second(_) => Primitive::Second,
|
||||||
|
Self::Time(_) => Primitive::Time,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// q homogeneous vector payload.
|
||||||
|
///
|
||||||
|
/// All fixed-width numeric types store their data as raw [`Bytes`], enabling
|
||||||
|
/// zero-copy slicing from the IPC frame buffer during decode. Typed access
|
||||||
|
/// is provided via `as_*_slice()` methods using `bytemuck::cast_slice`.
|
||||||
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
pub enum VectorData {
|
||||||
|
Boolean(Bytes),
|
||||||
|
Guid(Bytes),
|
||||||
|
Byte(Bytes),
|
||||||
|
Short(Bytes),
|
||||||
|
Int(Bytes),
|
||||||
|
Long(Bytes),
|
||||||
|
Real(Bytes),
|
||||||
|
Float(Bytes),
|
||||||
|
Char(Bytes),
|
||||||
|
Symbol(Vec<Bytes>),
|
||||||
|
Timestamp(Bytes),
|
||||||
|
Month(Bytes),
|
||||||
|
Date(Bytes),
|
||||||
|
Datetime(Bytes),
|
||||||
|
Timespan(Bytes),
|
||||||
|
Minute(Bytes),
|
||||||
|
Second(Bytes),
|
||||||
|
Time(Bytes),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl VectorData {
|
||||||
|
pub fn primitive(&self) -> Primitive {
|
||||||
|
match self {
|
||||||
|
Self::Boolean(_) => Primitive::Boolean,
|
||||||
|
Self::Guid(_) => Primitive::Guid,
|
||||||
|
Self::Byte(_) => Primitive::Byte,
|
||||||
|
Self::Short(_) => Primitive::Short,
|
||||||
|
Self::Int(_) => Primitive::Int,
|
||||||
|
Self::Long(_) => Primitive::Long,
|
||||||
|
Self::Real(_) => Primitive::Real,
|
||||||
|
Self::Float(_) => Primitive::Float,
|
||||||
|
Self::Char(_) => Primitive::Char,
|
||||||
|
Self::Symbol(_) => Primitive::Symbol,
|
||||||
|
Self::Timestamp(_) => Primitive::Timestamp,
|
||||||
|
Self::Month(_) => Primitive::Month,
|
||||||
|
Self::Date(_) => Primitive::Date,
|
||||||
|
Self::Datetime(_) => Primitive::Datetime,
|
||||||
|
Self::Timespan(_) => Primitive::Timespan,
|
||||||
|
Self::Minute(_) => Primitive::Minute,
|
||||||
|
Self::Second(_) => Primitive::Second,
|
||||||
|
Self::Time(_) => Primitive::Time,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
match self {
|
||||||
|
Self::Boolean(b) | Self::Byte(b) | Self::Char(b) => b.len(),
|
||||||
|
Self::Guid(b) => b.len() / 16,
|
||||||
|
Self::Short(b) => b.len() / 2,
|
||||||
|
Self::Int(b)
|
||||||
|
| Self::Month(b)
|
||||||
|
| Self::Date(b)
|
||||||
|
| Self::Minute(b)
|
||||||
|
| Self::Second(b)
|
||||||
|
| Self::Time(b)
|
||||||
|
| Self::Real(b) => b.len() / 4,
|
||||||
|
Self::Long(b)
|
||||||
|
| Self::Timestamp(b)
|
||||||
|
| Self::Timespan(b)
|
||||||
|
| Self::Float(b)
|
||||||
|
| Self::Datetime(b) => b.len() / 8,
|
||||||
|
Self::Symbol(v) => v.len(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.len() == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the underlying raw bytes for non-Symbol variants.
|
||||||
|
pub fn raw_bytes(&self) -> Option<&Bytes> {
|
||||||
|
match self {
|
||||||
|
Self::Symbol(_) => None,
|
||||||
|
Self::Boolean(b)
|
||||||
|
| Self::Guid(b)
|
||||||
|
| Self::Byte(b)
|
||||||
|
| Self::Short(b)
|
||||||
|
| Self::Int(b)
|
||||||
|
| Self::Long(b)
|
||||||
|
| Self::Real(b)
|
||||||
|
| Self::Float(b)
|
||||||
|
| Self::Char(b)
|
||||||
|
| Self::Timestamp(b)
|
||||||
|
| Self::Month(b)
|
||||||
|
| Self::Date(b)
|
||||||
|
| Self::Datetime(b)
|
||||||
|
| Self::Timespan(b)
|
||||||
|
| Self::Minute(b)
|
||||||
|
| Self::Second(b)
|
||||||
|
| Self::Time(b) => Some(b),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_i16_slice(&self) -> &[i16] {
|
||||||
|
match self {
|
||||||
|
Self::Short(b) => bytemuck::cast_slice(b),
|
||||||
|
_ => panic!("as_i16_slice called on {:?}", self.primitive()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_i32_slice(&self) -> &[i32] {
|
||||||
|
match self {
|
||||||
|
Self::Int(b)
|
||||||
|
| Self::Month(b)
|
||||||
|
| Self::Date(b)
|
||||||
|
| Self::Minute(b)
|
||||||
|
| Self::Second(b)
|
||||||
|
| Self::Time(b) => bytemuck::cast_slice(b),
|
||||||
|
_ => panic!("as_i32_slice called on {:?}", self.primitive()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_i64_slice(&self) -> &[i64] {
|
||||||
|
match self {
|
||||||
|
Self::Long(b) | Self::Timestamp(b) | Self::Timespan(b) => bytemuck::cast_slice(b),
|
||||||
|
_ => panic!("as_i64_slice called on {:?}", self.primitive()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_f32_slice(&self) -> &[f32] {
|
||||||
|
match self {
|
||||||
|
Self::Real(b) => bytemuck::cast_slice(b),
|
||||||
|
_ => panic!("as_f32_slice called on {:?}", self.primitive()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_f64_slice(&self) -> &[f64] {
|
||||||
|
match self {
|
||||||
|
Self::Float(b) | Self::Datetime(b) => bytemuck::cast_slice(b),
|
||||||
|
_ => panic!("as_f64_slice called on {:?}", self.primitive()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Construction helpers for tests and ingestion paths.
|
||||||
|
|
||||||
|
pub fn from_i16s(values: &[i16]) -> Self {
|
||||||
|
Self::Short(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_i32s(values: &[i32]) -> Self {
|
||||||
|
Self::Int(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_i64s(values: &[i64]) -> Self {
|
||||||
|
Self::Long(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_f32s(values: &[f32]) -> Self {
|
||||||
|
Self::Real(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_f64s(values: &[f64]) -> Self {
|
||||||
|
Self::Float(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_guids(values: &[[u8; 16]]) -> Self {
|
||||||
|
let mut buf = Vec::with_capacity(values.len() * 16);
|
||||||
|
for guid in values {
|
||||||
|
buf.extend_from_slice(guid);
|
||||||
|
}
|
||||||
|
Self::Guid(Bytes::from(buf))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_timestamps(values: &[i64]) -> Self {
|
||||||
|
Self::Timestamp(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_months(values: &[i32]) -> Self {
|
||||||
|
Self::Month(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_dates(values: &[i32]) -> Self {
|
||||||
|
Self::Date(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_datetimes(values: &[f64]) -> Self {
|
||||||
|
Self::Datetime(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_timespans(values: &[i64]) -> Self {
|
||||||
|
Self::Timespan(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_minutes(values: &[i32]) -> Self {
|
||||||
|
Self::Minute(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_seconds(values: &[i32]) -> Self {
|
||||||
|
Self::Second(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_times(values: &[i32]) -> Self {
|
||||||
|
Self::Time(Bytes::copy_from_slice(bytemuck::cast_slice(values)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// q homogeneous vector with an attached q attribute.
|
||||||
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
pub struct Vector {
|
||||||
|
attribute: Attribute,
|
||||||
|
data: VectorData,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Vector {
|
||||||
|
pub fn new(attribute: Attribute, data: VectorData) -> Self {
|
||||||
|
Self { attribute, data }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn attribute(&self) -> Attribute {
|
||||||
|
self.attribute
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn primitive(&self) -> Primitive {
|
||||||
|
self.data.primitive()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.data.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.data.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn data(&self) -> &VectorData {
|
||||||
|
&self.data
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// q general list.
|
||||||
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
pub struct List {
|
||||||
|
attribute: Attribute,
|
||||||
|
values: Vec<Value>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl List {
|
||||||
|
pub fn new(attribute: Attribute, values: Vec<Value>) -> Self {
|
||||||
|
Self { attribute, values }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn attribute(&self) -> Attribute {
|
||||||
|
self.attribute
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.values.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.values.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn values(&self) -> &[Value] {
|
||||||
|
&self.values
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// q dictionary.
|
||||||
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
pub struct Dictionary {
|
||||||
|
sorted: bool,
|
||||||
|
keys: Box<Value>,
|
||||||
|
values: Box<Value>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Dictionary {
|
||||||
|
pub fn new(sorted: bool, keys: Value, values: Value) -> Self {
|
||||||
|
Self {
|
||||||
|
sorted,
|
||||||
|
keys: Box::new(keys),
|
||||||
|
values: Box::new(values),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn sorted(&self) -> bool {
|
||||||
|
self.sorted
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn keys(&self) -> &Value {
|
||||||
|
&self.keys
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn values(&self) -> &Value {
|
||||||
|
&self.values
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.keys.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.len() == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn validate(&self) -> CoreResult<()> {
|
||||||
|
if self.keys.len() != self.values.len() {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"q dictionary key/value lengths differ: {} != {}",
|
||||||
|
self.keys.len(),
|
||||||
|
self.values.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// q table.
|
||||||
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
pub struct Table {
|
||||||
|
attribute: Attribute,
|
||||||
|
column_names: Vec<Bytes>,
|
||||||
|
columns: Vec<Value>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Table {
|
||||||
|
pub fn new(attribute: Attribute, column_names: Vec<Bytes>, columns: Vec<Value>) -> Self {
|
||||||
|
Self {
|
||||||
|
attribute,
|
||||||
|
column_names,
|
||||||
|
columns,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn attribute(&self) -> Attribute {
|
||||||
|
self.attribute
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn column_names(&self) -> &[Bytes] {
|
||||||
|
&self.column_names
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn columns(&self) -> &[Value] {
|
||||||
|
&self.columns
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn num_columns(&self) -> usize {
|
||||||
|
self.columns.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
self.columns.first().map_or(0, Value::len)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.len() == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn validate(&self) -> CoreResult<()> {
|
||||||
|
if self.column_names.len() != self.columns.len() {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"q table column name count {} does not match column count {}",
|
||||||
|
self.column_names.len(),
|
||||||
|
self.columns.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(expected_rows) = self.columns.first().map(Value::len) {
|
||||||
|
for column in self.columns.iter().skip(1) {
|
||||||
|
if column.len() != expected_rows {
|
||||||
|
return Err(CoreError::InvalidStructure(format!(
|
||||||
|
"q table column lengths differ: expected {expected_rows}, found {}",
|
||||||
|
column.len()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decoded q value subset currently supported by the rewrite.
|
||||||
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
pub enum Value {
|
||||||
|
Atom(Atom),
|
||||||
|
Vector(Vector),
|
||||||
|
List(List),
|
||||||
|
Dictionary(Dictionary),
|
||||||
|
Table(Table),
|
||||||
|
UnaryPrimitive { opcode: i8 },
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Value {
|
||||||
|
pub fn qtype(&self) -> ValueType {
|
||||||
|
match self {
|
||||||
|
Self::Atom(atom) => ValueType::atom(atom.primitive()),
|
||||||
|
Self::Vector(vector) => ValueType::vector(vector.primitive(), vector.attribute()),
|
||||||
|
Self::List(list) => ValueType::list(list.attribute()),
|
||||||
|
Self::Dictionary(dictionary) => ValueType::dictionary(dictionary.sorted()),
|
||||||
|
Self::Table(table) => ValueType::table(table.attribute()),
|
||||||
|
Self::UnaryPrimitive { .. } => ValueType::unary_primitive(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
match self {
|
||||||
|
Self::Atom(_) | Self::UnaryPrimitive { .. } => 1,
|
||||||
|
Self::Vector(vector) => vector.len(),
|
||||||
|
Self::List(list) => list.len(),
|
||||||
|
Self::Dictionary(dictionary) => dictionary.len(),
|
||||||
|
Self::Table(table) => table.len(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
Self::Atom(_) | Self::UnaryPrimitive { .. } => false,
|
||||||
|
Self::Vector(vector) => vector.is_empty(),
|
||||||
|
Self::List(list) => list.is_empty(),
|
||||||
|
Self::Dictionary(dictionary) => dictionary.is_empty(),
|
||||||
|
Self::Table(table) => table.is_empty(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
11
crates/qroissant-kernels/Cargo.toml
Normal file
11
crates/qroissant-kernels/Cargo.toml
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
[package]
|
||||||
|
name = "qroissant-kernels"
|
||||||
|
version.workspace = true
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "qroissant_kernels"
|
||||||
|
path = "src/lib.rs"
|
||||||
|
|
||||||
121
crates/qroissant-kernels/src/boolean.rs
Normal file
121
crates/qroissant-kernels/src/boolean.rs
Normal file
|
|
@ -0,0 +1,121 @@
|
||||||
|
//! SIMD boolean bit-packing for q → Arrow projection.
|
||||||
|
//!
|
||||||
|
//! q stores boolean vectors as one byte per element (`0` = false, `1` = true,
|
||||||
|
//! `2` = null on the wire — see [`crate::nulls::Q_NULL_BOOLEAN_WIRE`]).
|
||||||
|
//! Arrow `BooleanArray` uses a compact bitmap: one bit per element, LSB-first
|
||||||
|
//! within each byte.
|
||||||
|
//!
|
||||||
|
//! [`pack_bool_bytes`] converts a q boolean byte slice into an Arrow-compatible
|
||||||
|
//! packed bitmap using SIMD comparisons, processing `N` bytes per iteration.
|
||||||
|
|
||||||
|
use std::simd::prelude::*;
|
||||||
|
|
||||||
|
/// Packs a slice of q boolean bytes into an Arrow-compatible LSB-first bitmap.
|
||||||
|
///
|
||||||
|
/// Each source byte is treated as non-zero → `1` bit, zero → `0` bit.
|
||||||
|
/// Null bytes (`2`) are treated as truthy here — callers that need a separate
|
||||||
|
/// null buffer should pass in a pre-filtered slice or handle nulls separately.
|
||||||
|
///
|
||||||
|
/// Returns `(bitmap_bytes, element_count)` where `bitmap_bytes` is the packed
|
||||||
|
/// bitmap (length `ceil(src.len() / 8)`) and `element_count == src.len()`.
|
||||||
|
///
|
||||||
|
/// The returned `Vec<u8>` is suitable for wrapping directly into an Arrow
|
||||||
|
/// `arrow_buffer::Buffer` → `arrow_array::types::BooleanBuffer`.
|
||||||
|
#[inline]
|
||||||
|
pub fn pack_bool_bytes(src: &[u8]) -> (Vec<u8>, usize) {
|
||||||
|
let len = src.len();
|
||||||
|
let out_len = len.div_ceil(8);
|
||||||
|
let mut out = vec![0u8; out_len];
|
||||||
|
|
||||||
|
const N: usize = 8;
|
||||||
|
let zero_v = Simd::<u8, N>::splat(0u8);
|
||||||
|
// Number of full 8-byte chunks we can process with SIMD.
|
||||||
|
let n_aligned = (len / N) * N;
|
||||||
|
|
||||||
|
for (i, chunk) in src[..n_aligned].chunks_exact(N).enumerate() {
|
||||||
|
let v = Simd::<u8, N>::from_slice(chunk);
|
||||||
|
// Compare each byte to zero: non-zero → true (1-bit), zero → false (0-bit).
|
||||||
|
let mask: std::simd::Mask<i8, N> = v.simd_ne(zero_v);
|
||||||
|
// `to_bitmask()` produces a u8 with one bit per lane, LSB = lane 0.
|
||||||
|
out[i] = mask.to_bitmask() as u8;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scalar tail (fewer than N elements remain).
|
||||||
|
if n_aligned < len {
|
||||||
|
let mut tail_byte = 0u8;
|
||||||
|
for (bit, &b) in src[n_aligned..].iter().enumerate() {
|
||||||
|
if b != 0 {
|
||||||
|
tail_byte |= 1u8 << bit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out[n_aligned / N] = tail_byte;
|
||||||
|
}
|
||||||
|
|
||||||
|
(out, len)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input produces an empty bitmap and a zero count.
    #[test]
    fn pack_empty() {
        let (bitmap, count) = pack_bool_bytes(&[]);
        assert!(bitmap.is_empty());
        assert_eq!(count, 0);
    }

    /// Sixteen zero bytes pack into two zero bitmap bytes.
    #[test]
    fn pack_all_false() {
        let (bitmap, count) = pack_bool_bytes(&[0u8; 16]);
        assert_eq!(count, 16);
        assert_eq!(bitmap, [0u8, 0u8]);
    }

    /// Sixteen one bytes pack into two fully set bitmap bytes.
    #[test]
    fn pack_all_true() {
        let (bitmap, count) = pack_bool_bytes(&[1u8; 16]);
        assert_eq!(count, 16);
        assert_eq!(bitmap, [0xFF, 0xFF]);
    }

    /// Element 0 lands in bit 0 of byte 0.
    #[test]
    fn pack_lsb_first() {
        let src = [1u8, 0, 0, 0, 0, 0, 0, 0];
        let (bitmap, _) = pack_bool_bytes(&src);
        assert_eq!(bitmap[0], 0b00000001);
    }

    /// Element 7 lands in bit 7 of byte 0.
    #[test]
    fn pack_last_element_in_first_chunk() {
        let src = [0u8, 0, 0, 0, 0, 0, 0, 1];
        let (bitmap, _) = pack_bool_bytes(&src);
        assert_eq!(bitmap[0], 0b10000000);
    }

    /// A ninth element is packed by the scalar tail into bit 0 of byte 1.
    #[test]
    fn pack_tail_single() {
        let src = [0u8, 0, 0, 0, 0, 0, 0, 0, 1];
        let (bitmap, count) = pack_bool_bytes(&src);
        assert_eq!(count, 9);
        assert_eq!(bitmap.len(), 2);
        assert_eq!(bitmap[0], 0x00);
        assert_eq!(bitmap[1], 0b00000001);
    }

    /// Any non-zero byte counts as true, not just `1`.
    #[test]
    fn pack_non_zero_is_true() {
        let src = [0u8, 2u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8];
        let (bitmap, _) = pack_bool_bytes(&src);
        assert_eq!(bitmap[0], 0b00000010);
    }
}
|
||||||
25
crates/qroissant-kernels/src/lib.rs
Normal file
25
crates/qroissant-kernels/src/lib.rs
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
#![feature(portable_simd)]
|
||||||
|
//! SIMD and hot kernels for qroissant.
|
||||||
|
//!
|
||||||
|
//! This crate provides two categories of primitives:
//!
//! 1. **Constants** – null sentinels and epoch-offset values used throughout
//!    the workspace to interpret q IPC wire bytes.
//!
//! 2. **Slice kernels** – functions that operate on typed Rust slices. Some
//!    already use `portable_simd` (for example boolean bit-packing in
//!    [`boolean`] and null detection in [`nulls`]). Kernels that are still
//!    scalar can gain SIMD specialisations in this same crate without
//!    changing the public API consumed by `qroissant-arrow`.
|
||||||
|
//!
|
||||||
|
//! # Architecture rule
|
||||||
|
//! All nightly-sensitive code (`portable_simd`, intrinsics, etc.) must live
|
||||||
|
//! in this crate so that the rest of the workspace can remain on stable if
|
||||||
|
//! needed and so that performance-sensitive code has a single home.
|
||||||
|
|
||||||
|
pub mod boolean;
|
||||||
|
pub mod nulls;
|
||||||
|
pub mod temporal;
|
||||||
|
|
||||||
|
pub use boolean::*;
|
||||||
|
pub use nulls::*;
|
||||||
|
pub use temporal::*;
|
||||||
371
crates/qroissant-kernels/src/nulls.rs
Normal file
371
crates/qroissant-kernels/src/nulls.rs
Normal file
|
|
@ -0,0 +1,371 @@
|
||||||
|
//! Null sentinel constants and SIMD-accelerated null-detection helpers for q IPC types.
|
||||||
|
//!
|
||||||
|
//! In q's IPC protocol each fixed-width primitive has a dedicated sentinel value
|
||||||
|
//! that represents a missing (null) element. These constants are consumed by
|
||||||
|
//! both the Arrow projection layer and any serialisation code that needs to
|
||||||
|
//! round-trip q nullability semantics.
|
||||||
|
//!
|
||||||
|
//! Each `validity_*` function returns `None` when the slice contains no nulls
|
||||||
|
//! (the fast path: callers can skip building a null buffer entirely) or
|
||||||
|
//! `Some(Vec<bool>)` where `true` means the element is valid. The null check
|
||||||
|
//! uses `portable_simd` for throughput; the validity-vector build falls back to
|
||||||
|
//! a scalar loop because nulls are the uncommon case.
|
||||||
|
|
||||||
|
use std::simd::prelude::*;
|
||||||
|
|
||||||
|
// Every integer-backed q type below uses the minimum value of its storage
// type as the null sentinel. Booleans are the exception: their null is a
// dedicated wire byte.

/// Null sentinel for q short (i16).
pub const Q_NULL_SHORT: i16 = i16::MIN;

/// Null sentinel for q int (i32).
pub const Q_NULL_INT: i32 = i32::MIN;

/// Null sentinel for q long (i64).
pub const Q_NULL_LONG: i64 = i64::MIN;

/// Null sentinel for q timestamp (i64 nanoseconds since 2000.01.01).
pub const Q_NULL_TIMESTAMP: i64 = i64::MIN;

/// Null sentinel for q month (i32 months since 2000.01).
pub const Q_NULL_MONTH: i32 = i32::MIN;

/// Null sentinel for q date (i32 days since 2000.01.01).
pub const Q_NULL_DATE: i32 = i32::MIN;

/// Null sentinel for q timespan (i64 nanoseconds).
pub const Q_NULL_TIMESPAN: i64 = i64::MIN;

/// Null sentinel for q minute (i32 minutes).
pub const Q_NULL_MINUTE: i32 = i32::MIN;

/// Null sentinel for q second (i32 seconds).
pub const Q_NULL_SECOND: i32 = i32::MIN;

/// Null sentinel for q time (i32 milliseconds).
pub const Q_NULL_TIME: i32 = i32::MIN;

/// Byte value used to encode a null boolean in the raw q IPC wire format.
/// `0` = false, `1` = true, `2` = null.
pub const Q_NULL_BOOLEAN_WIRE: u8 = 2;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// Infinity sentinel constants
// ---------------------------------------------------------------------------
//
// Integer-backed types use `MAX` for positive infinity and `MIN + 1` for
// negative infinity, because `MIN` itself is taken by the null sentinel.
// Real and float use the floating-point infinities directly.

/// Positive infinity sentinel for q short (i16).
pub const Q_INF_SHORT: i16 = i16::MAX;
/// Negative infinity sentinel for q short (i16).
pub const Q_NINF_SHORT: i16 = i16::MIN + 1;

/// Positive infinity sentinel for q int (i32).
pub const Q_INF_INT: i32 = i32::MAX;
/// Negative infinity sentinel for q int (i32).
pub const Q_NINF_INT: i32 = i32::MIN + 1;

/// Positive infinity sentinel for q long (i64).
pub const Q_INF_LONG: i64 = i64::MAX;
/// Negative infinity sentinel for q long (i64).
pub const Q_NINF_LONG: i64 = i64::MIN + 1;

/// Positive infinity sentinel for q real (f32).
pub const Q_INF_REAL: f32 = f32::INFINITY;
/// Negative infinity sentinel for q real (f32).
pub const Q_NINF_REAL: f32 = f32::NEG_INFINITY;

/// Positive infinity sentinel for q float (f64).
pub const Q_INF_FLOAT: f64 = f64::INFINITY;
/// Negative infinity sentinel for q float (f64).
pub const Q_NINF_FLOAT: f64 = f64::NEG_INFINITY;

/// Positive infinity sentinel for q timestamp (i64 nanoseconds).
pub const Q_INF_TIMESTAMP: i64 = i64::MAX;
/// Negative infinity sentinel for q timestamp (i64 nanoseconds).
pub const Q_NINF_TIMESTAMP: i64 = i64::MIN + 1;

/// Positive infinity sentinel for q timespan (i64 nanoseconds).
pub const Q_INF_TIMESPAN: i64 = i64::MAX;
/// Negative infinity sentinel for q timespan (i64 nanoseconds).
pub const Q_NINF_TIMESPAN: i64 = i64::MIN + 1;

/// Positive infinity sentinel for q date (i32 days).
pub const Q_INF_DATE: i32 = i32::MAX;
/// Negative infinity sentinel for q date (i32 days).
pub const Q_NINF_DATE: i32 = i32::MIN + 1;

/// Positive infinity sentinel for q month (i32 months).
pub const Q_INF_MONTH: i32 = i32::MAX;
/// Negative infinity sentinel for q month (i32 months).
pub const Q_NINF_MONTH: i32 = i32::MIN + 1;

/// Positive infinity sentinel for q minute (i32 minutes).
pub const Q_INF_MINUTE: i32 = i32::MAX;
/// Negative infinity sentinel for q minute (i32 minutes).
pub const Q_NINF_MINUTE: i32 = i32::MIN + 1;

/// Positive infinity sentinel for q second (i32 seconds).
pub const Q_INF_SECOND: i32 = i32::MAX;
/// Negative infinity sentinel for q second (i32 seconds).
pub const Q_NINF_SECOND: i32 = i32::MIN + 1;

/// Positive infinity sentinel for q time (i32 milliseconds).
pub const Q_INF_TIME: i32 = i32::MAX;
/// Negative infinity sentinel for q time (i32 milliseconds).
pub const Q_NINF_TIME: i32 = i32::MIN + 1;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// SIMD null-detection helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Returns a validity vector for a `&[i16]` slice using [`Q_NULL_SHORT`] as
|
||||||
|
/// the sentinel. Returns `None` when no nulls are present.
|
||||||
|
#[inline]
|
||||||
|
pub fn validity_i16(values: &[i16]) -> Option<Vec<bool>> {
|
||||||
|
const N: usize = 32;
|
||||||
|
let sentinel = Simd::<i16, N>::splat(Q_NULL_SHORT);
|
||||||
|
let n_aligned = (values.len() / N) * N;
|
||||||
|
|
||||||
|
let has_null = values[..n_aligned]
|
||||||
|
.chunks_exact(N)
|
||||||
|
.any(|c| Simd::<i16, N>::from_slice(c).simd_eq(sentinel).any())
|
||||||
|
|| values[n_aligned..].iter().any(|&v| v == Q_NULL_SHORT);
|
||||||
|
|
||||||
|
if !has_null {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(values.iter().map(|&v| v != Q_NULL_SHORT).collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a validity vector for a `&[i32]` slice using the supplied sentinel.
|
||||||
|
/// Returns `None` when no nulls are present.
|
||||||
|
#[inline]
|
||||||
|
pub fn validity_i32(values: &[i32], sentinel: i32) -> Option<Vec<bool>> {
|
||||||
|
const N: usize = 16;
|
||||||
|
let sentinel_v = Simd::<i32, N>::splat(sentinel);
|
||||||
|
let n_aligned = (values.len() / N) * N;
|
||||||
|
|
||||||
|
let has_null = values[..n_aligned]
|
||||||
|
.chunks_exact(N)
|
||||||
|
.any(|c| Simd::<i32, N>::from_slice(c).simd_eq(sentinel_v).any())
|
||||||
|
|| values[n_aligned..].iter().any(|&v| v == sentinel);
|
||||||
|
|
||||||
|
if !has_null {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(values.iter().map(|&v| v != sentinel).collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a validity vector for a `&[i64]` slice using the supplied sentinel.
|
||||||
|
/// Returns `None` when no nulls are present.
|
||||||
|
#[inline]
|
||||||
|
pub fn validity_i64(values: &[i64], sentinel: i64) -> Option<Vec<bool>> {
|
||||||
|
const N: usize = 8;
|
||||||
|
let sentinel_v = Simd::<i64, N>::splat(sentinel);
|
||||||
|
let n_aligned = (values.len() / N) * N;
|
||||||
|
|
||||||
|
let has_null = values[..n_aligned]
|
||||||
|
.chunks_exact(N)
|
||||||
|
.any(|c| Simd::<i64, N>::from_slice(c).simd_eq(sentinel_v).any())
|
||||||
|
|| values[n_aligned..].iter().any(|&v| v == sentinel);
|
||||||
|
|
||||||
|
if !has_null {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(values.iter().map(|&v| v != sentinel).collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a validity vector for a `&[f32]` slice where `NaN` encodes null.
|
||||||
|
/// Returns `None` when no nulls are present.
|
||||||
|
#[inline]
|
||||||
|
pub fn validity_f32(values: &[f32]) -> Option<Vec<bool>> {
|
||||||
|
const N: usize = 16;
|
||||||
|
let n_aligned = (values.len() / N) * N;
|
||||||
|
|
||||||
|
// NaN is the only value not equal to itself.
|
||||||
|
let has_null = values[..n_aligned].chunks_exact(N).any(|c| {
|
||||||
|
let v = Simd::<f32, N>::from_slice(c);
|
||||||
|
v.simd_ne(v).any()
|
||||||
|
}) || values[n_aligned..].iter().any(|v| v.is_nan());
|
||||||
|
|
||||||
|
if !has_null {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(values.iter().map(|v| !v.is_nan()).collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a validity vector for a `&[f64]` slice where `NaN` encodes null.
|
||||||
|
/// Returns `None` when no nulls are present.
|
||||||
|
#[inline]
|
||||||
|
pub fn validity_f64(values: &[f64]) -> Option<Vec<bool>> {
|
||||||
|
const N: usize = 8;
|
||||||
|
let n_aligned = (values.len() / N) * N;
|
||||||
|
|
||||||
|
let has_null = values[..n_aligned].chunks_exact(N).any(|c| {
|
||||||
|
let v = Simd::<f64, N>::from_slice(c);
|
||||||
|
v.simd_ne(v).any()
|
||||||
|
}) || values[n_aligned..].iter().any(|v| v.is_nan());
|
||||||
|
|
||||||
|
if !has_null {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(values.iter().map(|v| !v.is_nan()).collect())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
// validity_i16
|
||||||
|
#[test]
|
||||||
|
fn i16_no_nulls() {
|
||||||
|
assert_eq!(validity_i16(&[1, 2, 3, 4, 5]), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn i16_with_null() {
|
||||||
|
assert_eq!(
|
||||||
|
validity_i16(&[1, Q_NULL_SHORT, 3]),
|
||||||
|
Some(vec![true, false, true])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn i16_all_nulls() {
|
||||||
|
assert_eq!(validity_i16(&[Q_NULL_SHORT; 4]), Some(vec![false; 4]));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn i16_empty() {
|
||||||
|
assert_eq!(validity_i16(&[]), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn i16_single_null() {
|
||||||
|
assert_eq!(validity_i16(&[Q_NULL_SHORT]), Some(vec![false]));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn i16_null_in_remainder() {
|
||||||
|
let mut data: Vec<i16> = (1..=9).collect();
|
||||||
|
data[8] = Q_NULL_SHORT;
|
||||||
|
let v = validity_i16(&data).unwrap();
|
||||||
|
assert!(!v[8]);
|
||||||
|
assert!(v[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// validity_i32
|
||||||
|
#[test]
|
||||||
|
fn i32_no_nulls() {
|
||||||
|
assert_eq!(validity_i32(&[1, 2, 3], Q_NULL_INT), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn i32_with_null() {
|
||||||
|
assert_eq!(
|
||||||
|
validity_i32(&[1, Q_NULL_INT, 3], Q_NULL_INT),
|
||||||
|
Some(vec![true, false, true])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn i32_empty() {
|
||||||
|
assert_eq!(validity_i32(&[], Q_NULL_INT), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn i32_all_nulls() {
|
||||||
|
assert_eq!(
|
||||||
|
validity_i32(&[Q_NULL_INT; 3], Q_NULL_INT),
|
||||||
|
Some(vec![false; 3])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn i32_null_in_remainder() {
|
||||||
|
let mut data: Vec<i32> = (1..=10).collect();
|
||||||
|
data[9] = Q_NULL_INT;
|
||||||
|
assert!(!validity_i32(&data, Q_NULL_INT).unwrap()[9]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// validity_i64
|
||||||
|
#[test]
|
||||||
|
fn i64_no_nulls() {
|
||||||
|
assert_eq!(validity_i64(&[1, 2, 3], Q_NULL_LONG), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn i64_with_null() {
|
||||||
|
assert_eq!(
|
||||||
|
validity_i64(&[1, Q_NULL_LONG, 3], Q_NULL_LONG),
|
||||||
|
Some(vec![true, false, true])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn i64_empty() {
|
||||||
|
assert_eq!(validity_i64(&[], Q_NULL_LONG), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn i64_timestamp_sentinel() {
|
||||||
|
assert_eq!(
|
||||||
|
validity_i64(&[100, Q_NULL_TIMESTAMP, 300], Q_NULL_TIMESTAMP),
|
||||||
|
Some(vec![true, false, true])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// validity_f32
|
||||||
|
#[test]
|
||||||
|
fn f32_no_nulls() {
|
||||||
|
assert_eq!(validity_f32(&[1.0, 2.0, 3.0]), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn f32_with_nan() {
|
||||||
|
assert_eq!(
|
||||||
|
validity_f32(&[1.0, f32::NAN, 3.0]),
|
||||||
|
Some(vec![true, false, true])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn f32_empty() {
|
||||||
|
assert_eq!(validity_f32(&[]), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn f32_infinity_is_not_null() {
|
||||||
|
assert_eq!(validity_f32(&[f32::INFINITY, f32::NEG_INFINITY, 1.0]), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
// validity_f64
|
||||||
|
#[test]
|
||||||
|
fn f64_no_nulls() {
|
||||||
|
assert_eq!(validity_f64(&[1.0, 2.0, 3.0]), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn f64_with_nan() {
|
||||||
|
assert_eq!(
|
||||||
|
validity_f64(&[1.0, f64::NAN, 3.0]),
|
||||||
|
Some(vec![true, false, true])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn f64_empty() {
|
||||||
|
assert_eq!(validity_f64(&[]), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn f64_infinity_is_not_null() {
|
||||||
|
assert_eq!(validity_f64(&[f64::INFINITY, f64::NEG_INFINITY, 1.0]), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn f64_large_aligned_with_null() {
|
||||||
|
let mut data = vec![1.0; 8];
|
||||||
|
data[7] = f64::NAN;
|
||||||
|
let v = validity_f64(&data).unwrap();
|
||||||
|
assert!(v[0]);
|
||||||
|
assert!(!v[7]);
|
||||||
|
}
|
||||||
|
}
|
||||||
317
crates/qroissant-kernels/src/temporal.rs
Normal file
317
crates/qroissant-kernels/src/temporal.rs
Normal file
|
|
@ -0,0 +1,317 @@
|
||||||
|
//! Temporal conversion constants and SIMD transforms for q ↔ Arrow mapping.
|
||||||
|
//!
|
||||||
|
//! q encodes temporal values relative to the millennium epoch (2000-01-01)
|
||||||
|
//! while Arrow uses the Unix epoch (1970-01-01). The helpers here translate
|
||||||
|
//! between the two without touching Arrow types so that this crate stays free
|
||||||
|
//! of Arrow dependencies.
|
||||||
|
//!
|
||||||
|
//! Each transform function processes full SIMD-width chunks from the start of
//! the slice with `portable_simd` and finishes the leftover tail with a scalar loop.
|
||||||
|
|
||||||
|
use std::simd::Select;
|
||||||
|
use std::simd::prelude::*;
|
||||||
|
|
||||||
|
use crate::nulls::Q_NULL_DATE;
|
||||||
|
use crate::nulls::Q_NULL_MINUTE;
|
||||||
|
use crate::nulls::Q_NULL_TIMESTAMP;
|
||||||
|
|
||||||
|
/// Gap between the Unix epoch (1970-01-01) and the q epoch (2000-01-01),
/// in nanoseconds. Derived from [`DATE_OFFSET_DAYS`] so the two cannot drift.
pub const TIMESTAMP_OFFSET_NS: i64 = DATE_OFFSET_DAYS as i64 * 86_400 * 1_000_000_000;

/// Gap between the Unix epoch (1970-01-01) and the q epoch (2000-01-01),
/// in days.
pub const DATE_OFFSET_DAYS: i32 = 10_957;

/// Number of milliseconds in one day (used for `Datetime` float-day
/// conversion).
pub const MILLIS_PER_DAY: f64 = 24.0 * 60.0 * 60.0 * 1_000.0;
|
||||||
|
|
||||||
|
/// Translates a slice of q timestamps (nanoseconds since 2000-01-01) into
|
||||||
|
/// Arrow `TimestampNanosecond` values (nanoseconds since 1970-01-01) in place.
|
||||||
|
///
|
||||||
|
/// Null elements (`i64::MIN`) are left unchanged; the Arrow null buffer
|
||||||
|
/// produced by [`crate::nulls::validity_i64`] will mask them.
|
||||||
|
#[inline]
|
||||||
|
pub fn offset_timestamps(values: &mut [i64]) {
|
||||||
|
const N: usize = 8;
|
||||||
|
let null_v = Simd::<i64, N>::splat(Q_NULL_TIMESTAMP);
|
||||||
|
let offset_v = Simd::<i64, N>::splat(TIMESTAMP_OFFSET_NS);
|
||||||
|
let n_aligned = (values.len() / N) * N;
|
||||||
|
|
||||||
|
for chunk in values[..n_aligned].chunks_exact_mut(N) {
|
||||||
|
let v = Simd::<i64, N>::from_slice(chunk);
|
||||||
|
let mask = v.simd_ne(null_v);
|
||||||
|
let added = v.saturating_add(offset_v);
|
||||||
|
let result = mask.select(added, v);
|
||||||
|
chunk.copy_from_slice(&result.to_array());
|
||||||
|
}
|
||||||
|
for v in &mut values[n_aligned..] {
|
||||||
|
if *v != Q_NULL_TIMESTAMP {
|
||||||
|
*v = v.saturating_add(TIMESTAMP_OFFSET_NS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Translates a slice of q dates (days since 2000-01-01) into Arrow `Date32`
|
||||||
|
/// values (days since 1970-01-01) in place.
|
||||||
|
///
|
||||||
|
/// Null elements (`i32::MIN`) are left unchanged.
|
||||||
|
#[inline]
|
||||||
|
pub fn offset_dates(values: &mut [i32]) {
|
||||||
|
const N: usize = 16;
|
||||||
|
let null_v = Simd::<i32, N>::splat(Q_NULL_DATE);
|
||||||
|
let offset_v = Simd::<i32, N>::splat(DATE_OFFSET_DAYS);
|
||||||
|
let n_aligned = (values.len() / N) * N;
|
||||||
|
|
||||||
|
for chunk in values[..n_aligned].chunks_exact_mut(N) {
|
||||||
|
let v = Simd::<i32, N>::from_slice(chunk);
|
||||||
|
let mask = v.simd_ne(null_v);
|
||||||
|
let added = v.saturating_add(offset_v);
|
||||||
|
let result = mask.select(added, v);
|
||||||
|
chunk.copy_from_slice(&result.to_array());
|
||||||
|
}
|
||||||
|
for v in &mut values[n_aligned..] {
|
||||||
|
if *v != Q_NULL_DATE {
|
||||||
|
*v = v.saturating_add(DATE_OFFSET_DAYS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Translates a slice of q minute values (minutes) into Arrow `Time32Second`
|
||||||
|
/// values (seconds) in place.
|
||||||
|
///
|
||||||
|
/// Null elements (`i32::MIN`) are left unchanged.
|
||||||
|
#[inline]
|
||||||
|
pub fn minutes_to_seconds(values: &mut [i32]) {
|
||||||
|
const N: usize = 16;
|
||||||
|
let null_v = Simd::<i32, N>::splat(Q_NULL_MINUTE);
|
||||||
|
let sixty_v = Simd::<i32, N>::splat(60_i32);
|
||||||
|
let n_aligned = (values.len() / N) * N;
|
||||||
|
|
||||||
|
for chunk in values[..n_aligned].chunks_exact_mut(N) {
|
||||||
|
let v = Simd::<i32, N>::from_slice(chunk);
|
||||||
|
let mask = v.simd_ne(null_v);
|
||||||
|
// Non-null minutes multiplied by 60; null sentinels selected back in.
|
||||||
|
// Wrapping multiply is safe here: the select restores the original
|
||||||
|
// sentinel value for null lanes, so overflow in null lanes is harmless.
|
||||||
|
let multiplied = v * sixty_v;
|
||||||
|
let result = mask.select(multiplied, v);
|
||||||
|
chunk.copy_from_slice(&result.to_array());
|
||||||
|
}
|
||||||
|
for v in &mut values[n_aligned..] {
|
||||||
|
if *v != Q_NULL_MINUTE {
|
||||||
|
*v = v.saturating_mul(60);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Copies q timestamps (nanoseconds since 2000-01-01) from `src` into `dst`,
|
||||||
|
/// applying the Unix-epoch offset in a single SIMD pass.
|
||||||
|
///
|
||||||
|
/// Avoids the two-pass cost of `to_vec()` + `offset_timestamps()`:
|
||||||
|
/// one read from `src`, one write to `dst`, no intermediate allocation.
|
||||||
|
/// Null elements (`i64::MIN`) are copied unchanged.
|
||||||
|
///
|
||||||
|
/// `src` and `dst` must have the same length.
|
||||||
|
#[inline]
|
||||||
|
pub fn copy_and_offset_timestamps(src: &[i64], dst: &mut [i64]) {
|
||||||
|
debug_assert_eq!(src.len(), dst.len());
|
||||||
|
const N: usize = 8;
|
||||||
|
let null_v = Simd::<i64, N>::splat(Q_NULL_TIMESTAMP);
|
||||||
|
let offset_v = Simd::<i64, N>::splat(TIMESTAMP_OFFSET_NS);
|
||||||
|
let n_aligned = (src.len() / N) * N;
|
||||||
|
|
||||||
|
for (s, d) in src[..n_aligned]
|
||||||
|
.chunks_exact(N)
|
||||||
|
.zip(dst[..n_aligned].chunks_exact_mut(N))
|
||||||
|
{
|
||||||
|
let v = Simd::<i64, N>::from_slice(s);
|
||||||
|
let mask = v.simd_ne(null_v);
|
||||||
|
let result = mask.select(v.saturating_add(offset_v), v);
|
||||||
|
d.copy_from_slice(&result.to_array());
|
||||||
|
}
|
||||||
|
for (s, d) in src[n_aligned..].iter().zip(dst[n_aligned..].iter_mut()) {
|
||||||
|
*d = if *s != Q_NULL_TIMESTAMP {
|
||||||
|
s.saturating_add(TIMESTAMP_OFFSET_NS)
|
||||||
|
} else {
|
||||||
|
*s
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Copies q dates (days since 2000-01-01) from `src` into `dst`,
|
||||||
|
/// applying the Unix-epoch offset in a single SIMD pass.
|
||||||
|
///
|
||||||
|
/// `src` and `dst` must have the same length.
|
||||||
|
#[inline]
|
||||||
|
pub fn copy_and_offset_dates(src: &[i32], dst: &mut [i32]) {
|
||||||
|
debug_assert_eq!(src.len(), dst.len());
|
||||||
|
const N: usize = 16;
|
||||||
|
let null_v = Simd::<i32, N>::splat(Q_NULL_DATE);
|
||||||
|
let offset_v = Simd::<i32, N>::splat(DATE_OFFSET_DAYS);
|
||||||
|
let n_aligned = (src.len() / N) * N;
|
||||||
|
|
||||||
|
for (s, d) in src[..n_aligned]
|
||||||
|
.chunks_exact(N)
|
||||||
|
.zip(dst[..n_aligned].chunks_exact_mut(N))
|
||||||
|
{
|
||||||
|
let v = Simd::<i32, N>::from_slice(s);
|
||||||
|
let mask = v.simd_ne(null_v);
|
||||||
|
let result = mask.select(v.saturating_add(offset_v), v);
|
||||||
|
d.copy_from_slice(&result.to_array());
|
||||||
|
}
|
||||||
|
for (s, d) in src[n_aligned..].iter().zip(dst[n_aligned..].iter_mut()) {
|
||||||
|
*d = if *s != Q_NULL_DATE {
|
||||||
|
s.saturating_add(DATE_OFFSET_DAYS)
|
||||||
|
} else {
|
||||||
|
*s
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Copies q minute values from `src` into `dst`, converting minutes → seconds
|
||||||
|
/// in a single SIMD pass.
|
||||||
|
///
|
||||||
|
/// `src` and `dst` must have the same length.
|
||||||
|
#[inline]
|
||||||
|
pub fn copy_and_minutes_to_seconds(src: &[i32], dst: &mut [i32]) {
|
||||||
|
debug_assert_eq!(src.len(), dst.len());
|
||||||
|
const N: usize = 16;
|
||||||
|
let null_v = Simd::<i32, N>::splat(Q_NULL_MINUTE);
|
||||||
|
let sixty_v = Simd::<i32, N>::splat(60_i32);
|
||||||
|
let n_aligned = (src.len() / N) * N;
|
||||||
|
|
||||||
|
for (s, d) in src[..n_aligned]
|
||||||
|
.chunks_exact(N)
|
||||||
|
.zip(dst[..n_aligned].chunks_exact_mut(N))
|
||||||
|
{
|
||||||
|
let v = Simd::<i32, N>::from_slice(s);
|
||||||
|
let mask = v.simd_ne(null_v);
|
||||||
|
let multiplied = v * sixty_v;
|
||||||
|
let result = mask.select(multiplied, v);
|
||||||
|
d.copy_from_slice(&result.to_array());
|
||||||
|
}
|
||||||
|
for (s, d) in src[n_aligned..].iter().zip(dst[n_aligned..].iter_mut()) {
|
||||||
|
*d = if *s != Q_NULL_MINUTE {
|
||||||
|
s.saturating_mul(60)
|
||||||
|
} else {
|
||||||
|
*s
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// offset_timestamps
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn offset_timestamps_basic() {
|
||||||
|
// q timestamp 1 ns since 2000 -> Unix epoch ns
|
||||||
|
let mut values = vec![1i64];
|
||||||
|
offset_timestamps(&mut values);
|
||||||
|
assert_eq!(values[0], TIMESTAMP_OFFSET_NS + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn offset_timestamps_zero() {
|
||||||
|
let mut values = vec![0i64];
|
||||||
|
offset_timestamps(&mut values);
|
||||||
|
assert_eq!(values[0], TIMESTAMP_OFFSET_NS);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn offset_timestamps_preserves_null() {
|
||||||
|
let mut values = vec![Q_NULL_TIMESTAMP];
|
||||||
|
offset_timestamps(&mut values);
|
||||||
|
assert_eq!(values[0], Q_NULL_TIMESTAMP);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn offset_timestamps_mixed() {
|
||||||
|
let mut values = vec![0, Q_NULL_TIMESTAMP, 1000, Q_NULL_TIMESTAMP, 2000];
|
||||||
|
offset_timestamps(&mut values);
|
||||||
|
assert_eq!(values[0], TIMESTAMP_OFFSET_NS);
|
||||||
|
assert_eq!(values[1], Q_NULL_TIMESTAMP);
|
||||||
|
assert_eq!(values[2], TIMESTAMP_OFFSET_NS + 1000);
|
||||||
|
assert_eq!(values[3], Q_NULL_TIMESTAMP);
|
||||||
|
assert_eq!(values[4], TIMESTAMP_OFFSET_NS + 2000);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn offset_timestamps_empty() {
|
||||||
|
let mut values: Vec<i64> = vec![];
|
||||||
|
offset_timestamps(&mut values);
|
||||||
|
assert!(values.is_empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// offset_dates
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn offset_dates_basic() {
|
||||||
|
let mut values = vec![0i32]; // 2000-01-01 -> days since Unix epoch
|
||||||
|
offset_dates(&mut values);
|
||||||
|
assert_eq!(values[0], DATE_OFFSET_DAYS);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn offset_dates_preserves_null() {
|
||||||
|
let mut values = vec![Q_NULL_DATE];
|
||||||
|
offset_dates(&mut values);
|
||||||
|
assert_eq!(values[0], Q_NULL_DATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn offset_dates_mixed() {
|
||||||
|
let mut values = vec![0, Q_NULL_DATE, 1, Q_NULL_DATE];
|
||||||
|
offset_dates(&mut values);
|
||||||
|
assert_eq!(values[0], DATE_OFFSET_DAYS);
|
||||||
|
assert_eq!(values[1], Q_NULL_DATE);
|
||||||
|
assert_eq!(values[2], DATE_OFFSET_DAYS + 1);
|
||||||
|
assert_eq!(values[3], Q_NULL_DATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn offset_dates_empty() {
|
||||||
|
let mut values: Vec<i32> = vec![];
|
||||||
|
offset_dates(&mut values);
|
||||||
|
assert!(values.is_empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// minutes_to_seconds
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn minutes_to_seconds_basic() {
|
||||||
|
let mut values = vec![10i32]; // 10 minutes -> 600 seconds
|
||||||
|
minutes_to_seconds(&mut values);
|
||||||
|
assert_eq!(values[0], 600);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn minutes_to_seconds_preserves_null() {
|
||||||
|
let mut values = vec![Q_NULL_MINUTE];
|
||||||
|
minutes_to_seconds(&mut values);
|
||||||
|
assert_eq!(values[0], Q_NULL_MINUTE);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn minutes_to_seconds_mixed() {
|
||||||
|
let mut values = vec![1, Q_NULL_MINUTE, 60];
|
||||||
|
minutes_to_seconds(&mut values);
|
||||||
|
assert_eq!(values[0], 60);
|
||||||
|
assert_eq!(values[1], Q_NULL_MINUTE);
|
||||||
|
assert_eq!(values[2], 3600);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn minutes_to_seconds_empty() {
|
||||||
|
let mut values: Vec<i32> = vec![];
|
||||||
|
minutes_to_seconds(&mut values);
|
||||||
|
assert!(values.is_empty());
|
||||||
|
}
|
||||||
|
}
|
||||||
27
crates/qroissant-python/Cargo.toml
Normal file
27
crates/qroissant-python/Cargo.toml
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
[package]
|
||||||
|
name = "qroissant-python"
|
||||||
|
version.workspace = true
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "_native"
|
||||||
|
crate-type = ["cdylib", "rlib"]
|
||||||
|
path = "src/lib.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
bb8 = "0.9.0"
|
||||||
|
bytes = "1.11.1"
|
||||||
|
chrono = "0.4.44"
|
||||||
|
pyo3 = { workspace = true, features = ["extension-module"] }
|
||||||
|
pyo3-arrow = { version = "0.17.0", default-features = false }
|
||||||
|
pyo3-async-runtimes = { version = "0.28.0", features = ["tokio-runtime"] }
|
||||||
|
qroissant-arrow = { path = "../qroissant-arrow" }
|
||||||
|
qroissant-core = { path = "../qroissant-core" }
|
||||||
|
qroissant-kernels = { path = "../qroissant-kernels" }
|
||||||
|
qroissant-transport = { path = "../qroissant-transport" }
|
||||||
|
r2d2 = "0.8.10"
|
||||||
|
tabled = "0.17.0"
|
||||||
|
thiserror = "2.0.18"
|
||||||
|
tokio = { version = "1.48.0", features = ["io-util", "net", "rt-multi-thread", "sync", "time"] }
|
||||||
1597
crates/qroissant-python/src/client.rs
Normal file
1597
crates/qroissant-python/src/client.rs
Normal file
File diff suppressed because it is too large
Load diff
114
crates/qroissant-python/src/errors.rs
Normal file
114
crates/qroissant-python/src/errors.rs
Normal file
|
|
@ -0,0 +1,114 @@
|
||||||
|
use pyo3::create_exception;
|
||||||
|
use pyo3::exceptions::PyException;
|
||||||
|
use pyo3::exceptions::PyNotImplementedError;
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use pyo3::types::PyModule;
|
||||||
|
use qroissant_transport::TransportError;
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
// Python exception hierarchy exposed by the extension:
//
//   Exception
//   └── QroissantError
//       ├── DecodeError
//       ├── ProtocolError
//       ├── TransportErrorPy   (added to the module as "TransportError")
//       ├── OperationError
//       ├── QRuntimeError
//       └── PoolError
//           └── PoolClosedError
//
// NOTE(review): `TransportErrorPy` is named differently from the attribute it
// is registered under, presumably to avoid clashing with the imported
// `qroissant_transport::TransportError`. The Python-visible `__name__` may
// therefore read `TransportErrorPy` — confirm against PyO3's
// `create_exception!` behaviour.

// Root of the hierarchy; derives from Python's built-in `Exception`.
create_exception!(
    qroissant,
    QroissantError,
    PyException,
    "Base exception for qroissant errors."
);
create_exception!(
    qroissant,
    DecodeError,
    QroissantError,
    "Raised when q IPC payload decoding fails."
);
create_exception!(
    qroissant,
    ProtocolError,
    QroissantError,
    "Raised when q IPC framing or protocol validation fails."
);
create_exception!(
    qroissant,
    TransportErrorPy,
    QroissantError,
    "Raised when transport IO or socket operations fail."
);
create_exception!(
    qroissant,
    OperationError,
    QroissantError,
    "Raised when an operation is unsupported in the current state."
);
create_exception!(
    qroissant,
    QRuntimeError,
    QroissantError,
    "Raised when the remote q process returns an error response."
);
create_exception!(
    qroissant,
    PoolError,
    QroissantError,
    "Raised when connection pool management fails."
);
// The only exception that derives from `PoolError` rather than the root.
create_exception!(
    qroissant,
    PoolClosedError,
    PoolError,
    "Raised when a closed pool is used."
);
|
||||||
|
|
||||||
|
/// Rust-side error type for the Python bindings.
///
/// Each variant is turned into one Python exception by [`to_py_err`]. Apart
/// from `PoolClosed`, every variant carries the message that becomes both the
/// `Display` output and the Python exception text.
#[derive(Debug, Error)]
pub enum PythonError {
    /// Converted to `DecodeError`.
    #[error("{0}")]
    Decode(String),
    /// Converted to `ProtocolError`.
    #[error("{0}")]
    Protocol(String),
    /// Converted to `TransportErrorPy` (exported to Python as `TransportError`).
    #[error("{0}")]
    Transport(String),
    /// Converted to `OperationError`.
    #[error("{0}")]
    Operation(String),
    /// Converted to `QRuntimeError`.
    #[error("{0}")]
    QRuntime(String),
    /// Converted to `PoolError`.
    #[error("{0}")]
    Pool(String),
    /// Converted to `PoolClosedError` with a fixed message.
    #[error("connection pool is closed")]
    PoolClosed,
    /// Converted to Python's built-in `NotImplementedError`.
    #[error("{0}")]
    NotImplemented(String),
}

/// Shorthand for a `Result` whose error type is [`PythonError`].
pub type PythonResult<T> = Result<T, PythonError>;
|
||||||
|
|
||||||
|
pub fn register(module: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||||
|
let py = module.py();
|
||||||
|
module.add("QroissantError", py.get_type::<QroissantError>())?;
|
||||||
|
module.add("DecodeError", py.get_type::<DecodeError>())?;
|
||||||
|
module.add("ProtocolError", py.get_type::<ProtocolError>())?;
|
||||||
|
module.add("TransportError", py.get_type::<TransportErrorPy>())?;
|
||||||
|
module.add("OperationError", py.get_type::<OperationError>())?;
|
||||||
|
module.add("QRuntimeError", py.get_type::<QRuntimeError>())?;
|
||||||
|
module.add("PoolError", py.get_type::<PoolError>())?;
|
||||||
|
module.add("PoolClosedError", py.get_type::<PoolClosedError>())?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn to_py_err(error: PythonError) -> PyErr {
|
||||||
|
match error {
|
||||||
|
PythonError::Decode(message) => DecodeError::new_err(message),
|
||||||
|
PythonError::Protocol(message) => ProtocolError::new_err(message),
|
||||||
|
PythonError::Transport(message) => TransportErrorPy::new_err(message),
|
||||||
|
PythonError::Operation(message) => OperationError::new_err(message),
|
||||||
|
PythonError::QRuntime(message) => QRuntimeError::new_err(message),
|
||||||
|
PythonError::Pool(message) => PoolError::new_err(message),
|
||||||
|
PythonError::PoolClosed => PoolClosedError::new_err("connection pool is closed"),
|
||||||
|
PythonError::NotImplemented(message) => PyNotImplementedError::new_err(message),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn map_transport_error(error: TransportError) -> PythonError {
|
||||||
|
match error {
|
||||||
|
TransportError::Closed => PythonError::Operation(error.to_string()),
|
||||||
|
TransportError::Protocol(_) => PythonError::Protocol(error.to_string()),
|
||||||
|
TransportError::Io(_)
|
||||||
|
| TransportError::InvalidEndpoint(_)
|
||||||
|
| TransportError::InvalidQueryLength(_) => PythonError::Transport(error.to_string()),
|
||||||
|
}
|
||||||
|
}
|
||||||
28
crates/qroissant-python/src/lib.rs
Normal file
28
crates/qroissant-python/src/lib.rs
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
#![allow(deprecated)]
|
||||||
|
|
||||||
|
//! Native Python module for qroissant.
|
||||||
|
|
||||||
|
mod client;
|
||||||
|
mod errors;
|
||||||
|
mod raw_response;
|
||||||
|
mod repr;
|
||||||
|
mod serde;
|
||||||
|
mod types;
|
||||||
|
mod values;
|
||||||
|
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use pyo3::types::PyModule;
|
||||||
|
|
||||||
|
// Initialiser for the `_native` extension module.
//
// Sets `__doc__` and `__version__` (the crate version baked in at compile
// time via `CARGO_PKG_VERSION`), then lets each submodule add its items to
// `module`. Any failure aborts initialisation through `?`.
//
// Plain `//` comments are used on purpose: a `///` doc comment on a
// `#[pymodule]` function would be picked up as the module docstring.
#[pymodule]
fn _native(_py: Python<'_>, module: &Bound<'_, PyModule>) -> PyResult<()> {
    module.add("__doc__", "Native qroissant extension")?;
    module.add("__version__", env!("CARGO_PKG_VERSION"))?;
    // NOTE(review): the registration order below may matter if later modules
    // rely on items added by earlier ones — confirm before reordering.
    errors::register(module)?;
    types::register(module)?;
    repr::register(module)?;
    values::register(module)?;
    raw_response::register(module)?;
    client::register(module)?;
    serde::register(module)?;
    Ok(())
}
|
||||||
777
crates/qroissant-python/src/raw_response.rs
Normal file
777
crates/qroissant-python/src/raw_response.rs
Normal file
|
|
@ -0,0 +1,777 @@
|
||||||
|
use std::fmt;
|
||||||
|
use std::io::Read;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::sync::Mutex;
|
||||||
|
use std::sync::MutexGuard;
|
||||||
|
|
||||||
|
use pyo3::buffer::PyBuffer;
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use pyo3::types::PyAny;
|
||||||
|
use pyo3::types::PyBytes;
|
||||||
|
use pyo3::types::PyModule;
|
||||||
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
use qroissant_core::HEADER_LEN;
|
||||||
|
use qroissant_core::MessageHeader as CoreMessageHeader;
|
||||||
|
use tokio::io::AsyncRead;
|
||||||
|
use tokio::io::AsyncReadExt;
|
||||||
|
use tokio::io::AsyncWrite;
|
||||||
|
use tokio::task::spawn_blocking;
|
||||||
|
|
||||||
|
use crate::serde::decode_core_value;
|
||||||
|
use crate::types::Compression;
|
||||||
|
use crate::types::DecodeOptions;
|
||||||
|
use crate::types::Encoding;
|
||||||
|
use crate::types::MessageHeader;
|
||||||
|
use crate::types::MessageType;
|
||||||
|
use crate::values::core_value_to_python_with_opts;
|
||||||
|
|
||||||
|
/// A blocking, readable handle on the connection that is delivering a raw
/// response.
///
/// `RawResponseState::finalize_stream` calls exactly one of the two methods
/// when it takes the lease out of the streaming backend.
pub(crate) trait SyncRawLease: Read + Send {
    /// Called when the stream is finalised as reusable.
    // NOTE(review): presumably returns the connection to its pool — confirm
    // against the implementors.
    fn mark_reusable(&mut self);
    /// Called when the stream is finalised as not reusable.
    // NOTE(review): presumably discards the connection — confirm against the
    // implementors.
    fn abandon(&mut self);
}
|
||||||
|
|
||||||
|
/// Async counterpart of `SyncRawLease`: a readable and writable handle that
/// can be adapted to blocking reads through `BlockingAsyncBridge`.
pub(crate) trait AsyncStreamingLease: AsyncRead + AsyncWrite + Send + Unpin {
    /// Forwarded from `SyncRawLease::mark_reusable` by `BlockingAsyncBridge`.
    fn mark_reusable(&mut self);
    /// Forwarded from `SyncRawLease::abandon` by `BlockingAsyncBridge`.
    fn abandon(&mut self);
}
|
||||||
|
|
||||||
|
pub(crate) struct BlockingAsyncBridge<T> {
|
||||||
|
inner: T,
|
||||||
|
handle: tokio::runtime::Handle,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> BlockingAsyncBridge<T> {
|
||||||
|
pub(crate) fn new(inner: T) -> Self {
|
||||||
|
Self {
|
||||||
|
inner,
|
||||||
|
handle: tokio::runtime::Handle::current(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Read for BlockingAsyncBridge<T>
|
||||||
|
where
|
||||||
|
T: AsyncStreamingLease,
|
||||||
|
{
|
||||||
|
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||||
|
let handle = self.handle.clone();
|
||||||
|
let inner = &mut self.inner;
|
||||||
|
let fut = async move { inner.read(buf).await };
|
||||||
|
if tokio::runtime::Handle::try_current().is_ok() {
|
||||||
|
tokio::task::block_in_place(|| handle.block_on(fut))
|
||||||
|
} else {
|
||||||
|
handle.block_on(fut)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> SyncRawLease for BlockingAsyncBridge<T>
|
||||||
|
where
|
||||||
|
T: AsyncStreamingLease,
|
||||||
|
{
|
||||||
|
fn mark_reusable(&mut self) {
|
||||||
|
self.inner.mark_reusable();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn abandon(&mut self) {
|
||||||
|
self.inner.abandon();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds the Python `ValueError` that reports an I/O operation on a closed
/// raw response.
fn closed_raw_response_error() -> PyErr {
    pyo3::exceptions::PyValueError::new_err("I/O operation on closed qroissant raw response")
}
|
||||||
|
|
||||||
|
/// Builds the Python `RuntimeError` that reports poisoned raw-response state.
fn backend_lock_error() -> PyErr {
    pyo3::exceptions::PyRuntimeError::new_err("qroissant raw response state is poisoned")
}
|
||||||
|
|
||||||
|
/// Builds the Python `OSError` that reports a `seek()` attempt on a
/// forward-only streaming response.
fn unsupported_seek_error() -> PyErr {
    pyo3::exceptions::PyOSError::new_err(
        "qroissant raw streaming responses are forward-only and do not support seek()",
    )
}
|
||||||
|
|
||||||
|
/// Builds the Python `TypeError` that reports a read-only buffer passed to
/// `readinto()`.
fn readonly_buffer_error() -> PyErr {
    pyo3::exceptions::PyTypeError::new_err("readinto() requires a writable buffer")
}
|
||||||
|
|
||||||
|
/// Builds the Python `TypeError` that reports a buffer passed to `readinto()`
/// that is not C-contiguous.
fn non_contiguous_buffer_error() -> PyErr {
    pyo3::exceptions::PyTypeError::new_err("readinto() requires a C-contiguous buffer")
}
|
||||||
|
|
||||||
|
/// Failure modes of reading from a raw response, kept free of Python types.
/// `raw_read_error_to_py` turns each variant into a Python exception.
#[derive(Debug)]
enum RawReadError {
    /// The raw response has been closed.
    Closed,
    /// The raw response backend is poisoned.
    BackendPoisoned,
    /// The raw response has already been partially consumed.
    PartiallyConsumed,
    /// An underlying I/O error.
    Io(std::io::Error),
}
|
||||||
|
|
||||||
|
/// Lets `?` lift a `std::io::Error` into `RawReadError::Io`.
impl From<std::io::Error> for RawReadError {
    fn from(error: std::io::Error) -> Self {
        Self::Io(error)
    }
}
|
||||||
|
|
||||||
|
impl fmt::Display for RawReadError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::Closed => write!(f, "raw response is closed"),
|
||||||
|
Self::BackendPoisoned => write!(f, "raw response backend is poisoned"),
|
||||||
|
Self::PartiallyConsumed => {
|
||||||
|
write!(f, "raw response has already been partially consumed")
|
||||||
|
}
|
||||||
|
Self::Io(error) => error.fmt(f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn raw_read_error_to_py(error: RawReadError) -> PyErr {
|
||||||
|
match error {
|
||||||
|
RawReadError::Closed => closed_raw_response_error(),
|
||||||
|
RawReadError::BackendPoisoned => backend_lock_error(),
|
||||||
|
RawReadError::PartiallyConsumed => pyo3::exceptions::PyValueError::new_err(
|
||||||
|
"cannot decode a partially consumed raw response",
|
||||||
|
),
|
||||||
|
RawReadError::Io(error) => PyErr::from(error),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_writable_contiguous_u8_buffer(payload: &Bound<'_, PyAny>) -> PyResult<PyBuffer<u8>> {
|
||||||
|
let buffer = PyBuffer::<u8>::get(payload)?;
|
||||||
|
if buffer.readonly() {
|
||||||
|
return Err(readonly_buffer_error());
|
||||||
|
}
|
||||||
|
if !buffer.is_c_contiguous() {
|
||||||
|
return Err(non_contiguous_buffer_error());
|
||||||
|
}
|
||||||
|
Ok(buffer)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Where the bytes of a raw response come from.
enum RawResponseBackend {
    /// The whole message is held in memory.
    Buffered {
        /// Complete message bytes.
        payload: Vec<u8>,
        /// Read cursor into `payload`; `seek()` may move it.
        position: usize,
    },
    /// The header is already in memory and the body is pulled from a lease.
    Streaming {
        /// Header bytes that are replayed to the reader before the body.
        header_bytes: [u8; HEADER_LEN],
        /// Number of header bytes already handed out.
        header_position: usize,
        /// Body bytes still to be read from the lease.
        remaining_body: usize,
        /// Total bytes delivered so far (header and body).
        position: usize,
        /// Source of body bytes; `None` once the stream has been finalized.
        lease: Option<Box<dyn SyncRawLease>>,
    },
    /// The response was closed; reads fail from here on.
    Closed,
}
|
||||||
|
|
||||||
|
/// State behind the mutex shared by `RawResponse` and `AsyncRawResponse`.
struct RawResponseState {
    /// Message header exposed through the `header` getters.
    header: MessageHeader,
    /// Current byte source (buffered, streaming, or closed).
    backend: RawResponseBackend,
}
|
||||||
|
|
||||||
|
impl RawResponseState {
|
||||||
|
fn streaming_remaining_total(header_position: usize, remaining_body: usize) -> usize {
|
||||||
|
(HEADER_LEN - header_position) + remaining_body
|
||||||
|
}
|
||||||
|
|
||||||
|
fn finalize_stream(lease: &mut Option<Box<dyn SyncRawLease>>, reusable: bool) {
|
||||||
|
if let Some(mut lease) = lease.take() {
|
||||||
|
if reusable {
|
||||||
|
lease.mark_reusable();
|
||||||
|
} else {
|
||||||
|
lease.abandon();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn close_backend(backend: &mut RawResponseBackend) {
|
||||||
|
let backend = std::mem::replace(backend, RawResponseBackend::Closed);
|
||||||
|
match backend {
|
||||||
|
RawResponseBackend::Buffered { .. } | RawResponseBackend::Closed => {}
|
||||||
|
RawResponseBackend::Streaming {
|
||||||
|
remaining_body,
|
||||||
|
header_position,
|
||||||
|
mut lease,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
let reusable =
|
||||||
|
Self::streaming_remaining_total(header_position, remaining_body) == 0;
|
||||||
|
Self::finalize_stream(&mut lease, reusable);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_streaming_into(
|
||||||
|
header_bytes: &[u8; HEADER_LEN],
|
||||||
|
header_position: &mut usize,
|
||||||
|
remaining_body: &mut usize,
|
||||||
|
position: &mut usize,
|
||||||
|
lease: &mut Option<Box<dyn SyncRawLease>>,
|
||||||
|
out: &mut [u8],
|
||||||
|
) -> Result<usize, RawReadError> {
|
||||||
|
let total_remaining = Self::streaming_remaining_total(*header_position, *remaining_body);
|
||||||
|
if total_remaining == 0 {
|
||||||
|
Self::finalize_stream(lease, true);
|
||||||
|
return Ok(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
let target = out.len().min(total_remaining);
|
||||||
|
let mut filled = 0_usize;
|
||||||
|
let header_copied = if *header_position < HEADER_LEN && filled < target {
|
||||||
|
let available = HEADER_LEN - *header_position;
|
||||||
|
let to_copy = (target - filled).min(available);
|
||||||
|
out[..to_copy]
|
||||||
|
.copy_from_slice(&header_bytes[*header_position..*header_position + to_copy]);
|
||||||
|
*header_position += to_copy;
|
||||||
|
filled += to_copy;
|
||||||
|
to_copy
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
|
||||||
|
if filled < target {
|
||||||
|
while filled < target {
|
||||||
|
let lease_ref = lease
|
||||||
|
.as_mut()
|
||||||
|
.expect("streaming raw responses must hold an active lease");
|
||||||
|
let read = lease_ref.read(&mut out[filled..target])?;
|
||||||
|
if read == 0 {
|
||||||
|
Self::finalize_stream(lease, false);
|
||||||
|
return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
|
||||||
|
}
|
||||||
|
filled += read;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let body_bytes = filled.saturating_sub(header_copied);
|
||||||
|
if body_bytes != 0 {
|
||||||
|
*remaining_body = remaining_body.saturating_sub(body_bytes);
|
||||||
|
}
|
||||||
|
*position = position.saturating_add(filled);
|
||||||
|
|
||||||
|
if Self::streaming_remaining_total(*header_position, *remaining_body) == 0 {
|
||||||
|
Self::finalize_stream(lease, true);
|
||||||
|
}
|
||||||
|
Ok(filled)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn header_from_payload(payload: &[u8]) -> PyResult<MessageHeader> {
|
||||||
|
if payload.len() < HEADER_LEN {
|
||||||
|
return Ok(MessageHeader::new_native(
|
||||||
|
Encoding::LittleEndian,
|
||||||
|
MessageType::Response,
|
||||||
|
Compression::Uncompressed,
|
||||||
|
payload.len(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let header = CoreMessageHeader::parse(payload)
|
||||||
|
.map_err(|error| pyo3::exceptions::PyValueError::new_err(error.to_string()))?;
|
||||||
|
Ok(MessageHeader::from(header))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Synchronous, file-like view over a raw response. Dropping a value of this
// type closes the shared backend (see its `Drop` impl).
#[pyclass(module = "qroissant")]
pub struct RawResponse {
    // Shared, mutex-guarded response state.
    state: Arc<Mutex<RawResponseState>>,
}
|
||||||
|
|
||||||
|
impl RawResponse {
|
||||||
|
fn lock_state_result(&self) -> Result<MutexGuard<'_, RawResponseState>, RawReadError> {
|
||||||
|
self.state.lock().map_err(|_| RawReadError::BackendPoisoned)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lock_state(&self) -> PyResult<MutexGuard<'_, RawResponseState>> {
|
||||||
|
self.lock_state_result().map_err(raw_read_error_to_py)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ensure_open(backend: &RawResponseBackend) -> PyResult<()> {
|
||||||
|
if matches!(backend, RawResponseBackend::Closed) {
|
||||||
|
return Err(closed_raw_response_error());
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ensure_open_result(backend: &RawResponseBackend) -> Result<(), RawReadError> {
|
||||||
|
if matches!(backend, RawResponseBackend::Closed) {
|
||||||
|
return Err(RawReadError::Closed);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn buffered(payload: Vec<u8>) -> PyResult<Self> {
|
||||||
|
let header = header_from_payload(&payload)?;
|
||||||
|
Ok(Self {
|
||||||
|
state: Arc::new(Mutex::new(RawResponseState {
|
||||||
|
header,
|
||||||
|
backend: RawResponseBackend::Buffered {
|
||||||
|
payload,
|
||||||
|
position: 0,
|
||||||
|
},
|
||||||
|
})),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn streaming(
|
||||||
|
header: MessageHeader,
|
||||||
|
header_bytes: [u8; HEADER_LEN],
|
||||||
|
remaining_body: usize,
|
||||||
|
lease: Box<dyn SyncRawLease>,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
state: Arc::new(Mutex::new(RawResponseState {
|
||||||
|
header,
|
||||||
|
backend: RawResponseBackend::Streaming {
|
||||||
|
header_bytes,
|
||||||
|
header_position: 0,
|
||||||
|
remaining_body,
|
||||||
|
position: 0,
|
||||||
|
lease: Some(lease),
|
||||||
|
},
|
||||||
|
})),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn into_async(self) -> AsyncRawResponse {
|
||||||
|
let this = std::mem::ManuallyDrop::new(self);
|
||||||
|
// SAFETY: `ManuallyDrop` suppresses `RawResponse::drop`, so it is safe
|
||||||
|
// to move the owned `Arc` into the async wrapper without closing the
|
||||||
|
// underlying raw-response state.
|
||||||
|
let state = unsafe { std::ptr::read(&this.state) };
|
||||||
|
AsyncRawResponse { state }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn materialize_result(&self) -> Result<Vec<u8>, RawReadError> {
|
||||||
|
let position = {
|
||||||
|
let state = self.lock_state_result()?;
|
||||||
|
Self::ensure_open_result(&state.backend)?;
|
||||||
|
match &state.backend {
|
||||||
|
RawResponseBackend::Buffered { position, .. }
|
||||||
|
| RawResponseBackend::Streaming { position, .. } => *position,
|
||||||
|
RawResponseBackend::Closed => {
|
||||||
|
unreachable!("closed raw responses are handled above")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if position != 0 {
|
||||||
|
return Err(RawReadError::PartiallyConsumed);
|
||||||
|
}
|
||||||
|
self.read_owned_result(None)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_owned_result(&self, size: Option<isize>) -> Result<Vec<u8>, RawReadError> {
|
||||||
|
let mut state = self.lock_state_result()?;
|
||||||
|
Self::ensure_open_result(&state.backend)?;
|
||||||
|
match &mut state.backend {
|
||||||
|
RawResponseBackend::Buffered { payload, position } => {
|
||||||
|
if *position >= payload.len() {
|
||||||
|
return Ok(Vec::new());
|
||||||
|
}
|
||||||
|
let remaining = payload.len() - *position;
|
||||||
|
let to_read = match size {
|
||||||
|
Some(size) if size >= 0 => remaining.min(size as usize),
|
||||||
|
_ => remaining,
|
||||||
|
};
|
||||||
|
let start = *position;
|
||||||
|
let end = start + to_read;
|
||||||
|
*position = end;
|
||||||
|
Ok(payload[start..end].to_vec())
|
||||||
|
}
|
||||||
|
RawResponseBackend::Streaming {
|
||||||
|
header_bytes,
|
||||||
|
header_position,
|
||||||
|
remaining_body,
|
||||||
|
position,
|
||||||
|
lease,
|
||||||
|
} => {
|
||||||
|
let total_remaining =
|
||||||
|
RawResponseState::streaming_remaining_total(*header_position, *remaining_body);
|
||||||
|
let target = match size {
|
||||||
|
Some(size) if size >= 0 => total_remaining.min(size as usize),
|
||||||
|
_ => total_remaining,
|
||||||
|
};
|
||||||
|
let mut out = vec![0_u8; target];
|
||||||
|
match RawResponseState::read_streaming_into(
|
||||||
|
header_bytes,
|
||||||
|
header_position,
|
||||||
|
remaining_body,
|
||||||
|
position,
|
||||||
|
lease,
|
||||||
|
&mut out,
|
||||||
|
) {
|
||||||
|
Ok(filled) => {
|
||||||
|
out.truncate(filled);
|
||||||
|
Ok(out)
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
state.backend = RawResponseBackend::Closed;
|
||||||
|
Err(error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
RawResponseBackend::Closed => Err(RawReadError::Closed),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_into_result(&self, out: &mut [u8]) -> Result<usize, RawReadError> {
|
||||||
|
let mut state = self.lock_state_result()?;
|
||||||
|
Self::ensure_open_result(&state.backend)?;
|
||||||
|
match &mut state.backend {
|
||||||
|
RawResponseBackend::Buffered { payload, position } => {
|
||||||
|
if *position >= payload.len() {
|
||||||
|
return Ok(0);
|
||||||
|
}
|
||||||
|
let remaining = payload.len() - *position;
|
||||||
|
let to_read = remaining.min(out.len());
|
||||||
|
let start = *position;
|
||||||
|
let end = start + to_read;
|
||||||
|
out[..to_read].copy_from_slice(&payload[start..end]);
|
||||||
|
*position = end;
|
||||||
|
Ok(to_read)
|
||||||
|
}
|
||||||
|
RawResponseBackend::Streaming {
|
||||||
|
header_bytes,
|
||||||
|
header_position,
|
||||||
|
remaining_body,
|
||||||
|
position,
|
||||||
|
lease,
|
||||||
|
} => match RawResponseState::read_streaming_into(
|
||||||
|
header_bytes,
|
||||||
|
header_position,
|
||||||
|
remaining_body,
|
||||||
|
position,
|
||||||
|
lease,
|
||||||
|
out,
|
||||||
|
) {
|
||||||
|
Ok(filled) => Ok(filled),
|
||||||
|
Err(error) => {
|
||||||
|
state.backend = RawResponseBackend::Closed;
|
||||||
|
Err(error)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
RawResponseBackend::Closed => Err(RawReadError::Closed),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for RawResponse {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
// Clean up even if the mutex is poisoned (panic in another thread).
|
||||||
|
let mut state = match self.state.lock() {
|
||||||
|
Ok(guard) => guard,
|
||||||
|
Err(poisoned) => poisoned.into_inner(),
|
||||||
|
};
|
||||||
|
RawResponseState::close_backend(&mut state.backend);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl RawResponse {
|
||||||
|
#[new]
|
||||||
|
fn new(payload: Vec<u8>) -> PyResult<Self> {
|
||||||
|
Self::buffered(payload)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Context-manager entry: returns the response itself.
fn __enter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
    slf
}
|
||||||
|
|
||||||
|
fn __exit__(
|
||||||
|
&self,
|
||||||
|
_exc_type: Option<&Bound<'_, PyAny>>,
|
||||||
|
_exc_val: Option<&Bound<'_, PyAny>>,
|
||||||
|
_exc_tb: Option<&Bound<'_, PyAny>>,
|
||||||
|
) -> PyResult<()> {
|
||||||
|
self.close()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn closed(&self) -> bool {
|
||||||
|
self.state
|
||||||
|
.lock()
|
||||||
|
.map(|state| matches!(state.backend, RawResponseBackend::Closed))
|
||||||
|
.unwrap_or(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn header(&self) -> PyResult<MessageHeader> {
|
||||||
|
let state = self.lock_state()?;
|
||||||
|
Ok(state.header.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn close(&self) -> PyResult<()> {
|
||||||
|
let mut state = self.lock_state()?;
|
||||||
|
RawResponseState::close_backend(&mut state.backend);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn readable(&self) -> bool {
|
||||||
|
!self.closed()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn seekable(&self) -> bool {
|
||||||
|
self.state
|
||||||
|
.lock()
|
||||||
|
.map(|state| matches!(state.backend, RawResponseBackend::Buffered { .. }))
|
||||||
|
.unwrap_or(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (size=None))]
|
||||||
|
fn read<'py>(&self, py: Python<'py>, size: Option<isize>) -> PyResult<Bound<'py, PyBytes>> {
|
||||||
|
let bytes = py
|
||||||
|
.detach(|| self.read_owned_result(size))
|
||||||
|
.map_err(raw_read_error_to_py)?;
|
||||||
|
Ok(PyBytes::new(py, &bytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (size=None))]
|
||||||
|
fn read1<'py>(&self, py: Python<'py>, size: Option<isize>) -> PyResult<Bound<'py, PyBytes>> {
|
||||||
|
self.read(py, size)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn readinto(&self, py: Python<'_>, buffer: &Bound<'_, PyAny>) -> PyResult<usize> {
|
||||||
|
let writable = extract_writable_contiguous_u8_buffer(buffer)?;
|
||||||
|
let len = writable.len_bytes();
|
||||||
|
if len == 0 {
|
||||||
|
let mut empty = [];
|
||||||
|
return py
|
||||||
|
.detach(|| self.read_into_result(&mut empty))
|
||||||
|
.map_err(raw_read_error_to_py);
|
||||||
|
}
|
||||||
|
let ptr = writable.buf_ptr() as usize;
|
||||||
|
py.detach(move || {
|
||||||
|
let ptr = ptr as *mut u8;
|
||||||
|
// SAFETY: the writable Python buffer outlives this detached call and
|
||||||
|
// the slice length is bounded by the exported buffer length.
|
||||||
|
let slice = unsafe { std::slice::from_raw_parts_mut(ptr, len) };
|
||||||
|
self.read_into_result(slice)
|
||||||
|
})
|
||||||
|
.map_err(raw_read_error_to_py)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn readinto1(&self, py: Python<'_>, buffer: &Bound<'_, PyAny>) -> PyResult<usize> {
|
||||||
|
self.readinto(py, buffer)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn tell(&self) -> PyResult<usize> {
|
||||||
|
let state = self.lock_state()?;
|
||||||
|
Self::ensure_open(&state.backend)?;
|
||||||
|
match &state.backend {
|
||||||
|
RawResponseBackend::Buffered { position, .. }
|
||||||
|
| RawResponseBackend::Streaming { position, .. } => Ok(*position),
|
||||||
|
RawResponseBackend::Closed => Err(closed_raw_response_error()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (offset, whence=0))]
|
||||||
|
fn seek(&self, offset: i64, whence: i32) -> PyResult<usize> {
|
||||||
|
let mut state = self.lock_state()?;
|
||||||
|
Self::ensure_open(&state.backend)?;
|
||||||
|
match &mut state.backend {
|
||||||
|
RawResponseBackend::Buffered { payload, position } => {
|
||||||
|
let base = match whence {
|
||||||
|
0 => 0_i64,
|
||||||
|
1 => i64::try_from(*position).map_err(|_| {
|
||||||
|
pyo3::exceptions::PyOverflowError::new_err(
|
||||||
|
"raw response position exceeds supported seek range",
|
||||||
|
)
|
||||||
|
})?,
|
||||||
|
2 => i64::try_from(payload.len()).map_err(|_| {
|
||||||
|
pyo3::exceptions::PyOverflowError::new_err(
|
||||||
|
"raw response length exceeds supported seek range",
|
||||||
|
)
|
||||||
|
})?,
|
||||||
|
_ => {
|
||||||
|
return Err(pyo3::exceptions::PyValueError::new_err(format!(
|
||||||
|
"invalid seek whence value {whence}; expected 0, 1, or 2"
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let position_i64 = base.checked_add(offset).ok_or_else(|| {
|
||||||
|
pyo3::exceptions::PyOverflowError::new_err(
|
||||||
|
"raw response seek position overflowed",
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
if position_i64 < 0 {
|
||||||
|
return Err(pyo3::exceptions::PyValueError::new_err(
|
||||||
|
"negative seek position is not allowed",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
*position = usize::try_from(position_i64).map_err(|_| {
|
||||||
|
pyo3::exceptions::PyOverflowError::new_err(
|
||||||
|
"raw response seek position overflowed",
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
Ok(*position)
|
||||||
|
}
|
||||||
|
RawResponseBackend::Streaming { .. } => Err(unsupported_seek_error()),
|
||||||
|
RawResponseBackend::Closed => Err(closed_raw_response_error()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (*, options=None))]
|
||||||
|
fn decode(&self, py: Python<'_>, options: Option<&DecodeOptions>) -> PyResult<Py<PyAny>> {
|
||||||
|
let payload = py
|
||||||
|
.detach(|| self.materialize_result())
|
||||||
|
.map_err(raw_read_error_to_py)?;
|
||||||
|
let (value, opts) =
|
||||||
|
decode_core_value(bytes::Bytes::from(payload), options)
|
||||||
|
.map_err(crate::errors::to_py_err)?;
|
||||||
|
core_value_to_python_with_opts(py, value, opts)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __repr__(&self) -> String {
|
||||||
|
match self.state.lock() {
|
||||||
|
Ok(state) => match &state.backend {
|
||||||
|
RawResponseBackend::Buffered { payload, position } => format!(
|
||||||
|
"RawResponse(mode='buffered', len={}, position={}, closed=false)",
|
||||||
|
payload.len(),
|
||||||
|
position
|
||||||
|
),
|
||||||
|
RawResponseBackend::Streaming {
|
||||||
|
header_position,
|
||||||
|
remaining_body,
|
||||||
|
position,
|
||||||
|
..
|
||||||
|
} => format!(
|
||||||
|
"RawResponse(mode='streaming', remaining={}, position={}, closed=false)",
|
||||||
|
RawResponseState::streaming_remaining_total(*header_position, *remaining_body),
|
||||||
|
position
|
||||||
|
),
|
||||||
|
RawResponseBackend::Closed => "RawResponse(mode='closed', closed=true)".to_string(),
|
||||||
|
},
|
||||||
|
Err(_) => "RawResponse(mode='poisoned', closed=true)".to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Awaitable counterpart of `RawResponse`, sharing the same state type.
#[pyclass(module = "qroissant")]
pub struct AsyncRawResponse {
    // Shared, mutex-guarded response state.
    state: Arc<Mutex<RawResponseState>>,
}
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl AsyncRawResponse {
|
||||||
|
fn __aenter__<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
|
||||||
|
let state = slf.state.clone();
|
||||||
|
future_into_py(py, async move {
|
||||||
|
Python::attach(|py| Py::new(py, Self { state }).map(|value| value.into_any()))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __aexit__<'py>(
|
||||||
|
&self,
|
||||||
|
py: Python<'py>,
|
||||||
|
_exc_type: Option<&Bound<'_, PyAny>>,
|
||||||
|
_exc_val: Option<&Bound<'_, PyAny>>,
|
||||||
|
_exc_tb: Option<&Bound<'_, PyAny>>,
|
||||||
|
) -> PyResult<Bound<'py, PyAny>> {
|
||||||
|
let state = self.state.clone();
|
||||||
|
future_into_py(py, async move {
|
||||||
|
let mut state = state.lock().map_err(|_| backend_lock_error())?;
|
||||||
|
RawResponseState::close_backend(&mut state.backend);
|
||||||
|
Ok(false)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn closed(&self) -> bool {
|
||||||
|
self.state
|
||||||
|
.lock()
|
||||||
|
.map(|state| matches!(state.backend, RawResponseBackend::Closed))
|
||||||
|
.unwrap_or(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn header(&self) -> PyResult<MessageHeader> {
|
||||||
|
let state = self.state.lock().map_err(|_| backend_lock_error())?;
|
||||||
|
Ok(state.header.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn close<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
|
||||||
|
let state = self.state.clone();
|
||||||
|
future_into_py(py, async move {
|
||||||
|
let mut state = state.lock().map_err(|_| backend_lock_error())?;
|
||||||
|
RawResponseState::close_backend(&mut state.backend);
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (size=None))]
|
||||||
|
fn read<'py>(&self, py: Python<'py>, size: Option<isize>) -> PyResult<Bound<'py, PyAny>> {
|
||||||
|
let raw = RawResponse {
|
||||||
|
state: self.state.clone(),
|
||||||
|
};
|
||||||
|
future_into_py(py, async move {
|
||||||
|
let bytes = spawn_blocking(move || raw.read_owned_result(size))
|
||||||
|
.await
|
||||||
|
.map_err(|error| pyo3::exceptions::PyRuntimeError::new_err(error.to_string()))?
|
||||||
|
.map_err(raw_read_error_to_py)?;
|
||||||
|
Python::attach(|py| Ok(PyBytes::new(py, &bytes).unbind().into_any()))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (size=None))]
|
||||||
|
fn read1<'py>(&self, py: Python<'py>, size: Option<isize>) -> PyResult<Bound<'py, PyAny>> {
|
||||||
|
self.read(py, size)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn readinto<'py>(&self, py: Python<'py>, buffer: Py<PyAny>) -> PyResult<Bound<'py, PyAny>> {
|
||||||
|
let state = self.state.clone();
|
||||||
|
future_into_py(py, async move {
|
||||||
|
Python::attach(|py| {
|
||||||
|
let buffer = buffer.bind(py);
|
||||||
|
let writable = extract_writable_contiguous_u8_buffer(buffer)?;
|
||||||
|
let len = writable.len_bytes();
|
||||||
|
if len == 0 {
|
||||||
|
return Ok(0);
|
||||||
|
}
|
||||||
|
let ptr = writable.buf_ptr() as usize;
|
||||||
|
let raw = RawResponse {
|
||||||
|
state: state.clone(),
|
||||||
|
};
|
||||||
|
drop(writable);
|
||||||
|
let read = py
|
||||||
|
.detach(move || {
|
||||||
|
let ptr = ptr as *mut u8;
|
||||||
|
// SAFETY: the writable Python buffer outlives this detached call and
|
||||||
|
// the slice length is bounded by the exported buffer length.
|
||||||
|
let slice = unsafe { std::slice::from_raw_parts_mut(ptr, len) };
|
||||||
|
raw.read_into_result(slice)
|
||||||
|
})
|
||||||
|
.map_err(raw_read_error_to_py)?;
|
||||||
|
Ok(read)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn readinto1<'py>(&self, py: Python<'py>, buffer: Py<PyAny>) -> PyResult<Bound<'py, PyAny>> {
|
||||||
|
self.readinto(py, buffer)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (*, options=None))]
|
||||||
|
fn decode<'py>(
|
||||||
|
&self,
|
||||||
|
py: Python<'py>,
|
||||||
|
options: Option<DecodeOptions>,
|
||||||
|
) -> PyResult<Bound<'py, PyAny>> {
|
||||||
|
let raw = RawResponse {
|
||||||
|
state: self.state.clone(),
|
||||||
|
};
|
||||||
|
future_into_py(py, async move {
|
||||||
|
let payload = spawn_blocking(move || raw.materialize_result())
|
||||||
|
.await
|
||||||
|
.map_err(|error| pyo3::exceptions::PyRuntimeError::new_err(error.to_string()))?
|
||||||
|
.map_err(raw_read_error_to_py)?;
|
||||||
|
let (value, opts) =
|
||||||
|
decode_core_value(bytes::Bytes::from(payload), options.as_ref())
|
||||||
|
.map_err(crate::errors::to_py_err)?;
|
||||||
|
Python::attach(|py| core_value_to_python_with_opts(py, value, opts))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds the raw-response classes to the given Python module.
pub fn register(module: &Bound<'_, PyModule>) -> PyResult<()> {
    module.add_class::<RawResponse>()?;
    module.add_class::<AsyncRawResponse>()
}
|
||||||
437
crates/qroissant-python/src/repr/cell.rs
Normal file
437
crates/qroissant-python/src/repr/cell.rs
Normal file
|
|
@ -0,0 +1,437 @@
|
||||||
|
//! Cell-level value formatting for q atoms and vector items.
|
||||||
|
//!
|
||||||
|
//! Converts raw q IPC values (CoreValue primitives) to human-readable strings
|
||||||
|
//! without any Arrow dependency. Null sentinels are rendered as `"null"`.
|
||||||
|
//! Temporal values use ISO-like formats familiar to both q and Python users.
|
||||||
|
|
||||||
|
use chrono::NaiveDate;
|
||||||
|
use chrono::NaiveDateTime;
|
||||||
|
use qroissant_core::Atom;
|
||||||
|
use qroissant_core::VectorData;
|
||||||
|
use qroissant_kernels::DATE_OFFSET_DAYS;
|
||||||
|
use qroissant_kernels::MILLIS_PER_DAY;
|
||||||
|
use qroissant_kernels::Q_NULL_DATE;
|
||||||
|
use qroissant_kernels::Q_NULL_INT;
|
||||||
|
use qroissant_kernels::Q_NULL_LONG;
|
||||||
|
use qroissant_kernels::Q_NULL_MINUTE;
|
||||||
|
use qroissant_kernels::Q_NULL_MONTH;
|
||||||
|
use qroissant_kernels::Q_NULL_SECOND;
|
||||||
|
use qroissant_kernels::Q_NULL_SHORT;
|
||||||
|
use qroissant_kernels::Q_NULL_TIME;
|
||||||
|
use qroissant_kernels::Q_NULL_TIMESPAN;
|
||||||
|
use qroissant_kernels::Q_NULL_TIMESTAMP;
|
||||||
|
use qroissant_kernels::TIMESTAMP_OFFSET_NS;
|
||||||
|
|
||||||
|
pub const MAX_CELL_CHARS: usize = 48;

/// Cap `s` at `MAX_CELL_CHARS` characters, marking any cut with a trailing `"..."`.
pub fn truncate(s: String) -> String {
    // Byte offset of the first character beyond the limit, if there is one.
    match s.char_indices().nth(MAX_CELL_CHARS) {
        Some((cut, _)) => format!("{}...", &s[..cut]),
        None => s,
    }
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Temporal helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
fn format_date_days(q_days: i32) -> String {
|
||||||
|
// q dates are days since 2000-01-01; NaiveDate::from_ymd uses Unix days
|
||||||
|
let unix_days = q_days + DATE_OFFSET_DAYS;
|
||||||
|
match NaiveDate::from_num_days_from_ce_opt(unix_days + 719_163) {
|
||||||
|
Some(d) => d.format("%Y.%m.%d").to_string(),
|
||||||
|
None => format!("<date:{q_days}>"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn format_timestamp_ns(q_ns: i64) -> String {
|
||||||
|
let unix_ns = q_ns.saturating_add(TIMESTAMP_OFFSET_NS);
|
||||||
|
let secs = unix_ns.div_euclid(1_000_000_000);
|
||||||
|
let nsecs = unix_ns.rem_euclid(1_000_000_000) as u32;
|
||||||
|
match NaiveDateTime::from_timestamp_opt(secs, nsecs) {
|
||||||
|
Some(dt) => dt.format("%Y.%m.%dT%H:%M:%S.%9f").to_string(),
|
||||||
|
None => format!("<timestamp:{q_ns}>"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Render a q month (months since 2000-01, so month 0 = `2000.01m`) as
/// `YYYY.MMm`.
///
/// The arithmetic is done in `i64` so extreme inputs such as the q
/// infinite month cannot overflow.
fn format_month_i32(q_months: i32) -> String {
    let total_months = 2000_i64 * 12 + i64::from(q_months);
    // Euclidean division keeps the month in 1..=12 for pre-2000 values.
    let year = total_months.div_euclid(12);
    let month = total_months.rem_euclid(12) + 1;
    format!("{year:04}.{month:02}m")
}
|
||||||
|
|
||||||
|
fn format_datetime_f64(q_days: f64) -> String {
|
||||||
|
let unix_ms = q_days * MILLIS_PER_DAY + 946_684_800_000.0;
|
||||||
|
let unix_ms_i64 = unix_ms as i64;
|
||||||
|
let secs = unix_ms_i64.div_euclid(1000);
|
||||||
|
let ms = unix_ms_i64.rem_euclid(1000) as u32;
|
||||||
|
match NaiveDateTime::from_timestamp_opt(secs, ms * 1_000_000) {
|
||||||
|
Some(dt) => dt.format("%Y.%m.%dT%H:%M:%S.%3f").to_string(),
|
||||||
|
None => format!("<datetime:{q_days}>"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Render a q timespan (nanoseconds) as `[-]dDHH:MM:SS.nnnnnnnnn`.
fn format_timespan_ns(q_ns: i64) -> String {
    const NS_PER_SEC: u64 = 1_000_000_000;
    const NS_PER_MIN: u64 = 60 * NS_PER_SEC;
    const NS_PER_HOUR: u64 = 60 * NS_PER_MIN;
    const NS_PER_DAY: u64 = 24 * NS_PER_HOUR;

    // Format the magnitude and prepend the sign; `unsigned_abs` is safe
    // even for `i64::MIN`.
    let sign = if q_ns < 0 { "-" } else { "" };
    let magnitude = q_ns.unsigned_abs();

    let days = magnitude / NS_PER_DAY;
    let hours = magnitude % NS_PER_DAY / NS_PER_HOUR;
    let minutes = magnitude % NS_PER_HOUR / NS_PER_MIN;
    let secs = magnitude % NS_PER_MIN / NS_PER_SEC;
    let ns = magnitude % NS_PER_SEC;
    format!("{sign}{days}D{hours:02}:{minutes:02}:{secs:02}.{ns:09}")
}
|
||||||
|
|
||||||
|
/// Render a q minute value (total minutes) as `[-]HH:MM`.
///
/// Negative values are formatted from their magnitude with a single
/// leading sign, instead of signing each component (`-1:-30`).
fn format_minute_i32(total_minutes: i32) -> String {
    let sign = if total_minutes < 0 { "-" } else { "" };
    // `unsigned_abs` cannot overflow, unlike negating `i32::MIN`.
    let magnitude = total_minutes.unsigned_abs();
    let h = magnitude / 60;
    let m = magnitude % 60;
    format!("{sign}{h:02}:{m:02}")
}
|
||||||
|
|
||||||
|
/// Render a q second value (total seconds) as `[-]HH:MM:SS`.
///
/// Negative values are formatted from their magnitude with a single
/// leading sign, instead of signing each component.
fn format_second_i32(total_seconds: i32) -> String {
    let sign = if total_seconds < 0 { "-" } else { "" };
    // `unsigned_abs` cannot overflow, unlike negating `i32::MIN`.
    let magnitude = total_seconds.unsigned_abs();
    let h = magnitude / 3600;
    let m = (magnitude / 60) % 60;
    let s = magnitude % 60;
    format!("{sign}{h:02}:{m:02}:{s:02}")
}
|
||||||
|
|
||||||
|
/// Render a q time value (total milliseconds) as `[-]HH:MM:SS.mmm`.
///
/// Negative values are formatted from their magnitude with a single
/// leading sign, instead of signing each component.
fn format_time_ms(total_ms: i32) -> String {
    let sign = if total_ms < 0 { "-" } else { "" };
    // `unsigned_abs` cannot overflow, unlike negating `i32::MIN`.
    let magnitude = total_ms.unsigned_abs();
    let h = magnitude / 3_600_000;
    let m = (magnitude / 60_000) % 60;
    let s = (magnitude / 1000) % 60;
    let ms = magnitude % 1000;
    format!("{sign}{h:02}:{m:02}:{s:02}.{ms:03}")
}
|
||||||
|
|
||||||
|
/// Render 16 GUID bytes as lowercase hex in the usual 8-4-4-4-12 grouping.
fn format_guid_bytes(bytes: &[u8; 16]) -> String {
    // Hex-encode one contiguous run of bytes.
    let hex = |group: &[u8]| -> String { group.iter().map(|b| format!("{b:02x}")).collect() };
    format!(
        "{}-{}-{}-{}-{}",
        hex(&bytes[0..4]),
        hex(&bytes[4..6]),
        hex(&bytes[6..8]),
        hex(&bytes[8..10]),
        hex(&bytes[10..16]),
    )
}
|
||||||
|
|
||||||
|
/// Decode symbol bytes as UTF-8, substituting U+FFFD for invalid sequences.
fn format_symbol_bytes(bytes: &[u8]) -> String {
    let text = String::from_utf8_lossy(bytes);
    text.to_string()
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Public API
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Format a q atom as a display string (no truncation applied).
|
||||||
|
pub fn format_atom_raw(atom: &Atom) -> String {
|
||||||
|
match atom {
|
||||||
|
Atom::Boolean(b) => if *b { "true" } else { "false" }.to_string(),
|
||||||
|
Atom::Guid(bytes) => format_guid_bytes(bytes),
|
||||||
|
Atom::Byte(b) => format!("0x{b:02x}"),
|
||||||
|
Atom::Short(v) => {
|
||||||
|
if *v == Q_NULL_SHORT {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
v.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Int(v) => {
|
||||||
|
if *v == Q_NULL_INT {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
v.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Long(v) => {
|
||||||
|
if *v == Q_NULL_LONG {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
v.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Real(v) => {
|
||||||
|
if v.is_nan() {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
v.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Float(v) => {
|
||||||
|
if v.is_nan() {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
v.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Char(b) => {
|
||||||
|
let ch = *b as char;
|
||||||
|
format!("\"{ch}\"")
|
||||||
|
}
|
||||||
|
Atom::Symbol(bytes) => format_symbol_bytes(bytes),
|
||||||
|
Atom::Timestamp(v) => {
|
||||||
|
if *v == Q_NULL_TIMESTAMP {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_timestamp_ns(*v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Month(v) => {
|
||||||
|
if *v == Q_NULL_MONTH {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_month_i32(*v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Date(v) => {
|
||||||
|
if *v == Q_NULL_DATE {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_date_days(*v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Datetime(v) => {
|
||||||
|
if v.is_nan() {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_datetime_f64(*v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Timespan(v) => {
|
||||||
|
if *v == Q_NULL_TIMESPAN {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_timespan_ns(*v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Minute(v) => {
|
||||||
|
if *v == Q_NULL_MINUTE {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_minute_i32(*v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Second(v) => {
|
||||||
|
if *v == Q_NULL_SECOND {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_second_i32(*v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Atom::Time(v) => {
|
||||||
|
if *v == Q_NULL_TIME {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_time_ms(*v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Format and truncate a q atom.
|
||||||
|
pub fn format_atom_cell(atom: &Atom) -> String {
|
||||||
|
truncate(format_atom_raw(atom))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Format a single element from a `VectorData` at `index` (no truncation).
|
||||||
|
pub fn format_vector_item_raw(data: &VectorData, index: usize) -> String {
|
||||||
|
match data {
|
||||||
|
VectorData::Boolean(v) => if v[index] != 0 { "true" } else { "false" }.to_string(),
|
||||||
|
VectorData::Guid(v) => {
|
||||||
|
let chunk: &[u8; 16] = v[index * 16..(index + 1) * 16].try_into().unwrap();
|
||||||
|
format_guid_bytes(chunk)
|
||||||
|
}
|
||||||
|
VectorData::Byte(v) => format!("0x{:02x}", v[index]),
|
||||||
|
VectorData::Short(_) => {
|
||||||
|
let val = data.as_i16_slice()[index];
|
||||||
|
if val == Q_NULL_SHORT {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
val.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VectorData::Int(_) => {
|
||||||
|
let val = data.as_i32_slice()[index];
|
||||||
|
if val == Q_NULL_INT {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
val.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VectorData::Long(_) => {
|
||||||
|
let val = data.as_i64_slice()[index];
|
||||||
|
if val == Q_NULL_LONG {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
val.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VectorData::Real(_) => {
|
||||||
|
let val = data.as_f32_slice()[index];
|
||||||
|
if val.is_nan() {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
val.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VectorData::Float(_) => {
|
||||||
|
let val = data.as_f64_slice()[index];
|
||||||
|
if val.is_nan() {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
val.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VectorData::Char(v) => {
|
||||||
|
let ch = v[index] as char;
|
||||||
|
ch.to_string()
|
||||||
|
}
|
||||||
|
VectorData::Symbol(v) => format_symbol_bytes(&v[index]),
|
||||||
|
VectorData::Timestamp(_) => {
|
||||||
|
let val = data.as_i64_slice()[index];
|
||||||
|
if val == Q_NULL_TIMESTAMP {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_timestamp_ns(val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VectorData::Month(_) => {
|
||||||
|
let val = data.as_i32_slice()[index];
|
||||||
|
if val == Q_NULL_MONTH {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_month_i32(val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VectorData::Date(_) => {
|
||||||
|
let val = data.as_i32_slice()[index];
|
||||||
|
if val == Q_NULL_DATE {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_date_days(val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VectorData::Datetime(_) => {
|
||||||
|
let val = data.as_f64_slice()[index];
|
||||||
|
if val.is_nan() {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_datetime_f64(val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VectorData::Timespan(_) => {
|
||||||
|
let val = data.as_i64_slice()[index];
|
||||||
|
if val == Q_NULL_TIMESPAN {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_timespan_ns(val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VectorData::Minute(_) => {
|
||||||
|
let val = data.as_i32_slice()[index];
|
||||||
|
if val == Q_NULL_MINUTE {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_minute_i32(val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VectorData::Second(_) => {
|
||||||
|
let val = data.as_i32_slice()[index];
|
||||||
|
if val == Q_NULL_SECOND {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_second_i32(val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
VectorData::Time(_) => {
|
||||||
|
let val = data.as_i32_slice()[index];
|
||||||
|
if val == Q_NULL_TIME {
|
||||||
|
"null".to_string()
|
||||||
|
} else {
|
||||||
|
format_time_ms(val)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Format and truncate a single vector item.
|
||||||
|
pub fn format_vector_item(data: &VectorData, index: usize) -> String {
|
||||||
|
truncate(format_vector_item_raw(data, index))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Format a char vector as a quoted string (e.g. `"abc"`), truncated.
|
||||||
|
pub fn format_char_vector(data: &[u8]) -> String {
|
||||||
|
let s: String = data.iter().map(|&b| b as char).collect();
|
||||||
|
truncate(format!("\"{s}\""))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the q primitive label for a `VectorData`.
|
||||||
|
pub fn primitive_label(data: &VectorData) -> &'static str {
|
||||||
|
match data {
|
||||||
|
VectorData::Boolean(_) => "boolean",
|
||||||
|
VectorData::Guid(_) => "guid",
|
||||||
|
VectorData::Byte(_) => "byte",
|
||||||
|
VectorData::Short(_) => "short",
|
||||||
|
VectorData::Int(_) => "int",
|
||||||
|
VectorData::Long(_) => "long",
|
||||||
|
VectorData::Real(_) => "real",
|
||||||
|
VectorData::Float(_) => "float",
|
||||||
|
VectorData::Char(_) => "char",
|
||||||
|
VectorData::Symbol(_) => "symbol",
|
||||||
|
VectorData::Timestamp(_) => "timestamp",
|
||||||
|
VectorData::Month(_) => "month",
|
||||||
|
VectorData::Date(_) => "date",
|
||||||
|
VectorData::Datetime(_) => "datetime",
|
||||||
|
VectorData::Timespan(_) => "timespan",
|
||||||
|
VectorData::Minute(_) => "minute",
|
||||||
|
VectorData::Second(_) => "second",
|
||||||
|
VectorData::Time(_) => "time",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the q primitive label for an `Atom`.
|
||||||
|
pub fn atom_primitive_label(atom: &Atom) -> &'static str {
|
||||||
|
match atom {
|
||||||
|
Atom::Boolean(_) => "boolean",
|
||||||
|
Atom::Guid(_) => "guid",
|
||||||
|
Atom::Byte(_) => "byte",
|
||||||
|
Atom::Short(_) => "short",
|
||||||
|
Atom::Int(_) => "int",
|
||||||
|
Atom::Long(_) => "long",
|
||||||
|
Atom::Real(_) => "real",
|
||||||
|
Atom::Float(_) => "float",
|
||||||
|
Atom::Char(_) => "char",
|
||||||
|
Atom::Symbol(_) => "symbol",
|
||||||
|
Atom::Timestamp(_) => "timestamp",
|
||||||
|
Atom::Month(_) => "month",
|
||||||
|
Atom::Date(_) => "date",
|
||||||
|
Atom::Datetime(_) => "datetime",
|
||||||
|
Atom::Timespan(_) => "timespan",
|
||||||
|
Atom::Minute(_) => "minute",
|
||||||
|
Atom::Second(_) => "second",
|
||||||
|
Atom::Time(_) => "time",
|
||||||
|
}
|
||||||
|
}
|
||||||
278
crates/qroissant-python/src/repr/format.rs
Normal file
278
crates/qroissant-python/src/repr/format.rs
Normal file
|
|
@ -0,0 +1,278 @@
|
||||||
|
//! High-level format functions for each q value shape.
|
||||||
|
//!
|
||||||
|
//! Each function produces a multi-line ASCII repr string. Rendering is driven
|
||||||
|
//! by the active [`FormattingOptions`] (read from the process-wide global).
|
||||||
|
|
||||||
|
use qroissant_core::Atom as CoreAtom;
|
||||||
|
use qroissant_core::Dictionary as CoreDictionary;
|
||||||
|
use qroissant_core::List as CoreList;
|
||||||
|
use qroissant_core::Table as CoreTable;
|
||||||
|
use qroissant_core::Value as CoreValue;
|
||||||
|
use qroissant_core::Vector as CoreVector;
|
||||||
|
use qroissant_core::VectorData;
|
||||||
|
|
||||||
|
use super::cell::atom_primitive_label;
|
||||||
|
use super::cell::format_atom_cell;
|
||||||
|
use super::cell::format_atom_raw;
|
||||||
|
use super::cell::format_char_vector;
|
||||||
|
use super::cell::format_vector_item;
|
||||||
|
use super::cell::primitive_label;
|
||||||
|
use super::cell::truncate;
|
||||||
|
use super::options::active_options;
|
||||||
|
use super::render::PreviewSlot;
|
||||||
|
use super::render::preview_slots;
|
||||||
|
use super::render::render_preview;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Attribute helper
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
fn attribute_label(attribute: qroissant_core::Attribute) -> &'static str {
|
||||||
|
match attribute {
|
||||||
|
qroissant_core::Attribute::None => "none",
|
||||||
|
qroissant_core::Attribute::Sorted => "sorted",
|
||||||
|
qroissant_core::Attribute::Unique => "unique",
|
||||||
|
qroissant_core::Attribute::Parted => "parted",
|
||||||
|
qroissant_core::Attribute::Grouped => "grouped",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Atom
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
pub fn format_atom(atom: &CoreAtom) -> String {
|
||||||
|
let label = atom_primitive_label(atom);
|
||||||
|
render_preview(
|
||||||
|
vec![format!("Atom [{label}]")],
|
||||||
|
vec!["value".to_string()],
|
||||||
|
vec![vec![format_atom_cell(atom)]],
|
||||||
|
vec!["shape: (1,)".to_string()],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Vector
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
pub fn format_vector(vector: &CoreVector) -> String {
|
||||||
|
let len = vector.len();
|
||||||
|
let data = vector.data();
|
||||||
|
let label = primitive_label(data);
|
||||||
|
let attr = vector.attribute();
|
||||||
|
|
||||||
|
let rows = match data {
|
||||||
|
VectorData::Char(chars) => {
|
||||||
|
vec![vec![format_char_vector(chars)]]
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let opts = active_options();
|
||||||
|
preview_slots(len, opts.max_rows, opts.row_display)
|
||||||
|
.into_iter()
|
||||||
|
.map(|slot| match slot {
|
||||||
|
PreviewSlot::Index(i) => vec![format_vector_item(data, i)],
|
||||||
|
PreviewSlot::Ellipsis => vec!["...".to_string()],
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
render_preview(
|
||||||
|
vec![format!("Vector [{label}, attr={}]", attribute_label(attr))],
|
||||||
|
vec!["value".to_string()],
|
||||||
|
rows,
|
||||||
|
vec![format!("shape: ({len},)")],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// List
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Compact single-line summary of any `CoreValue` (used for list/dict cells).
|
||||||
|
fn inline_value_summary(value: &CoreValue) -> String {
|
||||||
|
match value {
|
||||||
|
CoreValue::Atom(atom) => truncate(format!(
|
||||||
|
"{} [{}]",
|
||||||
|
format_atom_raw(atom),
|
||||||
|
atom_primitive_label(atom)
|
||||||
|
)),
|
||||||
|
CoreValue::Vector(vector) => {
|
||||||
|
let label = primitive_label(vector.data());
|
||||||
|
let len = vector.len();
|
||||||
|
match vector.data() {
|
||||||
|
VectorData::Char(chars) => truncate(format_char_vector(chars)),
|
||||||
|
_ => truncate(format!("vector<{label}>[{len}]")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CoreValue::List(list) => truncate(format!("list[{}]", list.len())),
|
||||||
|
CoreValue::Dictionary(dict) => truncate(format!("dict[{}]", dict.len())),
|
||||||
|
CoreValue::Table(table) => {
|
||||||
|
truncate(format!("table[{}x{}]", table.len(), table.num_columns()))
|
||||||
|
}
|
||||||
|
CoreValue::UnaryPrimitive { opcode } => truncate(format!("unary(0x{opcode:02x})")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn format_list(list: &CoreList) -> String {
|
||||||
|
let len = list.len();
|
||||||
|
let opts = active_options();
|
||||||
|
let attr = list.attribute();
|
||||||
|
|
||||||
|
let rows = preview_slots(len, opts.max_rows, opts.row_display)
|
||||||
|
.into_iter()
|
||||||
|
.map(|slot| match slot {
|
||||||
|
PreviewSlot::Index(i) => vec![inline_value_summary(&list.values()[i])],
|
||||||
|
PreviewSlot::Ellipsis => vec!["...".to_string()],
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
render_preview(
|
||||||
|
vec![format!("List [list, attr={}]", attribute_label(attr))],
|
||||||
|
vec!["value".to_string()],
|
||||||
|
rows,
|
||||||
|
vec![format!("shape: ({len},)")],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Dictionary
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
pub fn format_dictionary(dict: &CoreDictionary) -> String {
|
||||||
|
let size = dict.len();
|
||||||
|
let sorted = dict.sorted();
|
||||||
|
|
||||||
|
let all_rows = vec![
|
||||||
|
vec!["keys".to_string(), inline_value_summary(dict.keys())],
|
||||||
|
vec!["values".to_string(), inline_value_summary(dict.values())],
|
||||||
|
];
|
||||||
|
|
||||||
|
let opts = active_options();
|
||||||
|
let rows = preview_slots(all_rows.len(), opts.max_rows, opts.row_display)
|
||||||
|
.into_iter()
|
||||||
|
.map(|slot| match slot {
|
||||||
|
PreviewSlot::Index(i) => all_rows[i].clone(),
|
||||||
|
PreviewSlot::Ellipsis => vec!["...".to_string(), "...".to_string()],
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
render_preview(
|
||||||
|
vec![format!("Dictionary [dict, sorted={sorted}]")],
|
||||||
|
vec!["part".to_string(), "value".to_string()],
|
||||||
|
rows,
|
||||||
|
vec![format!("shape: ({size},)")],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Table
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
fn column_primitive_label(col: &CoreValue) -> &'static str {
|
||||||
|
match col {
|
||||||
|
CoreValue::Vector(v) => primitive_label(v.data()),
|
||||||
|
CoreValue::List(_) => "list",
|
||||||
|
CoreValue::Atom(_) => "atom",
|
||||||
|
_ => "?",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn table_cell(col: &CoreValue, row_index: usize) -> String {
|
||||||
|
match col {
|
||||||
|
CoreValue::Vector(v) => match v.data() {
|
||||||
|
VectorData::Char(chars) => {
|
||||||
|
// Show a single char per cell
|
||||||
|
if row_index < chars.len() {
|
||||||
|
(chars[row_index] as char).to_string()
|
||||||
|
} else {
|
||||||
|
"?".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
data => format_vector_item(data, row_index),
|
||||||
|
},
|
||||||
|
CoreValue::Atom(atom) => format_atom_cell(atom),
|
||||||
|
CoreValue::List(list) => {
|
||||||
|
if row_index < list.len() {
|
||||||
|
inline_value_summary(&list.values()[row_index])
|
||||||
|
} else {
|
||||||
|
"?".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => inline_value_summary(col),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decode a raw column name as UTF-8, replacing invalid sequences with U+FFFD.
fn column_name(raw: &[u8]) -> String {
    let decoded = String::from_utf8_lossy(raw);
    decoded.to_string()
}
|
||||||
|
|
||||||
|
pub fn format_table(table: &CoreTable) -> String {
|
||||||
|
let num_rows = table.len();
|
||||||
|
let num_cols = table.num_columns();
|
||||||
|
let opts = active_options();
|
||||||
|
let visible_cols = num_cols.min(opts.max_columns);
|
||||||
|
|
||||||
|
// Build headers: "name\ntype" for each visible column
|
||||||
|
let mut headers: Vec<String> = table
|
||||||
|
.column_names()
|
||||||
|
.iter()
|
||||||
|
.zip(table.columns().iter())
|
||||||
|
.take(visible_cols)
|
||||||
|
.map(|(name, col)| {
|
||||||
|
let col_name = truncate(column_name(name));
|
||||||
|
let type_label = column_primitive_label(col);
|
||||||
|
format!("{col_name}\n{type_label}")
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if num_cols > visible_cols {
|
||||||
|
headers.push("...\n...".to_string());
|
||||||
|
} else if headers.is_empty() {
|
||||||
|
headers.push("value".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build rows
|
||||||
|
let row_slots = preview_slots(num_rows, opts.max_rows, opts.row_display);
|
||||||
|
let columns = table.columns();
|
||||||
|
|
||||||
|
let body_rows: Vec<Vec<String>> = row_slots
|
||||||
|
.into_iter()
|
||||||
|
.map(|slot| {
|
||||||
|
let mut row: Vec<String> = match slot {
|
||||||
|
PreviewSlot::Index(row_i) => (0..visible_cols)
|
||||||
|
.map(|col_i| table_cell(&columns[col_i], row_i))
|
||||||
|
.collect(),
|
||||||
|
PreviewSlot::Ellipsis => vec!["...".to_string(); visible_cols.max(1)],
|
||||||
|
};
|
||||||
|
if num_cols > visible_cols {
|
||||||
|
row.push("...".to_string());
|
||||||
|
}
|
||||||
|
row
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
render_preview(
|
||||||
|
vec![format!(
|
||||||
|
"Table [table, attr={}]",
|
||||||
|
attribute_label(table.attribute())
|
||||||
|
)],
|
||||||
|
headers,
|
||||||
|
body_rows,
|
||||||
|
vec![format!("shape: ({num_rows}, {num_cols})")],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// UnaryPrimitive
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn format_unary_primitive(opcode: i8) -> String {
|
||||||
|
render_preview(
|
||||||
|
vec!["UnaryPrimitive [unary_primitive]".to_string()],
|
||||||
|
vec!["opcode".to_string()],
|
||||||
|
vec![vec![format!("0x{opcode:02x}")]],
|
||||||
|
vec!["shape: (1,)".to_string()],
|
||||||
|
)
|
||||||
|
}
|
||||||
26
crates/qroissant-python/src/repr/mod.rs
Normal file
26
crates/qroissant-python/src/repr/mod.rs
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
//! Pretty repr system for qroissant Python values.
|
||||||
|
//!
|
||||||
|
//! This module provides:
|
||||||
|
//! - [`options`] — global `FormattingOptions`, `RowDisplay`, and associated
|
||||||
|
//! builder and pyfunctions (`get_formatting_options`, `set_formatting_options`,
|
||||||
|
//! `reset_formatting_options`).
|
||||||
|
//! - [`cell`] — individual q value → string conversion without Arrow.
|
||||||
|
//! - [`render`] — ASCII table rendering via `tabled` and `preview_slots`.
|
||||||
|
//! - [`format`] — shape-level formatting functions called by `__repr__`/`__str__`.
|
||||||
|
|
||||||
|
pub mod cell;
|
||||||
|
pub mod format;
|
||||||
|
pub mod options;
|
||||||
|
pub mod render;
|
||||||
|
|
||||||
|
pub use format::format_atom;
|
||||||
|
pub use format::format_dictionary;
|
||||||
|
pub use format::format_list;
|
||||||
|
pub use format::format_table;
|
||||||
|
pub use format::format_vector;
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use pyo3::types::PyModule;
|
||||||
|
|
||||||
|
pub fn register(module: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||||
|
options::register(module)
|
||||||
|
}
|
||||||
172
crates/qroissant-python/src/repr/options.rs
Normal file
172
crates/qroissant-python/src/repr/options.rs
Normal file
|
|
@ -0,0 +1,172 @@
|
||||||
|
//! Global repr formatting options and associated Python types.
|
||||||
|
|
||||||
|
use std::sync::OnceLock;
|
||||||
|
use std::sync::RwLock;
|
||||||
|
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use pyo3::types::PyModule;
|
||||||
|
|
||||||
|
/// Row selection strategy used by qroissant repr formatting.
|
||||||
|
#[derive(PartialEq, Eq, Default, Clone, Copy, Debug)]
|
||||||
|
#[pyclass(
|
||||||
|
eq,
|
||||||
|
eq_int,
|
||||||
|
frozen,
|
||||||
|
rename_all = "SCREAMING_SNAKE_CASE",
|
||||||
|
module = "qroissant"
|
||||||
|
)]
|
||||||
|
pub enum RowDisplay {
|
||||||
|
/// Show the first `max_rows` rows followed by an ellipsis when truncated.
|
||||||
|
#[default]
|
||||||
|
Head,
|
||||||
|
/// Show the first half and last half of rows with an ellipsis in the middle.
|
||||||
|
HeadTail,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl RowDisplay {
|
||||||
|
fn __repr__(&self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::Head => "RowDisplay.HEAD",
|
||||||
|
Self::HeadTail => "RowDisplay.HEAD_TAIL",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Formatting options for user-facing qroissant string representations.
|
||||||
|
///
|
||||||
|
/// Notes
|
||||||
|
/// -----
|
||||||
|
/// These options control how qroissant values render through `str(...)` and
|
||||||
|
/// `repr(...)`. Apply them process-wide through `set_formatting_options(...)`.
|
||||||
|
#[pyclass(get_all, eq, frozen, skip_from_py_object, module = "qroissant")]
|
||||||
|
#[derive(PartialEq, Eq, Clone, Debug)]
|
||||||
|
pub struct FormattingOptions {
|
||||||
|
pub max_rows: usize,
|
||||||
|
pub max_columns: usize,
|
||||||
|
pub row_display: RowDisplay,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for FormattingOptions {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
max_rows: 8,
|
||||||
|
max_columns: 6,
|
||||||
|
row_display: RowDisplay::Head,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl FormattingOptions {
|
||||||
|
#[staticmethod]
|
||||||
|
/// Create a builder initialized with qroissant's default formatting policy.
|
||||||
|
fn builder() -> FormattingOptionsBuilder {
|
||||||
|
FormattingOptionsBuilder::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __repr__(&self) -> String {
|
||||||
|
format!(
|
||||||
|
"FormattingOptions(max_rows={}, max_columns={}, row_display={})",
|
||||||
|
self.max_rows,
|
||||||
|
self.max_columns,
|
||||||
|
self.row_display.__repr__(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builder for [`FormattingOptions`].
|
||||||
|
#[pyclass(skip_from_py_object, module = "qroissant")]
|
||||||
|
#[derive(Default, Clone, Debug)]
|
||||||
|
pub struct FormattingOptionsBuilder {
|
||||||
|
inner: FormattingOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl FormattingOptionsBuilder {
|
||||||
|
#[pyo3(signature = (value, /))]
|
||||||
|
fn with_max_rows(&self, value: usize) -> Self {
|
||||||
|
let mut b = self.clone();
|
||||||
|
b.inner.max_rows = value;
|
||||||
|
b
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (value, /))]
|
||||||
|
fn with_max_columns(&self, value: usize) -> Self {
|
||||||
|
let mut b = self.clone();
|
||||||
|
b.inner.max_columns = value;
|
||||||
|
b
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (value, /))]
|
||||||
|
fn with_row_display(&self, value: RowDisplay) -> Self {
|
||||||
|
let mut b = self.clone();
|
||||||
|
b.inner.row_display = value;
|
||||||
|
b
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Finalize the builder into an immutable `FormattingOptions` instance.
|
||||||
|
fn build(&self) -> FormattingOptions {
|
||||||
|
self.inner.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __repr__(&self) -> String {
|
||||||
|
format!("FormattingOptionsBuilder({})", self.inner.__repr__())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Global state
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
fn options_lock() -> &'static RwLock<FormattingOptions> {
|
||||||
|
static OPTIONS: OnceLock<RwLock<FormattingOptions>> = OnceLock::new();
|
||||||
|
OPTIONS.get_or_init(|| RwLock::new(FormattingOptions::default()))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn active_options() -> FormattingOptions {
|
||||||
|
match options_lock().read() {
|
||||||
|
Ok(guard) => guard.clone(),
|
||||||
|
Err(poisoned) => poisoned.into_inner().clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn store_options(options: FormattingOptions) {
|
||||||
|
match options_lock().write() {
|
||||||
|
Ok(mut guard) => *guard = options,
|
||||||
|
Err(poisoned) => *poisoned.into_inner() = options,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Python-visible functions
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
/// Return the active qroissant repr formatting options.
|
||||||
|
pub fn get_formatting_options() -> FormattingOptions {
|
||||||
|
active_options()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
#[pyo3(signature = (options, /))]
|
||||||
|
/// Update the active qroissant repr formatting options.
|
||||||
|
pub fn set_formatting_options(options: PyRef<'_, FormattingOptions>) {
|
||||||
|
store_options(options.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
/// Restore qroissant's default repr formatting options.
|
||||||
|
pub fn reset_formatting_options() {
|
||||||
|
store_options(FormattingOptions::default());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add the formatting classes and functions to `module`.
///
/// Registers `RowDisplay`, `FormattingOptions`, and
/// `FormattingOptionsBuilder`, followed by the get/set/reset functions.
pub fn register(module: &Bound<'_, PyModule>) -> PyResult<()> {
    // Classes first.
    module.add_class::<RowDisplay>()?;
    module.add_class::<FormattingOptions>()?;
    module.add_class::<FormattingOptionsBuilder>()?;

    // Then the module-level functions, in the same order as before.
    let getter = wrap_pyfunction!(get_formatting_options, module)?;
    module.add_function(getter)?;
    let setter = wrap_pyfunction!(set_formatting_options, module)?;
    module.add_function(setter)?;
    let resetter = wrap_pyfunction!(reset_formatting_options, module)?;
    module.add_function(resetter)?;

    Ok(())
}
|
||||||
80
crates/qroissant-python/src/repr/render.rs
Normal file
80
crates/qroissant-python/src/repr/render.rs
Normal file
|
|
@ -0,0 +1,80 @@
|
||||||
|
//! ASCII table rendering via the `tabled` crate and row-slot utilities.
|
||||||
|
|
||||||
|
use tabled::builder::Builder;
|
||||||
|
use tabled::settings::Alignment;
|
||||||
|
use tabled::settings::Modify;
|
||||||
|
use tabled::settings::Style;
|
||||||
|
use tabled::settings::object::Rows;
|
||||||
|
use tabled::settings::style::HorizontalLine;
|
||||||
|
|
||||||
|
use super::options::RowDisplay;
|
||||||
|
|
||||||
|
/// One entry of a row preview, as produced by `preview_slots`.
#[derive(Clone, Copy)]
pub enum PreviewSlot {
    /// A concrete row, identified by its zero-based index in the full value.
    Index(usize),
    /// A separator standing in for the rows that are not shown.
    Ellipsis,
}
|
||||||
|
|
||||||
|
/// Compute the row slots to show when rendering at most `max_rows` out of
|
||||||
|
/// `total`, using `row_display` to decide whether to use head or head+tail.
|
||||||
|
pub fn preview_slots(total: usize, max_rows: usize, row_display: RowDisplay) -> Vec<PreviewSlot> {
|
||||||
|
if total == 0 || max_rows == 0 {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
|
||||||
|
if total <= max_rows {
|
||||||
|
return (0..total).map(PreviewSlot::Index).collect();
|
||||||
|
}
|
||||||
|
|
||||||
|
match row_display {
|
||||||
|
RowDisplay::Head => {
|
||||||
|
let mut slots = (0..max_rows).map(PreviewSlot::Index).collect::<Vec<_>>();
|
||||||
|
slots.push(PreviewSlot::Ellipsis);
|
||||||
|
slots
|
||||||
|
}
|
||||||
|
RowDisplay::HeadTail if max_rows == 1 => {
|
||||||
|
vec![PreviewSlot::Index(0), PreviewSlot::Ellipsis]
|
||||||
|
}
|
||||||
|
RowDisplay::HeadTail => {
|
||||||
|
let head = max_rows.div_ceil(2);
|
||||||
|
let tail = max_rows / 2;
|
||||||
|
let mut slots = (0..head).map(PreviewSlot::Index).collect::<Vec<_>>();
|
||||||
|
slots.push(PreviewSlot::Ellipsis);
|
||||||
|
let tail_start = total.saturating_sub(tail);
|
||||||
|
slots.extend((tail_start..total).map(PreviewSlot::Index));
|
||||||
|
slots
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build an ASCII table with a modern style and a horizontal line after the
|
||||||
|
/// header row.
|
||||||
|
pub fn render_table(headers: Vec<String>, rows: Vec<Vec<String>>) -> String {
|
||||||
|
let mut builder = Builder::default();
|
||||||
|
builder.push_record(headers);
|
||||||
|
for row in rows {
|
||||||
|
builder.push_record(row);
|
||||||
|
}
|
||||||
|
let mut table = builder.build();
|
||||||
|
table.with(
|
||||||
|
Style::modern()
|
||||||
|
.remove_horizontal()
|
||||||
|
.horizontals([(1, HorizontalLine::inherit(Style::modern()))]),
|
||||||
|
);
|
||||||
|
table.with(Modify::new(Rows::first()).with(Alignment::left()));
|
||||||
|
table.to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Assemble a full repr block: optional title lines, a table, optional footer.
|
||||||
|
pub fn render_preview(
|
||||||
|
title_lines: Vec<String>,
|
||||||
|
headers: Vec<String>,
|
||||||
|
rows: Vec<Vec<String>>,
|
||||||
|
footer_lines: Vec<String>,
|
||||||
|
) -> String {
|
||||||
|
let mut sections = title_lines;
|
||||||
|
sections.push(render_table(headers, rows));
|
||||||
|
sections.extend(footer_lines);
|
||||||
|
sections.join("\n")
|
||||||
|
}
|
||||||
215
crates/qroissant-python/src/serde.rs
Normal file
215
crates/qroissant-python/src/serde.rs
Normal file
|
|
@ -0,0 +1,215 @@
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use pyo3::types::PyAny;
|
||||||
|
use pyo3::types::PyBytes;
|
||||||
|
use qroissant_arrow::ListProjection;
|
||||||
|
use qroissant_arrow::ProjectionOptions;
|
||||||
|
use qroissant_arrow::StringProjection;
|
||||||
|
use qroissant_arrow::SymbolProjection;
|
||||||
|
use qroissant_core::DecodeOptions as CoreDecodeOptions;
|
||||||
|
use qroissant_core::Value as CoreValue;
|
||||||
|
use qroissant_core::decode_message_with_options;
|
||||||
|
use qroissant_core::encode_message;
|
||||||
|
use qroissant_transport::extract_q_error;
|
||||||
|
|
||||||
|
use crate::errors::PythonError;
|
||||||
|
use crate::errors::PythonResult;
|
||||||
|
use crate::errors::to_py_err;
|
||||||
|
use crate::types::Compression;
|
||||||
|
use crate::types::DecodeOptions;
|
||||||
|
use crate::types::EncodeOptions;
|
||||||
|
use crate::types::Encoding;
|
||||||
|
use crate::types::ListInterpretation;
|
||||||
|
use crate::types::MessageType;
|
||||||
|
use crate::types::StringInterpretation;
|
||||||
|
use crate::types::SymbolInterpretation;
|
||||||
|
use crate::values::core_value_to_python_with_opts;
|
||||||
|
use crate::values::python_to_core_value;
|
||||||
|
|
||||||
|
/// Maps Python-facing "Interpretation" options to Rust-internal "Projection" options.
|
||||||
|
///
|
||||||
|
/// The Python API uses "Interpretation" (e.g. `SymbolInterpretation`) as it describes
|
||||||
|
/// how the user wants data to be interpreted. The Rust/Arrow layer uses "Projection"
|
||||||
|
/// (e.g. `SymbolProjection`) as it describes how values are projected into Arrow arrays.
|
||||||
|
/// Both refer to the same concept viewed from different perspectives.
|
||||||
|
pub fn decode_options_to_proj_opts(opts: Option<&DecodeOptions>) -> Arc<ProjectionOptions> {
|
||||||
|
let opts = opts.map(|o| o.clone()).unwrap_or_default();
|
||||||
|
Arc::new(ProjectionOptions {
|
||||||
|
symbol: match opts.symbol_interpretation_value() {
|
||||||
|
SymbolInterpretation::Utf8 => SymbolProjection::Utf8,
|
||||||
|
SymbolInterpretation::LargeUtf8 => SymbolProjection::LargeUtf8,
|
||||||
|
SymbolInterpretation::Utf8View => SymbolProjection::Utf8View,
|
||||||
|
SymbolInterpretation::Dictionary => SymbolProjection::Dictionary,
|
||||||
|
SymbolInterpretation::RawBytes => SymbolProjection::RawBytes,
|
||||||
|
},
|
||||||
|
string: match opts.string_interpretation_value() {
|
||||||
|
StringInterpretation::Utf8 => StringProjection::Utf8,
|
||||||
|
StringInterpretation::Binary => StringProjection::Binary,
|
||||||
|
},
|
||||||
|
list: match opts.list_interpretation_value() {
|
||||||
|
ListInterpretation::List => ListProjection::List,
|
||||||
|
ListInterpretation::LargeList => ListProjection::LargeList,
|
||||||
|
ListInterpretation::ListView => ListProjection::ListView,
|
||||||
|
},
|
||||||
|
union_mode: match opts.union_mode_value() {
|
||||||
|
crate::types::UnionMode::Dense => qroissant_arrow::UnionMode::Dense,
|
||||||
|
crate::types::UnionMode::Sparse => qroissant_arrow::UnionMode::Sparse,
|
||||||
|
},
|
||||||
|
treat_infinity_as_null: opts.treat_infinity_as_null(),
|
||||||
|
parallel: opts.parallel_value(),
|
||||||
|
assume_symbol_utf8: opts.assume_symbol_utf8_value(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode_options_to_core(opts: &DecodeOptions) -> CoreDecodeOptions {
|
||||||
|
CoreDecodeOptions {
|
||||||
|
parallel: opts.parallel_value(),
|
||||||
|
..CoreDecodeOptions::default()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ensure_default_encode_options(options: Option<&EncodeOptions>) -> PythonResult<()> {
|
||||||
|
if let Some(options) = options
|
||||||
|
&& options != &EncodeOptions::default()
|
||||||
|
{
|
||||||
|
return Err(PythonError::NotImplemented(
|
||||||
|
"custom encode options are not implemented yet".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn decode_core_value(
|
||||||
|
payload: bytes::Bytes,
|
||||||
|
options: Option<&DecodeOptions>,
|
||||||
|
) -> PythonResult<(CoreValue, Arc<ProjectionOptions>)> {
|
||||||
|
if let Some(message) =
|
||||||
|
extract_q_error(payload.as_ref()).map_err(crate::errors::map_transport_error)?
|
||||||
|
{
|
||||||
|
return Err(PythonError::QRuntime(message));
|
||||||
|
}
|
||||||
|
let core_opts = options.map(decode_options_to_core).unwrap_or_default();
|
||||||
|
let decoded = decode_message_with_options(payload, &core_opts)
|
||||||
|
.map_err(|error| PythonError::Decode(error.to_string()))?;
|
||||||
|
let proj_opts = decode_options_to_proj_opts(options);
|
||||||
|
let (_header, value) = decoded.into_parts();
|
||||||
|
Ok((value, proj_opts))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Wraps a Python `bytes` object in a [`bytes::Bytes`] without copying.
|
||||||
|
///
|
||||||
|
/// CPython `bytes` objects are immutable and their backing memory is never
|
||||||
|
/// moved, so it is sound to hold a raw pointer into them as long as the
|
||||||
|
/// `Py<PyBytes>` reference (which increments the CPython refcount) is alive.
|
||||||
|
struct PinnedPyBytes {
|
||||||
|
_owner: Py<PyBytes>,
|
||||||
|
ptr: *const u8,
|
||||||
|
len: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
// SAFETY: `Py<PyBytes>` is `Send`, and the pointed-to memory is immutable.
|
||||||
|
unsafe impl Send for PinnedPyBytes {}
|
||||||
|
// SAFETY: The data is immutable and the owner keeps it alive.
|
||||||
|
unsafe impl Sync for PinnedPyBytes {}
|
||||||
|
|
||||||
|
impl AsRef<[u8]> for PinnedPyBytes {
|
||||||
|
#[inline]
|
||||||
|
fn as_ref(&self) -> &[u8] {
|
||||||
|
// SAFETY: `ptr` is valid for `len` bytes while `_owner` keeps the
|
||||||
|
// CPython bytes object alive (refcount > 0, no deallocation possible).
|
||||||
|
unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Minimum payload size for the zero-copy `PinnedPyBytes` path.
|
||||||
|
///
|
||||||
|
/// For small payloads the `Arc` allocation inside `Bytes::from_owner` costs
|
||||||
|
/// more than a plain `memcpy`, so we fall back to copying below this threshold.
|
||||||
|
const ZERO_COPY_MIN_BYTES: usize = 32 * 1024; // 32 KB
|
||||||
|
|
||||||
|
/// Converts a Python `bytes`-like object into a [`bytes::Bytes`].
|
||||||
|
///
|
||||||
|
/// For plain `bytes` objects ≥ [`ZERO_COPY_MIN_BYTES`] the underlying buffer
|
||||||
|
/// is **borrowed without copying** via [`bytes::Bytes::from_owner`].
|
||||||
|
/// Smaller payloads and other buffer protocols (bytearray, memoryview) take a
|
||||||
|
/// single copy — same cost as before.
|
||||||
|
fn payload_to_bytes(payload: &Bound<'_, PyAny>) -> PyResult<bytes::Bytes> {
|
||||||
|
if let Ok(pb) = payload.downcast::<PyBytes>() {
|
||||||
|
let data = pb.as_bytes();
|
||||||
|
if data.len() >= ZERO_COPY_MIN_BYTES {
|
||||||
|
let pinned = PinnedPyBytes {
|
||||||
|
_owner: pb.clone().unbind(),
|
||||||
|
ptr: data.as_ptr(),
|
||||||
|
len: data.len(),
|
||||||
|
};
|
||||||
|
return Ok(bytes::Bytes::from_owner(pinned));
|
||||||
|
}
|
||||||
|
return Ok(bytes::Bytes::copy_from_slice(data));
|
||||||
|
}
|
||||||
|
Ok(bytes::Bytes::from(payload.extract::<Vec<u8>>()?))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn encode_core_value_bytes(
|
||||||
|
value: &CoreValue,
|
||||||
|
options: Option<&EncodeOptions>,
|
||||||
|
encoding: Encoding,
|
||||||
|
message_type: MessageType,
|
||||||
|
compression: Compression,
|
||||||
|
) -> PythonResult<Vec<u8>> {
|
||||||
|
ensure_default_encode_options(options)?;
|
||||||
|
encode_message(
|
||||||
|
value,
|
||||||
|
encoding.into(),
|
||||||
|
message_type.into(),
|
||||||
|
compression.into(),
|
||||||
|
)
|
||||||
|
.map_err(|error| PythonError::Protocol(error.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
#[pyo3(signature = (payload, /, *, options=None))]
|
||||||
|
pub fn decode(
|
||||||
|
py: Python<'_>,
|
||||||
|
payload: &Bound<'_, PyAny>,
|
||||||
|
options: Option<&DecodeOptions>,
|
||||||
|
) -> PyResult<Py<PyAny>> {
|
||||||
|
let bytes = payload_to_bytes(payload)?;
|
||||||
|
let options_clone = options.cloned();
|
||||||
|
let (value, proj_opts) = py
|
||||||
|
.detach(|| decode_core_value(bytes, options_clone.as_ref()))
|
||||||
|
.map_err(to_py_err)?;
|
||||||
|
core_value_to_python_with_opts(py, value, proj_opts)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyfunction]
|
||||||
|
#[pyo3(signature = (value, /, *, options=None, encoding=Encoding::LittleEndian, message_type=MessageType::Asynchronous, compression=Compression::Uncompressed))]
|
||||||
|
pub fn encode(
|
||||||
|
py: Python<'_>,
|
||||||
|
value: &Bound<'_, PyAny>,
|
||||||
|
options: Option<&EncodeOptions>,
|
||||||
|
encoding: Encoding,
|
||||||
|
message_type: MessageType,
|
||||||
|
compression: Compression,
|
||||||
|
) -> PyResult<Py<PyBytes>> {
|
||||||
|
let value = python_to_core_value(value)?;
|
||||||
|
let options_clone = options.cloned();
|
||||||
|
let payload = py
|
||||||
|
.detach(|| {
|
||||||
|
encode_core_value_bytes(
|
||||||
|
&value,
|
||||||
|
options_clone.as_ref(),
|
||||||
|
encoding,
|
||||||
|
message_type,
|
||||||
|
compression,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.map_err(to_py_err)?;
|
||||||
|
Ok(PyBytes::new(py, &payload).unbind())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds the `decode` and `encode` functions to the given Python module.
pub fn register(module: &Bound<'_, PyModule>) -> PyResult<()> {
    let decode_fn = wrap_pyfunction!(decode, module)?;
    module.add_function(decode_fn)?;

    let encode_fn = wrap_pyfunction!(encode, module)?;
    module.add_function(encode_fn)?;

    Ok(())
}
|
||||||
1325
crates/qroissant-python/src/types.rs
Normal file
1325
crates/qroissant-python/src/types.rs
Normal file
File diff suppressed because it is too large
Load diff
925
crates/qroissant-python/src/values.rs
Normal file
925
crates/qroissant-python/src/values.rs
Normal file
|
|
@ -0,0 +1,925 @@
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use pyo3::exceptions::PyIndexError;
|
||||||
|
use pyo3::exceptions::PyKeyError;
|
||||||
|
use pyo3::exceptions::PyNotImplementedError;
|
||||||
|
use pyo3::exceptions::PyValueError;
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use pyo3::types::PyAny;
|
||||||
|
use pyo3::types::PyBytes;
|
||||||
|
use pyo3::types::PyCapsule;
|
||||||
|
use pyo3::types::PyDict;
|
||||||
|
use pyo3::types::PyIterator;
|
||||||
|
use pyo3::types::PyList;
|
||||||
|
use pyo3::types::PyTuple;
|
||||||
|
use pyo3_arrow::ffi::ArrayIterator;
|
||||||
|
use pyo3_arrow::ffi::to_array_pycapsules;
|
||||||
|
use pyo3_arrow::ffi::to_stream_pycapsule;
|
||||||
|
use qroissant_arrow::IngestionError;
|
||||||
|
use qroissant_arrow::ProjectionOptions;
|
||||||
|
use qroissant_arrow::ingest_array;
|
||||||
|
use qroissant_arrow::ingest_record_batch;
|
||||||
|
use qroissant_arrow::ingest_record_batch_reader;
|
||||||
|
use qroissant_arrow::project;
|
||||||
|
use qroissant_arrow::project_table;
|
||||||
|
use qroissant_core::Atom as CoreAtom;
|
||||||
|
use qroissant_core::Dictionary as CoreDictionary;
|
||||||
|
use qroissant_core::List as CoreList;
|
||||||
|
use qroissant_core::Table as CoreTable;
|
||||||
|
use qroissant_core::Value as CoreValue;
|
||||||
|
use qroissant_core::Vector as CoreVector;
|
||||||
|
use qroissant_core::VectorData;
|
||||||
|
|
||||||
|
use crate::errors::to_py_err;
|
||||||
|
use crate::types::Attribute;
|
||||||
|
use crate::types::Compression;
|
||||||
|
use crate::types::Encoding;
|
||||||
|
use crate::types::MessageType;
|
||||||
|
use crate::types::Primitive;
|
||||||
|
use crate::types::Shape;
|
||||||
|
use crate::types::Type;
|
||||||
|
|
||||||
|
#[pyclass(subclass, module = "qroissant")]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct Value {
|
||||||
|
inner: CoreValue,
|
||||||
|
projection_opts: Arc<ProjectionOptions>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Value {
|
||||||
|
pub fn new(inner: CoreValue) -> Self {
|
||||||
|
Self {
|
||||||
|
inner,
|
||||||
|
projection_opts: Arc::new(ProjectionOptions::default()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new_with_opts(inner: CoreValue, opts: Arc<ProjectionOptions>) -> Self {
|
||||||
|
Self {
|
||||||
|
inner,
|
||||||
|
projection_opts: opts,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn inner(&self) -> &CoreValue {
|
||||||
|
&self.inner
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_inner(self) -> CoreValue {
|
||||||
|
self.inner
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn projection_opts(&self) -> &Arc<ProjectionOptions> {
|
||||||
|
&self.projection_opts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl Value {
|
||||||
|
#[getter]
|
||||||
|
fn qtype(&self) -> Type {
|
||||||
|
Type::from(self.inner.qtype())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn primitive(&self) -> Option<Primitive> {
|
||||||
|
self.inner.qtype().primitive.map(Primitive::from)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn shape(&self) -> Shape {
|
||||||
|
Shape::from(self.inner.qtype().shape)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn attribute(&self) -> Option<Attribute> {
|
||||||
|
self.inner.qtype().attribute.map(Attribute::from)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (*, options=None, encoding=Encoding::LittleEndian, message_type=MessageType::Asynchronous, compression=Compression::Uncompressed))]
|
||||||
|
fn serialize(
|
||||||
|
&self,
|
||||||
|
options: Option<&crate::types::EncodeOptions>,
|
||||||
|
encoding: Encoding,
|
||||||
|
message_type: MessageType,
|
||||||
|
compression: Compression,
|
||||||
|
) -> PyResult<Py<PyBytes>> {
|
||||||
|
let inner = self.inner.clone();
|
||||||
|
let options_clone = options.cloned();
|
||||||
|
Python::attach(|py| {
|
||||||
|
let payload = py
|
||||||
|
.detach(|| {
|
||||||
|
crate::serde::encode_core_value_bytes(
|
||||||
|
&inner,
|
||||||
|
options_clone.as_ref(),
|
||||||
|
encoding,
|
||||||
|
message_type,
|
||||||
|
compression,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.map_err(to_py_err)?;
|
||||||
|
Ok(PyBytes::new(py, &payload).unbind())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyclass(extends = Value, module = "qroissant")]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct Atom;
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl Atom {
|
||||||
|
#[new]
|
||||||
|
fn new(qtype: PyRef<'_, Type>, value: &Bound<'_, PyAny>) -> PyResult<(Self, Value)> {
|
||||||
|
let core = atom_from_python(&qtype, value)?;
|
||||||
|
Ok((Self, Value::new(CoreValue::Atom(core))))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_py(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyAny>> {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Atom(atom) => atom_to_python(py, atom),
|
||||||
|
_ => unreachable!("Atom instances always hold q atoms"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn value(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyAny>> {
|
||||||
|
Self::as_py(slf, py)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_null(slf: PyRef<'_, Self>) -> bool {
|
||||||
|
use qroissant_kernels::nulls::*;
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Atom(atom) => match atom {
|
||||||
|
CoreAtom::Boolean(_)
|
||||||
|
| CoreAtom::Guid(_)
|
||||||
|
| CoreAtom::Byte(_)
|
||||||
|
| CoreAtom::Char(_)
|
||||||
|
| CoreAtom::Symbol(_) => false,
|
||||||
|
CoreAtom::Short(v) => *v == Q_NULL_SHORT,
|
||||||
|
CoreAtom::Int(v) => *v == Q_NULL_INT,
|
||||||
|
CoreAtom::Long(v) => *v == Q_NULL_LONG,
|
||||||
|
CoreAtom::Real(v) => v.is_nan(),
|
||||||
|
CoreAtom::Float(v) => v.is_nan(),
|
||||||
|
CoreAtom::Timestamp(v) => *v == Q_NULL_TIMESTAMP,
|
||||||
|
CoreAtom::Month(v) => *v == Q_NULL_MONTH,
|
||||||
|
CoreAtom::Date(v) => *v == Q_NULL_DATE,
|
||||||
|
CoreAtom::Datetime(v) => v.is_nan(),
|
||||||
|
CoreAtom::Timespan(v) => *v == Q_NULL_TIMESPAN,
|
||||||
|
CoreAtom::Minute(v) => *v == Q_NULL_MINUTE,
|
||||||
|
CoreAtom::Second(v) => *v == Q_NULL_SECOND,
|
||||||
|
CoreAtom::Time(v) => *v == Q_NULL_TIME,
|
||||||
|
},
|
||||||
|
_ => unreachable!("Atom instances always hold q atoms"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_infinite(slf: PyRef<'_, Self>) -> bool {
|
||||||
|
use qroissant_kernels::nulls::*;
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Atom(atom) => match atom {
|
||||||
|
CoreAtom::Boolean(_)
|
||||||
|
| CoreAtom::Guid(_)
|
||||||
|
| CoreAtom::Byte(_)
|
||||||
|
| CoreAtom::Char(_)
|
||||||
|
| CoreAtom::Symbol(_) => false,
|
||||||
|
CoreAtom::Short(v) => *v == Q_INF_SHORT || *v == Q_NINF_SHORT,
|
||||||
|
CoreAtom::Int(v) => *v == Q_INF_INT || *v == Q_NINF_INT,
|
||||||
|
CoreAtom::Long(v) => *v == Q_INF_LONG || *v == Q_NINF_LONG,
|
||||||
|
CoreAtom::Real(v) => v.is_infinite(),
|
||||||
|
CoreAtom::Float(v) => v.is_infinite(),
|
||||||
|
CoreAtom::Timestamp(v) => *v == Q_INF_TIMESTAMP || *v == Q_NINF_TIMESTAMP,
|
||||||
|
CoreAtom::Month(v) => *v == Q_INF_MONTH || *v == Q_NINF_MONTH,
|
||||||
|
CoreAtom::Date(v) => *v == Q_INF_DATE || *v == Q_NINF_DATE,
|
||||||
|
CoreAtom::Datetime(v) => v.is_infinite(),
|
||||||
|
CoreAtom::Timespan(v) => *v == Q_INF_TIMESPAN || *v == Q_NINF_TIMESPAN,
|
||||||
|
CoreAtom::Minute(v) => *v == Q_INF_MINUTE || *v == Q_NINF_MINUTE,
|
||||||
|
CoreAtom::Second(v) => *v == Q_INF_SECOND || *v == Q_NINF_SECOND,
|
||||||
|
CoreAtom::Time(v) => *v == Q_INF_TIME || *v == Q_NINF_TIME,
|
||||||
|
},
|
||||||
|
_ => unreachable!("Atom instances always hold q atoms"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (requested_schema=None))]
|
||||||
|
fn __arrow_c_array__(
|
||||||
|
slf: PyRef<'_, Self>,
|
||||||
|
py: Python<'_>,
|
||||||
|
requested_schema: Option<Bound<'_, PyAny>>,
|
||||||
|
) -> PyResult<Py<PyTuple>> {
|
||||||
|
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
|
||||||
|
.map(|s| s.downcast_into::<PyCapsule>())
|
||||||
|
.transpose()?;
|
||||||
|
let opts = slf.as_super().projection_opts().clone();
|
||||||
|
let export = project(slf.as_super().inner(), &opts)
|
||||||
|
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
|
||||||
|
let capsules =
|
||||||
|
to_array_pycapsules(py, export.field, export.array.as_ref(), schema_capsule)?;
|
||||||
|
Ok(capsules.unbind())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __repr__(slf: PyRef<'_, Self>) -> String {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Atom(atom) => crate::repr::format_atom(atom),
|
||||||
|
_ => unreachable!("Atom instances always hold q atoms"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __str__(slf: PyRef<'_, Self>) -> String {
|
||||||
|
Self::__repr__(slf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyclass(extends = Value, module = "qroissant")]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct Vector;
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl Vector {
|
||||||
|
#[new]
|
||||||
|
fn new(qtype: PyRef<'_, Type>, values: Option<&Bound<'_, PyAny>>) -> PyResult<(Self, Value)> {
|
||||||
|
let core = if let Some(values) = values {
|
||||||
|
vector_from_python(&qtype, values)?
|
||||||
|
} else {
|
||||||
|
let empty = PyList::empty(qtype.py());
|
||||||
|
vector_from_python(&qtype, empty.as_any())?
|
||||||
|
};
|
||||||
|
Ok((Self, Value::new(CoreValue::Vector(core))))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __len__(slf: PyRef<'_, Self>) -> usize {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Vector(vector) => vector.len(),
|
||||||
|
_ => unreachable!("Vector instances always hold q vectors"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __iter__(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyAny>> {
|
||||||
|
let list = Self::to_list(slf, py)?;
|
||||||
|
let iter = PyIterator::from_object(list.bind(py).as_any())?;
|
||||||
|
Ok(iter.into_any().unbind())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __getitem__(slf: PyRef<'_, Self>, py: Python<'_>, index: isize) -> PyResult<Py<PyAny>> {
|
||||||
|
let vector = match slf.as_super().inner() {
|
||||||
|
CoreValue::Vector(vector) => vector,
|
||||||
|
_ => unreachable!("Vector instances always hold q vectors"),
|
||||||
|
};
|
||||||
|
let index = normalize_index(index, vector.len())?;
|
||||||
|
vector_item_to_python(py, vector, index)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_list(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyList>> {
|
||||||
|
let vector = match slf.as_super().inner() {
|
||||||
|
CoreValue::Vector(vector) => vector,
|
||||||
|
_ => unreachable!("Vector instances always hold q vectors"),
|
||||||
|
};
|
||||||
|
vector_to_pylist(py, vector)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (requested_schema=None))]
|
||||||
|
fn __arrow_c_array__(
|
||||||
|
slf: PyRef<'_, Self>,
|
||||||
|
py: Python<'_>,
|
||||||
|
requested_schema: Option<Bound<'_, PyAny>>,
|
||||||
|
) -> PyResult<Py<PyTuple>> {
|
||||||
|
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
|
||||||
|
.map(|s| s.downcast_into::<PyCapsule>())
|
||||||
|
.transpose()?;
|
||||||
|
let opts = slf.as_super().projection_opts().clone();
|
||||||
|
let export = project(slf.as_super().inner(), &opts)
|
||||||
|
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
|
||||||
|
let capsules =
|
||||||
|
to_array_pycapsules(py, export.field, export.array.as_ref(), schema_capsule)?;
|
||||||
|
Ok(capsules.unbind())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __repr__(slf: PyRef<'_, Self>) -> String {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Vector(vector) => crate::repr::format_vector(vector),
|
||||||
|
_ => unreachable!("Vector instances always hold q vectors"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __str__(slf: PyRef<'_, Self>) -> String {
|
||||||
|
Self::__repr__(slf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyclass(extends = Value, module = "qroissant")]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct List;
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl List {
|
||||||
|
#[new]
|
||||||
|
fn new(qtype: PyRef<'_, Type>, values: Option<&Bound<'_, PyAny>>) -> PyResult<(Self, Value)> {
|
||||||
|
let core = if let Some(values) = values {
|
||||||
|
list_from_python(&qtype, values)?
|
||||||
|
} else {
|
||||||
|
let empty = PyList::empty(qtype.py());
|
||||||
|
list_from_python(&qtype, empty.as_any())?
|
||||||
|
};
|
||||||
|
Ok((Self, Value::new(CoreValue::List(core))))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __len__(slf: PyRef<'_, Self>) -> usize {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::List(list) => list.len(),
|
||||||
|
_ => unreachable!("List instances always hold q lists"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __iter__(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyAny>> {
|
||||||
|
let list = Self::to_list(slf, py)?;
|
||||||
|
let iter = PyIterator::from_object(list.bind(py).as_any())?;
|
||||||
|
Ok(iter.into_any().unbind())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __getitem__(slf: PyRef<'_, Self>, py: Python<'_>, index: isize) -> PyResult<Py<PyAny>> {
|
||||||
|
let list = match slf.as_super().inner() {
|
||||||
|
CoreValue::List(list) => list,
|
||||||
|
_ => unreachable!("List instances always hold q lists"),
|
||||||
|
};
|
||||||
|
let index = normalize_index(index, list.len())?;
|
||||||
|
core_value_to_python(py, list.values()[index].clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_list(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyList>> {
|
||||||
|
let list = match slf.as_super().inner() {
|
||||||
|
CoreValue::List(list) => list,
|
||||||
|
_ => unreachable!("List instances always hold q lists"),
|
||||||
|
};
|
||||||
|
let mut values = Vec::with_capacity(list.len());
|
||||||
|
for value in list.values() {
|
||||||
|
values.push(core_value_to_python(py, value.clone())?);
|
||||||
|
}
|
||||||
|
Ok(PyList::new(py, values)?.unbind())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (requested_schema=None))]
|
||||||
|
fn __arrow_c_array__(
|
||||||
|
slf: PyRef<'_, Self>,
|
||||||
|
py: Python<'_>,
|
||||||
|
requested_schema: Option<Bound<'_, PyAny>>,
|
||||||
|
) -> PyResult<Py<PyTuple>> {
|
||||||
|
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
|
||||||
|
.map(|s| s.downcast_into::<PyCapsule>())
|
||||||
|
.transpose()?;
|
||||||
|
let opts = slf.as_super().projection_opts().clone();
|
||||||
|
let export = project(slf.as_super().inner(), &opts)
|
||||||
|
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
|
||||||
|
let capsules =
|
||||||
|
to_array_pycapsules(py, export.field, export.array.as_ref(), schema_capsule)?;
|
||||||
|
Ok(capsules.unbind())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __repr__(slf: PyRef<'_, Self>) -> String {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::List(list) => crate::repr::format_list(list),
|
||||||
|
_ => unreachable!("List instances always hold q lists"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __str__(slf: PyRef<'_, Self>) -> String {
|
||||||
|
Self::__repr__(slf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyclass(extends = Value, module = "qroissant")]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct Dictionary;
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl Dictionary {
|
||||||
|
#[new]
|
||||||
|
fn new(
|
||||||
|
qtype: PyRef<'_, Type>,
|
||||||
|
keys: &Bound<'_, PyAny>,
|
||||||
|
values: &Bound<'_, PyAny>,
|
||||||
|
) -> PyResult<(Self, Value)> {
|
||||||
|
let core = dictionary_from_python(&qtype, keys, values)?;
|
||||||
|
Ok((Self, Value::new(CoreValue::Dictionary(core))))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn keys(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyAny>> {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Dictionary(dictionary) => {
|
||||||
|
core_value_to_python(py, dictionary.keys().clone())
|
||||||
|
}
|
||||||
|
_ => unreachable!("Dictionary instances always hold q dictionaries"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn values(slf: PyRef<'_, Self>, py: Python<'_>) -> PyResult<Py<PyAny>> {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Dictionary(dictionary) => {
|
||||||
|
core_value_to_python(py, dictionary.values().clone())
|
||||||
|
}
|
||||||
|
_ => unreachable!("Dictionary instances always hold q dictionaries"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __len__(slf: PyRef<'_, Self>) -> usize {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Dictionary(dictionary) => dictionary.len(),
|
||||||
|
_ => unreachable!("Dictionary instances always hold q dictionaries"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (requested_schema=None))]
|
||||||
|
fn __arrow_c_array__(
|
||||||
|
slf: PyRef<'_, Self>,
|
||||||
|
py: Python<'_>,
|
||||||
|
requested_schema: Option<Bound<'_, PyAny>>,
|
||||||
|
) -> PyResult<Py<PyTuple>> {
|
||||||
|
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
|
||||||
|
.map(|s| s.downcast_into::<PyCapsule>())
|
||||||
|
.transpose()?;
|
||||||
|
let opts = slf.as_super().projection_opts().clone();
|
||||||
|
let export = project(slf.as_super().inner(), &opts)
|
||||||
|
.map_err(|e| PyNotImplementedError::new_err(e.to_string()))?;
|
||||||
|
let capsules =
|
||||||
|
to_array_pycapsules(py, export.field, export.array.as_ref(), schema_capsule)?;
|
||||||
|
Ok(capsules.unbind())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __repr__(slf: PyRef<'_, Self>) -> String {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Dictionary(dict) => crate::repr::format_dictionary(dict),
|
||||||
|
_ => unreachable!("Dictionary instances always hold q dictionaries"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __str__(slf: PyRef<'_, Self>) -> String {
|
||||||
|
Self::__repr__(slf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyclass(extends = Value, module = "qroissant")]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct Table;
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl Table {
|
||||||
|
#[new]
|
||||||
|
fn new(qtype: PyRef<'_, Type>, columns: Option<&Bound<'_, PyAny>>) -> PyResult<(Self, Value)> {
|
||||||
|
let core = if let Some(columns) = columns {
|
||||||
|
table_from_python(&qtype, columns)?
|
||||||
|
} else {
|
||||||
|
let empty = PyDict::new(qtype.py());
|
||||||
|
table_from_python(&qtype, empty.as_any())?
|
||||||
|
};
|
||||||
|
Ok((Self, Value::new(CoreValue::Table(core))))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn columns(slf: PyRef<'_, Self>) -> PyResult<Vec<String>> {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Table(table) => table
|
||||||
|
.column_names()
|
||||||
|
.iter()
|
||||||
|
.map(|name| {
|
||||||
|
String::from_utf8(name.to_vec()).map_err(|_| {
|
||||||
|
PyValueError::new_err("q table column names must be valid UTF-8 for now")
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
|
_ => unreachable!("Table instances always hold q tables"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn num_rows(slf: PyRef<'_, Self>) -> usize {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Table(table) => table.len(),
|
||||||
|
_ => unreachable!("Table instances always hold q tables"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[getter]
|
||||||
|
fn num_columns(slf: PyRef<'_, Self>) -> usize {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Table(table) => table.num_columns(),
|
||||||
|
_ => unreachable!("Table instances always hold q tables"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn column(slf: PyRef<'_, Self>, py: Python<'_>, name: &str) -> PyResult<Py<PyAny>> {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Table(table) => {
|
||||||
|
let needle = name.as_bytes();
|
||||||
|
for (idx, candidate) in table.column_names().iter().enumerate() {
|
||||||
|
if candidate.as_ref() == needle {
|
||||||
|
return core_value_to_python(py, table.columns()[idx].clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(PyKeyError::new_err(name.to_string()))
|
||||||
|
}
|
||||||
|
_ => unreachable!("Table instances always hold q tables"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyo3(signature = (requested_schema=None))]
|
||||||
|
fn __arrow_c_stream__(
|
||||||
|
slf: PyRef<'_, Self>,
|
||||||
|
py: Python<'_>,
|
||||||
|
requested_schema: Option<Bound<'_, PyAny>>,
|
||||||
|
) -> PyResult<Py<PyAny>> {
|
||||||
|
let schema_capsule: Option<Bound<'_, PyCapsule>> = requested_schema
|
||||||
|
.map(|s| s.downcast_into::<PyCapsule>())
|
||||||
|
.transpose()?;
|
||||||
|
let table = match slf.as_super().inner() {
|
||||||
|
qroissant_core::Value::Table(t) => t.clone(),
|
||||||
|
_ => unreachable!("Table instances always hold q tables"),
|
||||||
|
};
|
||||||
|
let opts = slf.as_super().projection_opts().clone();
|
||||||
|
let export = py
|
||||||
|
.detach(|| project_table(&table, &opts).map_err(|e| e.to_string()))
|
||||||
|
.map_err(|e| PyNotImplementedError::new_err(e))?;
|
||||||
|
let reader = ArrayIterator::new(vec![Ok(export.struct_array)], export.struct_field);
|
||||||
|
let capsule = to_stream_pycapsule(py, Box::new(reader), schema_capsule)?;
|
||||||
|
Ok(capsule.into_any().unbind())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __repr__(slf: PyRef<'_, Self>) -> String {
|
||||||
|
match slf.as_super().inner() {
|
||||||
|
CoreValue::Table(table) => crate::repr::format_table(table),
|
||||||
|
_ => unreachable!("Table instances always hold q tables"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn __str__(slf: PyRef<'_, Self>) -> String {
|
||||||
|
Self::__repr__(slf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn normalize_index(index: isize, len: usize) -> PyResult<usize> {
|
||||||
|
let len = len as isize;
|
||||||
|
let index = if index < 0 { len + index } else { index };
|
||||||
|
if !(0..len).contains(&index) {
|
||||||
|
return Err(PyIndexError::new_err("index out of range"));
|
||||||
|
}
|
||||||
|
Ok(index as usize)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extracts raw bytes from a Python object.
///
/// The object is first extracted as a `Vec<u8>`; if that fails it is
/// extracted as a `String` and its UTF-8 bytes are returned. The error of the
/// second attempt is the one reported to the caller.
fn bytes_or_utf8(value: &Bound<'_, PyAny>) -> PyResult<Vec<u8>> {
    match value.extract::<Vec<u8>>() {
        Ok(raw) => Ok(raw),
        Err(_) => value.extract::<String>().map(String::into_bytes),
    }
}
|
||||||
|
|
||||||
|
fn atom_to_python(py: Python<'_>, atom: &CoreAtom) -> PyResult<Py<PyAny>> {
|
||||||
|
match atom {
|
||||||
|
CoreAtom::Boolean(value) => Ok(value.into_pyobject(py)?.to_owned().unbind().into_any()),
|
||||||
|
CoreAtom::Guid(value) => Ok(PyBytes::new(py, value).unbind().into_any()),
|
||||||
|
CoreAtom::Byte(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Short(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Int(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Long(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Real(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Float(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Char(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Symbol(value) => Ok(PyBytes::new(py, value).unbind().into_any()),
|
||||||
|
CoreAtom::Timestamp(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Month(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Date(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Datetime(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Timespan(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Minute(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Second(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
CoreAtom::Time(value) => Ok(value.into_pyobject(py)?.unbind().into_any()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn atom_from_python(qtype: &Type, value: &Bound<'_, PyAny>) -> PyResult<CoreAtom> {
|
||||||
|
ensure_shape(qtype, Shape::Atom)?;
|
||||||
|
let primitive = qtype
|
||||||
|
.primitive_value()
|
||||||
|
.ok_or_else(|| PyValueError::new_err("atom qtype requires a primitive"))?;
|
||||||
|
match primitive {
|
||||||
|
Primitive::Boolean => Ok(CoreAtom::Boolean(value.extract()?)),
|
||||||
|
Primitive::Guid => {
|
||||||
|
let bytes = value.extract::<Vec<u8>>()?;
|
||||||
|
let guid: [u8; 16] = bytes.try_into().map_err(|_| {
|
||||||
|
PyValueError::new_err("guid atoms must be backed by exactly 16 bytes")
|
||||||
|
})?;
|
||||||
|
Ok(CoreAtom::Guid(guid))
|
||||||
|
}
|
||||||
|
Primitive::Byte => Ok(CoreAtom::Byte(value.extract()?)),
|
||||||
|
Primitive::Short => Ok(CoreAtom::Short(value.extract()?)),
|
||||||
|
Primitive::Int => Ok(CoreAtom::Int(value.extract()?)),
|
||||||
|
Primitive::Long => Ok(CoreAtom::Long(value.extract()?)),
|
||||||
|
Primitive::Real => Ok(CoreAtom::Real(value.extract()?)),
|
||||||
|
Primitive::Float => Ok(CoreAtom::Float(value.extract()?)),
|
||||||
|
Primitive::Char => Ok(CoreAtom::Char(extract_char_like(value)?)),
|
||||||
|
Primitive::Symbol => Ok(CoreAtom::Symbol(bytes::Bytes::from(bytes_or_utf8(value)?))),
|
||||||
|
Primitive::Timestamp => Ok(CoreAtom::Timestamp(value.extract()?)),
|
||||||
|
Primitive::Month => Ok(CoreAtom::Month(value.extract()?)),
|
||||||
|
Primitive::Date => Ok(CoreAtom::Date(value.extract()?)),
|
||||||
|
Primitive::Datetime => Ok(CoreAtom::Datetime(value.extract()?)),
|
||||||
|
Primitive::Timespan => Ok(CoreAtom::Timespan(value.extract()?)),
|
||||||
|
Primitive::Minute => Ok(CoreAtom::Minute(value.extract()?)),
|
||||||
|
Primitive::Second => Ok(CoreAtom::Second(value.extract()?)),
|
||||||
|
Primitive::Time => Ok(CoreAtom::Time(value.extract()?)),
|
||||||
|
Primitive::Mixed => Err(PyValueError::new_err("mixed atoms are not valid q values")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_char_like(value: &Bound<'_, PyAny>) -> PyResult<u8> {
|
||||||
|
if let Ok(byte) = value.extract::<u8>() {
|
||||||
|
return Ok(byte);
|
||||||
|
}
|
||||||
|
let bytes = value.extract::<Vec<u8>>()?;
|
||||||
|
let [byte] = <[u8; 1]>::try_from(bytes.as_slice())
|
||||||
|
.map_err(|_| PyValueError::new_err("char values must be a single byte or integer"))?;
|
||||||
|
Ok(byte)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn vector_from_python(qtype: &Type, values: &Bound<'_, PyAny>) -> PyResult<CoreVector> {
|
||||||
|
ensure_shape(qtype, Shape::Vector)?;
|
||||||
|
let primitive = qtype
|
||||||
|
.primitive_value()
|
||||||
|
.ok_or_else(|| PyValueError::new_err("vector qtype requires a primitive"))?;
|
||||||
|
let list = values
|
||||||
|
.cast::<PyList>()
|
||||||
|
.map_err(|_| PyValueError::new_err("vector payloads must be Python lists"))?;
|
||||||
|
let attribute = qtype.attribute_value().unwrap_or(Attribute::None).into();
|
||||||
|
let data = match primitive {
|
||||||
|
Primitive::Boolean => {
|
||||||
|
let bools: Vec<bool> = extract_list(list, |item| item.extract())?;
|
||||||
|
let bytes: Vec<u8> = bools.into_iter().map(|b| if b { 1 } else { 0 }).collect();
|
||||||
|
VectorData::Boolean(bytes::Bytes::from(bytes))
|
||||||
|
}
|
||||||
|
Primitive::Guid => VectorData::from_guids(&extract_list(list, |item| {
|
||||||
|
let bytes = item.extract::<Vec<u8>>()?;
|
||||||
|
bytes
|
||||||
|
.try_into()
|
||||||
|
.map_err(|_| PyValueError::new_err("guid vector elements must be exactly 16 bytes"))
|
||||||
|
})?),
|
||||||
|
Primitive::Byte => VectorData::Byte(bytes::Bytes::from(extract_list(list, |item| {
|
||||||
|
item.extract::<u8>()
|
||||||
|
})?)),
|
||||||
|
Primitive::Short => VectorData::from_i16s(&extract_list(list, |item| item.extract())?),
|
||||||
|
Primitive::Int => VectorData::from_i32s(&extract_list(list, |item| item.extract())?),
|
||||||
|
Primitive::Long => VectorData::from_i64s(&extract_list(list, |item| item.extract())?),
|
||||||
|
Primitive::Real => VectorData::from_f32s(&extract_list(list, |item| item.extract())?),
|
||||||
|
Primitive::Float => VectorData::from_f64s(&extract_list(list, |item| item.extract())?),
|
||||||
|
Primitive::Char => {
|
||||||
|
VectorData::Char(bytes::Bytes::from(extract_list(list, extract_char_like)?))
|
||||||
|
}
|
||||||
|
Primitive::Symbol => VectorData::Symbol(
|
||||||
|
extract_list(list, bytes_or_utf8)?
|
||||||
|
.into_iter()
|
||||||
|
.map(bytes::Bytes::from)
|
||||||
|
.collect(),
|
||||||
|
),
|
||||||
|
Primitive::Timestamp => {
|
||||||
|
VectorData::from_timestamps(&extract_list(list, |item| item.extract())?)
|
||||||
|
}
|
||||||
|
Primitive::Month => VectorData::from_months(&extract_list(list, |item| item.extract())?),
|
||||||
|
Primitive::Date => VectorData::from_dates(&extract_list(list, |item| item.extract())?),
|
||||||
|
Primitive::Datetime => {
|
||||||
|
VectorData::from_datetimes(&extract_list(list, |item| item.extract())?)
|
||||||
|
}
|
||||||
|
Primitive::Timespan => {
|
||||||
|
VectorData::from_timespans(&extract_list(list, |item| item.extract())?)
|
||||||
|
}
|
||||||
|
Primitive::Minute => VectorData::from_minutes(&extract_list(list, |item| item.extract())?),
|
||||||
|
Primitive::Second => VectorData::from_seconds(&extract_list(list, |item| item.extract())?),
|
||||||
|
Primitive::Time => VectorData::from_times(&extract_list(list, |item| item.extract())?),
|
||||||
|
Primitive::Mixed => {
|
||||||
|
return Err(PyValueError::new_err(
|
||||||
|
"mixed vectors must use List rather than Vector",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(CoreVector::new(attribute, data))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn list_from_python(qtype: &Type, values: &Bound<'_, PyAny>) -> PyResult<CoreList> {
|
||||||
|
ensure_shape(qtype, Shape::List)?;
|
||||||
|
let list = values
|
||||||
|
.cast::<PyList>()
|
||||||
|
.map_err(|_| PyValueError::new_err("list payloads must be Python lists"))?;
|
||||||
|
let attribute = qtype.attribute_value().unwrap_or(Attribute::None).into();
|
||||||
|
let mut inner = Vec::with_capacity(list.len());
|
||||||
|
for item in list.iter() {
|
||||||
|
inner.push(python_to_core_value(&item)?);
|
||||||
|
}
|
||||||
|
Ok(CoreList::new(attribute, inner))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dictionary_from_python(
|
||||||
|
qtype: &Type,
|
||||||
|
keys: &Bound<'_, PyAny>,
|
||||||
|
values: &Bound<'_, PyAny>,
|
||||||
|
) -> PyResult<CoreDictionary> {
|
||||||
|
ensure_shape(qtype, Shape::Dictionary)?;
|
||||||
|
let sorted = qtype.sorted_value().unwrap_or(false);
|
||||||
|
let dictionary = CoreDictionary::new(
|
||||||
|
sorted,
|
||||||
|
python_to_core_value(keys)?,
|
||||||
|
python_to_core_value(values)?,
|
||||||
|
);
|
||||||
|
dictionary
|
||||||
|
.validate()
|
||||||
|
.map_err(|error| PyValueError::new_err(error.to_string()))?;
|
||||||
|
Ok(dictionary)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn table_from_python(qtype: &Type, columns: &Bound<'_, PyAny>) -> PyResult<CoreTable> {
|
||||||
|
ensure_shape(qtype, Shape::Table)?;
|
||||||
|
let columns = columns
|
||||||
|
.cast::<PyDict>()
|
||||||
|
.map_err(|_| PyValueError::new_err("table payloads must be Python dicts"))?;
|
||||||
|
let attribute = qtype.attribute_value().unwrap_or(Attribute::None).into();
|
||||||
|
let mut names = Vec::with_capacity(columns.len());
|
||||||
|
let mut values = Vec::with_capacity(columns.len());
|
||||||
|
for (name, column) in columns.iter() {
|
||||||
|
names.push(bytes::Bytes::from(name.extract::<String>()?.into_bytes()));
|
||||||
|
values.push(python_to_core_value(&column)?);
|
||||||
|
}
|
||||||
|
let table = CoreTable::new(attribute, names, values);
|
||||||
|
table
|
||||||
|
.validate()
|
||||||
|
.map_err(|error| PyValueError::new_err(error.to_string()))?;
|
||||||
|
Ok(table)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ensure_shape(qtype: &Type, expected: Shape) -> PyResult<()> {
|
||||||
|
if qtype.shape_value() != expected {
|
||||||
|
return Err(PyValueError::new_err(format!(
|
||||||
|
"qtype shape {:?} does not match {:?}",
|
||||||
|
qtype.shape_value(),
|
||||||
|
expected
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_list<T, F>(items: &Bound<'_, PyList>, convert: F) -> PyResult<Vec<T>>
|
||||||
|
where
|
||||||
|
F: Fn(&Bound<'_, PyAny>) -> PyResult<T>,
|
||||||
|
{
|
||||||
|
let mut values = Vec::with_capacity(items.len());
|
||||||
|
for item in items.iter() {
|
||||||
|
values.push(convert(&item)?);
|
||||||
|
}
|
||||||
|
Ok(values)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn vector_to_pylist(py: Python<'_>, vector: &CoreVector) -> PyResult<Py<PyList>> {
|
||||||
|
let len = vector.len();
|
||||||
|
let mut values = Vec::with_capacity(len);
|
||||||
|
for index in 0..len {
|
||||||
|
values.push(vector_item_to_python(py, vector, index)?);
|
||||||
|
}
|
||||||
|
Ok(PyList::new(py, values)?.unbind())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn vector_item_to_python(py: Python<'_>, vector: &CoreVector, index: usize) -> PyResult<Py<PyAny>> {
|
||||||
|
let data = vector.data();
|
||||||
|
match data {
|
||||||
|
VectorData::Boolean(values) => Ok((values[index] != 0)
|
||||||
|
.into_pyobject(py)?
|
||||||
|
.to_owned()
|
||||||
|
.unbind()
|
||||||
|
.into_any()),
|
||||||
|
VectorData::Guid(values) => {
|
||||||
|
let chunk = &values[index * 16..(index + 1) * 16];
|
||||||
|
Ok(PyBytes::new(py, chunk).unbind().into_any())
|
||||||
|
}
|
||||||
|
VectorData::Byte(values) => Ok(values[index].into_pyobject(py)?.unbind().into_any()),
|
||||||
|
VectorData::Short(_) => Ok(data.as_i16_slice()[index]
|
||||||
|
.into_pyobject(py)?
|
||||||
|
.unbind()
|
||||||
|
.into_any()),
|
||||||
|
VectorData::Int(_)
|
||||||
|
| VectorData::Month(_)
|
||||||
|
| VectorData::Date(_)
|
||||||
|
| VectorData::Minute(_)
|
||||||
|
| VectorData::Second(_)
|
||||||
|
| VectorData::Time(_) => Ok(data.as_i32_slice()[index]
|
||||||
|
.into_pyobject(py)?
|
||||||
|
.unbind()
|
||||||
|
.into_any()),
|
||||||
|
VectorData::Long(_) | VectorData::Timestamp(_) | VectorData::Timespan(_) => Ok(data
|
||||||
|
.as_i64_slice()[index]
|
||||||
|
.into_pyobject(py)?
|
||||||
|
.unbind()
|
||||||
|
.into_any()),
|
||||||
|
VectorData::Real(_) => Ok(data.as_f32_slice()[index]
|
||||||
|
.into_pyobject(py)?
|
||||||
|
.unbind()
|
||||||
|
.into_any()),
|
||||||
|
VectorData::Float(_) | VectorData::Datetime(_) => Ok(data.as_f64_slice()[index]
|
||||||
|
.into_pyobject(py)?
|
||||||
|
.unbind()
|
||||||
|
.into_any()),
|
||||||
|
VectorData::Char(values) => Ok(values[index].into_pyobject(py)?.unbind().into_any()),
|
||||||
|
VectorData::Symbol(values) => Ok(PyBytes::new(py, &values[index]).unbind().into_any()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn map_ingestion_error(e: IngestionError) -> PyErr {
|
||||||
|
PyValueError::new_err(e.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn python_to_core_value(value: &Bound<'_, PyAny>) -> PyResult<CoreValue> {
|
||||||
|
// Try qroissant Value first (it also implements Arrow protocols, so must come first).
|
||||||
|
if let Ok(q_value) = value.extract::<PyRef<'_, Value>>() {
|
||||||
|
return Ok(q_value.inner().clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check Arrow stream protocol (record batches → table).
|
||||||
|
if value.hasattr("__arrow_c_stream__")? {
|
||||||
|
let capsule_obj = value.getattr("__arrow_c_stream__")?.call0()?;
|
||||||
|
let stream_capsule = capsule_obj.downcast::<PyCapsule>().map_err(PyErr::from)?;
|
||||||
|
let reader =
|
||||||
|
pyo3_arrow::PyRecordBatchReader::from_arrow_pycapsule(stream_capsule)?.into_reader()?;
|
||||||
|
let schema = reader.schema();
|
||||||
|
let value = ingest_record_batch_reader(schema, reader).map_err(map_ingestion_error)?;
|
||||||
|
return Ok(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check Arrow array protocol (single array or record batch).
|
||||||
|
if value.hasattr("__arrow_c_array__")? {
|
||||||
|
// Try extracting as a record batch first.
|
||||||
|
if let Ok(record_batch) = value.extract::<pyo3_arrow::PyRecordBatch>() {
|
||||||
|
let batch = record_batch.into_inner();
|
||||||
|
let value = ingest_record_batch(batch).map_err(map_ingestion_error)?;
|
||||||
|
return Ok(value);
|
||||||
|
}
|
||||||
|
// Fall back to plain array.
|
||||||
|
let array: pyo3_arrow::PyArray = value.extract()?;
|
||||||
|
let (array, field) = array.into_inner();
|
||||||
|
let value = ingest_array(array, field.as_ref()).map_err(map_ingestion_error)?;
|
||||||
|
return Ok(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(PyNotImplementedError::new_err(
|
||||||
|
"encoding non-qroissant values is not implemented yet; \
|
||||||
|
pass a qroissant Value or an object implementing the Arrow protocol",
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn core_value_to_python(py: Python<'_>, value: CoreValue) -> PyResult<Py<PyAny>> {
|
||||||
|
core_value_to_python_with_opts(py, value, Arc::new(ProjectionOptions::default()))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn core_value_to_python_with_opts(
|
||||||
|
py: Python<'_>,
|
||||||
|
value: CoreValue,
|
||||||
|
opts: Arc<ProjectionOptions>,
|
||||||
|
) -> PyResult<Py<PyAny>> {
|
||||||
|
match value {
|
||||||
|
CoreValue::Atom(atom) => Ok(Py::new(
|
||||||
|
py,
|
||||||
|
(Atom, Value::new_with_opts(CoreValue::Atom(atom), opts)),
|
||||||
|
)?
|
||||||
|
.into_any()),
|
||||||
|
CoreValue::Vector(vector) => Ok(Py::new(
|
||||||
|
py,
|
||||||
|
(
|
||||||
|
Vector,
|
||||||
|
Value::new_with_opts(CoreValue::Vector(vector), opts),
|
||||||
|
),
|
||||||
|
)?
|
||||||
|
.into_any()),
|
||||||
|
CoreValue::List(list) => Ok(Py::new(
|
||||||
|
py,
|
||||||
|
(List, Value::new_with_opts(CoreValue::List(list), opts)),
|
||||||
|
)?
|
||||||
|
.into_any()),
|
||||||
|
CoreValue::Dictionary(dictionary) => Ok(Py::new(
|
||||||
|
py,
|
||||||
|
(
|
||||||
|
Dictionary,
|
||||||
|
Value::new_with_opts(CoreValue::Dictionary(dictionary), opts),
|
||||||
|
),
|
||||||
|
)?
|
||||||
|
.into_any()),
|
||||||
|
CoreValue::Table(table) => Ok(Py::new(
|
||||||
|
py,
|
||||||
|
(Table, Value::new_with_opts(CoreValue::Table(table), opts)),
|
||||||
|
)?
|
||||||
|
.into_any()),
|
||||||
|
CoreValue::UnaryPrimitive { opcode } => {
|
||||||
|
Ok(Py::new(py, Value::new(CoreValue::UnaryPrimitive { opcode }))?.into_any())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Adds the value classes to the given Python module.
///
/// The base `Value` class is added first, followed by `Atom`, `Vector`,
/// `List`, `Dictionary` and `Table`. The first failure aborts registration.
pub fn register(module: &Bound<'_, PyModule>) -> PyResult<()> {
    // Base class.
    module.add_class::<Value>()?;

    // Shape-specific classes.
    module.add_class::<Atom>()?;
    module.add_class::<Vector>()?;
    module.add_class::<List>()?;
    module.add_class::<Dictionary>()?;
    module.add_class::<Table>()?;

    Ok(())
}
|
||||||
16
crates/qroissant-transport/Cargo.toml
Normal file
16
crates/qroissant-transport/Cargo.toml
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
[package]
|
||||||
|
name = "qroissant-transport"
|
||||||
|
version.workspace = true
|
||||||
|
edition.workspace = true
|
||||||
|
license.workspace = true
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
name = "qroissant_transport"
|
||||||
|
path = "src/lib.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
bytes = "1.11.1"
|
||||||
|
qroissant-core = { path = "../qroissant-core" }
|
||||||
|
tokio = { workspace = true, features = ["io-util", "net", "time"] }
|
||||||
|
futures = { workspace = true }
|
||||||
475
crates/qroissant-transport/src/asynchronous.rs
Normal file
475
crates/qroissant-transport/src/asynchronous.rs
Normal file
|
|
@ -0,0 +1,475 @@
|
||||||
|
use std::pin::Pin;
|
||||||
|
use std::task::Context;
|
||||||
|
use std::task::Poll;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use qroissant_core::Compression;
|
||||||
|
use qroissant_core::HEADER_LEN;
|
||||||
|
use qroissant_core::MessageHeader;
|
||||||
|
use qroissant_core::StreamingDecompressor;
|
||||||
|
use qroissant_core::read_message_length;
|
||||||
|
use tokio::io::AsyncRead;
|
||||||
|
use tokio::io::AsyncReadExt;
|
||||||
|
use tokio::io::AsyncWrite;
|
||||||
|
use tokio::io::AsyncWriteExt;
|
||||||
|
use tokio::io::ReadBuf;
|
||||||
|
use tokio::net::TcpStream;
|
||||||
|
#[cfg(unix)]
|
||||||
|
use tokio::net::UnixStream;
|
||||||
|
|
||||||
|
use crate::TransportError;
|
||||||
|
use crate::TransportResult;
|
||||||
|
use crate::synchronous::CLIENT_CAPABILITY;
|
||||||
|
|
||||||
|
pub enum AsyncTransport {
|
||||||
|
Tcp(TcpStream),
|
||||||
|
#[cfg(unix)]
|
||||||
|
Unix(UnixStream),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsyncRead for AsyncTransport {
|
||||||
|
fn poll_read(
|
||||||
|
mut self: Pin<&mut Self>,
|
||||||
|
cx: &mut Context<'_>,
|
||||||
|
buf: &mut ReadBuf<'_>,
|
||||||
|
) -> Poll<std::io::Result<()>> {
|
||||||
|
match &mut *self {
|
||||||
|
Self::Tcp(stream) => Pin::new(stream).poll_read(cx, buf),
|
||||||
|
#[cfg(unix)]
|
||||||
|
Self::Unix(stream) => Pin::new(stream).poll_read(cx, buf),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsyncWrite for AsyncTransport {
|
||||||
|
fn poll_write(
|
||||||
|
mut self: Pin<&mut Self>,
|
||||||
|
cx: &mut Context<'_>,
|
||||||
|
buf: &[u8],
|
||||||
|
) -> Poll<std::io::Result<usize>> {
|
||||||
|
match &mut *self {
|
||||||
|
Self::Tcp(stream) => Pin::new(stream).poll_write(cx, buf),
|
||||||
|
#[cfg(unix)]
|
||||||
|
Self::Unix(stream) => Pin::new(stream).poll_write(cx, buf),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
|
||||||
|
match &mut *self {
|
||||||
|
Self::Tcp(stream) => Pin::new(stream).poll_flush(cx),
|
||||||
|
#[cfg(unix)]
|
||||||
|
Self::Unix(stream) => Pin::new(stream).poll_flush(cx),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
|
||||||
|
match &mut *self {
|
||||||
|
Self::Tcp(stream) => Pin::new(stream).poll_shutdown(cx),
|
||||||
|
#[cfg(unix)]
|
||||||
|
Self::Unix(stream) => Pin::new(stream).poll_shutdown(cx),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsyncTransport {
|
||||||
|
pub async fn shutdown(&mut self) -> std::io::Result<()> {
|
||||||
|
match self {
|
||||||
|
Self::Tcp(stream) => stream.shutdown().await,
|
||||||
|
#[cfg(unix)]
|
||||||
|
Self::Unix(stream) => stream.shutdown().await,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn take_error(&self) -> std::io::Result<Option<std::io::Error>> {
|
||||||
|
match self {
|
||||||
|
Self::Tcp(stream) => stream.take_error(),
|
||||||
|
#[cfg(unix)]
|
||||||
|
Self::Unix(stream) => stream.take_error(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct AsyncPooledTransport {
|
||||||
|
transport: AsyncTransport,
|
||||||
|
broken: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsyncPooledTransport {
|
||||||
|
pub fn new(transport: AsyncTransport) -> Self {
|
||||||
|
Self {
|
||||||
|
transport,
|
||||||
|
broken: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn mark_broken(&mut self) {
|
||||||
|
self.broken = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_broken(&self) -> bool {
|
||||||
|
self.broken || self.transport.take_error().ok().flatten().is_some()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn transport_mut(&mut self) -> &mut AsyncTransport {
|
||||||
|
&mut self.transport
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A reader that transparently decompresses q IPC payloads as they are read.
|
||||||
|
pub struct DecompressingReader<'a, R> {
|
||||||
|
reader: &'a mut R,
|
||||||
|
decompressor: Option<StreamingDecompressor>,
|
||||||
|
remaining_compressed: usize,
|
||||||
|
buffer: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, R: AsyncRead + Unpin> DecompressingReader<'a, R> {
|
||||||
|
pub fn new(
|
||||||
|
reader: &'a mut R,
|
||||||
|
decompressor: Option<StreamingDecompressor>,
|
||||||
|
remaining_compressed: usize,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
reader,
|
||||||
|
decompressor,
|
||||||
|
remaining_compressed,
|
||||||
|
buffer: vec![0_u8; 8192],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, R: AsyncRead + Unpin> AsyncRead for DecompressingReader<'a, R> {
|
||||||
|
fn poll_read(
|
||||||
|
mut self: Pin<&mut Self>,
|
||||||
|
cx: &mut Context<'_>,
|
||||||
|
buf: &mut ReadBuf<'_>,
|
||||||
|
) -> Poll<std::io::Result<()>> {
|
||||||
|
let this = &mut *self;
|
||||||
|
|
||||||
|
if let Some(decompressor) = &mut this.decompressor {
|
||||||
|
// If we have decompressed data available, yield it first.
|
||||||
|
if decompressor.unread_len() > 0 {
|
||||||
|
let chunk = decompressor.next_chunk();
|
||||||
|
let to_copy = chunk.len().min(buf.remaining());
|
||||||
|
buf.put_slice(&chunk[..to_copy]);
|
||||||
|
decompressor.consume(to_copy);
|
||||||
|
return Poll::Ready(Ok(()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// If decompression is complete and no more unread bytes, EOF.
|
||||||
|
if decompressor.is_complete() {
|
||||||
|
return Poll::Ready(Ok(()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, read more compressed data from the underlying reader.
|
||||||
|
if this.remaining_compressed > 0 {
|
||||||
|
let want = this.remaining_compressed.min(this.buffer.len());
|
||||||
|
let mut read_buf = ReadBuf::new(&mut this.buffer[..want]);
|
||||||
|
match Pin::new(&mut this.reader).poll_read(cx, &mut read_buf) {
|
||||||
|
Poll::Ready(Ok(())) => {
|
||||||
|
let read = read_buf.filled().len();
|
||||||
|
if read == 0 && want > 0 {
|
||||||
|
return Poll::Ready(Err(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::UnexpectedEof,
|
||||||
|
"unexpected EOF reading compressed body",
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
this.remaining_compressed -= read;
|
||||||
|
decompressor.feed(read_buf.filled()).map_err(|e| {
|
||||||
|
std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// Recursive call to yield the newly decompressed bytes.
|
||||||
|
return self.poll_read(cx, buf);
|
||||||
|
}
|
||||||
|
Poll::Ready(Err(e)) => return Poll::Ready(Err(e)),
|
||||||
|
Poll::Pending => return Poll::Pending,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Poll::Ready(Ok(()))
|
||||||
|
} else {
|
||||||
|
// Uncompressed path: direct read from underlying reader.
|
||||||
|
Pin::new(&mut this.reader).poll_read(cx, buf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsyncRead for AsyncPooledTransport {
|
||||||
|
fn poll_read(
|
||||||
|
mut self: Pin<&mut Self>,
|
||||||
|
cx: &mut Context<'_>,
|
||||||
|
buf: &mut ReadBuf<'_>,
|
||||||
|
) -> Poll<std::io::Result<()>> {
|
||||||
|
Pin::new(&mut self.transport).poll_read(cx, buf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AsyncWrite for AsyncPooledTransport {
|
||||||
|
fn poll_write(
|
||||||
|
mut self: Pin<&mut Self>,
|
||||||
|
cx: &mut Context<'_>,
|
||||||
|
buf: &[u8],
|
||||||
|
) -> Poll<std::io::Result<usize>> {
|
||||||
|
Pin::new(&mut self.transport).poll_write(cx, buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
|
||||||
|
Pin::new(&mut self.transport).poll_flush(cx)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
|
||||||
|
Pin::new(&mut self.transport).poll_shutdown(cx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn credentials_bytes(username: Option<&str>, password: Option<&str>) -> Vec<u8> {
|
||||||
|
let username = username.unwrap_or_default();
|
||||||
|
let password = password.unwrap_or_default();
|
||||||
|
let mut bytes = format!("{username}:{password}").into_bytes();
|
||||||
|
bytes.push(CLIENT_CAPABILITY);
|
||||||
|
bytes.push(0);
|
||||||
|
bytes
|
||||||
|
}
|
||||||
|
|
||||||
|
fn timeout_error(context: &str, timeout_ms: u64) -> TransportError {
|
||||||
|
TransportError::Io(std::io::Error::new(
|
||||||
|
std::io::ErrorKind::TimedOut,
|
||||||
|
format!("{context} timed out after {timeout_ms}ms"),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn run_with_timeout<T, F>(
|
||||||
|
timeout_ms: Option<u64>,
|
||||||
|
context: &str,
|
||||||
|
future: F,
|
||||||
|
) -> TransportResult<T>
|
||||||
|
where
|
||||||
|
F: std::future::Future<Output = std::io::Result<T>>,
|
||||||
|
{
|
||||||
|
match timeout_ms {
|
||||||
|
Some(timeout_ms) => tokio::time::timeout(Duration::from_millis(timeout_ms), future)
|
||||||
|
.await
|
||||||
|
.map_err(|_| timeout_error(context, timeout_ms))?
|
||||||
|
.map_err(TransportError::Io),
|
||||||
|
None => future.await.map_err(TransportError::Io),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn perform_handshake<S>(
|
||||||
|
stream: &mut S,
|
||||||
|
username: Option<&str>,
|
||||||
|
password: Option<&str>,
|
||||||
|
timeout_ms: Option<u64>,
|
||||||
|
) -> TransportResult<u8>
|
||||||
|
where
|
||||||
|
S: AsyncRead + AsyncWrite + Unpin,
|
||||||
|
{
|
||||||
|
run_with_timeout(
|
||||||
|
timeout_ms,
|
||||||
|
"q IPC handshake write",
|
||||||
|
stream.write_all(&credentials_bytes(username, password)),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
run_with_timeout(timeout_ms, "q IPC handshake flush", stream.flush()).await?;
|
||||||
|
|
||||||
|
let mut capability = [0_u8; 1];
|
||||||
|
run_with_timeout(
|
||||||
|
timeout_ms,
|
||||||
|
"q IPC handshake read",
|
||||||
|
stream.read_exact(&mut capability),
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(capability[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn connect_tcp_transport(
|
||||||
|
host: &str,
|
||||||
|
port: u16,
|
||||||
|
username: Option<&str>,
|
||||||
|
password: Option<&str>,
|
||||||
|
timeout_ms: Option<u64>,
|
||||||
|
) -> TransportResult<AsyncTransport> {
|
||||||
|
let mut stream =
|
||||||
|
run_with_timeout(timeout_ms, "TCP connect", TcpStream::connect((host, port))).await?;
|
||||||
|
stream.set_nodelay(true)?;
|
||||||
|
perform_handshake(&mut stream, username, password, timeout_ms).await?;
|
||||||
|
Ok(AsyncTransport::Tcp(stream))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
pub async fn connect_unix_transport(
|
||||||
|
path: &str,
|
||||||
|
username: Option<&str>,
|
||||||
|
password: Option<&str>,
|
||||||
|
timeout_ms: Option<u64>,
|
||||||
|
) -> TransportResult<AsyncTransport> {
|
||||||
|
let mut stream =
|
||||||
|
run_with_timeout(timeout_ms, "Unix socket connect", UnixStream::connect(path)).await?;
|
||||||
|
perform_handshake(&mut stream, username, password, timeout_ms).await?;
|
||||||
|
Ok(AsyncTransport::Unix(stream))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn read_frame<S>(stream: &mut S) -> TransportResult<Vec<u8>>
|
||||||
|
where
|
||||||
|
S: AsyncRead + Unpin,
|
||||||
|
{
|
||||||
|
let mut header = [0_u8; HEADER_LEN];
|
||||||
|
stream.read_exact(&mut header).await?;
|
||||||
|
let message_length = read_message_length(&header)
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
let mut frame = vec![0_u8; message_length];
|
||||||
|
frame[..HEADER_LEN].copy_from_slice(&header);
|
||||||
|
stream.read_exact(&mut frame[HEADER_LEN..]).await?;
|
||||||
|
Ok(frame)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn request_frame_over<S>(stream: &mut S, payload: &[u8]) -> TransportResult<Vec<u8>>
|
||||||
|
where
|
||||||
|
S: AsyncRead + AsyncWrite + Unpin,
|
||||||
|
{
|
||||||
|
stream.write_all(payload).await?;
|
||||||
|
stream.flush().await?;
|
||||||
|
read_frame(stream).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn begin_streaming_frame_over<S>(
|
||||||
|
stream: &mut S,
|
||||||
|
payload: &[u8],
|
||||||
|
) -> TransportResult<([u8; HEADER_LEN], usize)>
|
||||||
|
where
|
||||||
|
S: AsyncRead + AsyncWrite + Unpin,
|
||||||
|
{
|
||||||
|
stream.write_all(payload).await?;
|
||||||
|
stream.flush().await?;
|
||||||
|
|
||||||
|
let mut header = [0_u8; HEADER_LEN];
|
||||||
|
stream.read_exact(&mut header).await?;
|
||||||
|
let message_length = read_message_length(&header)
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
Ok((header, message_length - HEADER_LEN))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Async variant of [`crate::synchronous::request_frame_streaming_over`].
|
||||||
|
///
|
||||||
|
/// Sends a payload and reads the response frame, using streaming decompression
|
||||||
|
/// when the response is compressed.
|
||||||
|
pub async fn request_frame_streaming_over<S>(
|
||||||
|
stream: &mut S,
|
||||||
|
payload: &[u8],
|
||||||
|
) -> TransportResult<Vec<u8>>
|
||||||
|
where
|
||||||
|
S: AsyncRead + AsyncWrite + Unpin,
|
||||||
|
{
|
||||||
|
stream.write_all(payload).await?;
|
||||||
|
stream.flush().await?;
|
||||||
|
|
||||||
|
// Read the 8-byte header.
|
||||||
|
let mut header_bytes = [0_u8; HEADER_LEN];
|
||||||
|
stream.read_exact(&mut header_bytes).await?;
|
||||||
|
|
||||||
|
let header = MessageHeader::from_bytes(header_bytes)
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
let body_len = header.body_len();
|
||||||
|
|
||||||
|
if header.compression() == Compression::Uncompressed {
|
||||||
|
let mut frame = vec![0_u8; header.size()];
|
||||||
|
frame[..HEADER_LEN].copy_from_slice(&header_bytes);
|
||||||
|
stream.read_exact(&mut frame[HEADER_LEN..]).await?;
|
||||||
|
return Ok(frame);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compressed frame: read the 4-byte size prefix first.
|
||||||
|
if body_len < 4 {
|
||||||
|
return Err(TransportError::Protocol(
|
||||||
|
"compressed body must be at least 4 bytes for size prefix".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut size_prefix = [0_u8; 4];
|
||||||
|
stream.read_exact(&mut size_prefix).await?;
|
||||||
|
|
||||||
|
let mut decompressor = StreamingDecompressor::new(size_prefix, header.encoding())
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
|
||||||
|
// Read the remaining compressed body in chunks.
|
||||||
|
let remaining = body_len - 4;
|
||||||
|
let mut total_read = 0_usize;
|
||||||
|
let mut chunk = vec![0_u8; 8192];
|
||||||
|
|
||||||
|
while total_read < remaining {
|
||||||
|
let want = (remaining - total_read).min(chunk.len());
|
||||||
|
stream.read_exact(&mut chunk[..want]).await?;
|
||||||
|
decompressor
|
||||||
|
.feed(&chunk[..want])
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
total_read += want;
|
||||||
|
}
|
||||||
|
|
||||||
|
if !decompressor.is_complete() {
|
||||||
|
return Err(TransportError::Protocol(
|
||||||
|
"streaming decompression did not complete after reading entire body".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let decompressed = decompressor
|
||||||
|
.finish()
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
|
||||||
|
// Reconstruct as an uncompressed frame.
|
||||||
|
let new_size = HEADER_LEN + decompressed.len();
|
||||||
|
let new_header = qroissant_core::MessageHeader::new(
|
||||||
|
header.encoding(),
|
||||||
|
header.message_type(),
|
||||||
|
Compression::Uncompressed,
|
||||||
|
new_size,
|
||||||
|
)
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
|
||||||
|
let mut frame = Vec::with_capacity(new_size);
|
||||||
|
frame.extend_from_slice(
|
||||||
|
&new_header
|
||||||
|
.to_bytes()
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?,
|
||||||
|
);
|
||||||
|
frame.extend_from_slice(&decompressed);
|
||||||
|
Ok(frame)
|
||||||
|
}
|
||||||
|
use qroissant_core::pipelined::PipelinedReader;
|
||||||
|
use qroissant_core::pipelined::decode_value_async;
|
||||||
|
use qroissant_core::value::Value;
|
||||||
|
|
||||||
|
pub async fn request_value_pipelined_over<R: AsyncRead + AsyncWrite + Unpin + Send + 'static>(
|
||||||
|
conn: &mut R,
|
||||||
|
payload: &[u8],
|
||||||
|
) -> TransportResult<Value> {
|
||||||
|
conn.write_all(payload).await.map_err(TransportError::Io)?;
|
||||||
|
conn.flush().await.map_err(TransportError::Io)?;
|
||||||
|
|
||||||
|
let mut header_bytes = [0_u8; HEADER_LEN];
|
||||||
|
conn.read_exact(&mut header_bytes)
|
||||||
|
.await
|
||||||
|
.map_err(TransportError::Io)?;
|
||||||
|
let header =
|
||||||
|
MessageHeader::parse(&header_bytes).map_err(|e| TransportError::Protocol(e.to_string()))?;
|
||||||
|
|
||||||
|
let (decompressor, remaining_compressed) = if header.compression() != Compression::Uncompressed
|
||||||
|
{
|
||||||
|
let mut size_prefix = [0_u8; 4];
|
||||||
|
conn.read_exact(&mut size_prefix)
|
||||||
|
.await
|
||||||
|
.map_err(TransportError::Io)?;
|
||||||
|
let decompressor = StreamingDecompressor::new(size_prefix, header.encoding())
|
||||||
|
.map_err(|e| TransportError::Protocol(e.to_string()))?;
|
||||||
|
(Some(decompressor), header.body_len() - 4)
|
||||||
|
} else {
|
||||||
|
(None, header.body_len())
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut decomp_reader = DecompressingReader::new(conn, decompressor, remaining_compressed);
|
||||||
|
let mut pipelined_reader = PipelinedReader::new(&mut decomp_reader, header.encoding())
|
||||||
|
.map_err(|e| TransportError::Protocol(e.to_string()))?;
|
||||||
|
|
||||||
|
decode_value_async(&mut pipelined_reader)
|
||||||
|
.await
|
||||||
|
.map_err(|e| TransportError::Protocol(e.to_string()))
|
||||||
|
}
|
||||||
42
crates/qroissant-transport/src/error.rs
Normal file
42
crates/qroissant-transport/src/error.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
|
pub type TransportResult<T> = Result<T, TransportError>;
|
||||||
|
|
||||||
|
/// Errors produced by the q IPC transport layer.
#[derive(Debug)]
pub enum TransportError {
    /// An underlying I/O operation failed.
    Io(std::io::Error),
    /// An endpoint was invalid; the payload is the message to display.
    InvalidEndpoint(String),
    /// A query string of this length does not fit q IPC's 32-bit capacity.
    InvalidQueryLength(usize),
    /// A protocol-level failure; the payload is the message to display.
    Protocol(String),
    /// The connection is closed.
    Closed,
}
|
||||||
|
|
||||||
|
impl fmt::Display for TransportError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::Io(error) => error.fmt(f),
|
||||||
|
Self::InvalidEndpoint(message) => write!(f, "{message}"),
|
||||||
|
Self::InvalidQueryLength(length) => write!(
|
||||||
|
f,
|
||||||
|
"q query string length {length} exceeds 32-bit q IPC capacity"
|
||||||
|
),
|
||||||
|
Self::Protocol(message) => write!(f, "{message}"),
|
||||||
|
Self::Closed => write!(f, "connection is closed"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::error::Error for TransportError {
|
||||||
|
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
|
||||||
|
match self {
|
||||||
|
Self::Io(error) => Some(error),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<std::io::Error> for TransportError {
|
||||||
|
fn from(value: std::io::Error) -> Self {
|
||||||
|
Self::Io(value)
|
||||||
|
}
|
||||||
|
}
|
||||||
37
crates/qroissant-transport/src/lib.rs
Normal file
37
crates/qroissant-transport/src/lib.rs
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
//! Shared q IPC transport primitives.
|
||||||
|
|
||||||
|
mod asynchronous;
|
||||||
|
mod error;
|
||||||
|
mod synchronous;
|
||||||
|
|
||||||
|
pub use asynchronous::AsyncPooledTransport;
|
||||||
|
pub use asynchronous::AsyncTransport;
|
||||||
|
pub use asynchronous::begin_streaming_frame_over as begin_streaming_frame_over_async;
|
||||||
|
pub use asynchronous::connect_tcp_transport as connect_tcp_transport_async;
|
||||||
|
#[cfg(unix)]
|
||||||
|
pub use asynchronous::connect_unix_transport as connect_unix_transport_async;
|
||||||
|
pub use asynchronous::read_frame as read_frame_async;
|
||||||
|
pub use asynchronous::request_frame_over as request_frame_over_async;
|
||||||
|
pub use asynchronous::request_frame_streaming_over as request_frame_streaming_over_async;
|
||||||
|
pub use asynchronous::request_value_pipelined_over as request_value_pipelined_over_async;
|
||||||
|
pub use error::TransportError;
|
||||||
|
pub use error::TransportResult;
|
||||||
|
pub use qroissant_core::HEADER_LEN as QIPC_HEADER_LEN;
|
||||||
|
pub use synchronous::CLIENT_CAPABILITY;
|
||||||
|
pub use synchronous::SyncConnection;
|
||||||
|
pub use synchronous::SyncPooledTransport;
|
||||||
|
pub use synchronous::SyncTransport;
|
||||||
|
pub use synchronous::begin_streaming_frame_over;
|
||||||
|
pub use synchronous::connect_tcp_transport;
|
||||||
|
#[cfg(unix)]
|
||||||
|
pub use synchronous::connect_unix_transport;
|
||||||
|
pub use synchronous::credentials_bytes;
|
||||||
|
pub use synchronous::encode_sync_query;
|
||||||
|
pub use synchronous::extract_q_error;
|
||||||
|
pub use synchronous::parse_message_header;
|
||||||
|
pub use synchronous::perform_handshake;
|
||||||
|
pub use synchronous::request_frame_over;
|
||||||
|
pub use synchronous::request_frame_streaming_over;
|
||||||
|
pub use synchronous::validate_response_frame;
|
||||||
|
pub use synchronous::validate_response_header;
|
||||||
|
pub use synchronous::validate_response_header_bytes;
|
||||||
420
crates/qroissant-transport/src/synchronous.rs
Normal file
420
crates/qroissant-transport/src/synchronous.rs
Normal file
|
|
@ -0,0 +1,420 @@
|
||||||
|
use std::io::Read;
|
||||||
|
use std::io::Write;
|
||||||
|
use std::net::Shutdown;
|
||||||
|
use std::net::TcpStream;
|
||||||
|
#[cfg(unix)]
|
||||||
|
use std::os::unix::net::UnixStream;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use qroissant_core::Attribute;
|
||||||
|
use qroissant_core::Compression;
|
||||||
|
use qroissant_core::Encoding;
|
||||||
|
use qroissant_core::Frame;
|
||||||
|
use qroissant_core::HEADER_LEN;
|
||||||
|
use qroissant_core::MessageHeader;
|
||||||
|
use qroissant_core::MessageType;
|
||||||
|
use qroissant_core::StreamingDecompressor;
|
||||||
|
use qroissant_core::Value;
|
||||||
|
use qroissant_core::Vector;
|
||||||
|
use qroissant_core::VectorData;
|
||||||
|
use qroissant_core::encode_message;
|
||||||
|
use qroissant_core::read_frame;
|
||||||
|
use qroissant_core::read_message_length;
|
||||||
|
|
||||||
|
use crate::TransportError;
|
||||||
|
use crate::TransportResult;
|
||||||
|
|
||||||
|
/// Capability byte this client sends during the handshake; `credentials_bytes`
/// appends it to the `username:password` bytes.
// NOTE(review): the value 3 is presumably the kdb+ IPC protocol level being
// requested — confirm against the q IPC handshake documentation.
pub const CLIENT_CAPABILITY: u8 = 3;
|
||||||
|
|
||||||
|
/// Blocking byte stream to a q process, over either TCP or (on Unix) a
/// Unix-domain socket.
pub enum SyncTransport {
    /// Connection over a TCP socket.
    Tcp(TcpStream),
    /// Connection over a Unix-domain socket; only compiled on Unix targets.
    #[cfg(unix)]
    Unix(UnixStream),
}
|
||||||
|
|
||||||
|
impl SyncTransport {
|
||||||
|
pub fn shutdown(&mut self) -> std::io::Result<()> {
|
||||||
|
match self {
|
||||||
|
Self::Tcp(stream) => stream.shutdown(Shutdown::Both),
|
||||||
|
#[cfg(unix)]
|
||||||
|
Self::Unix(stream) => stream.shutdown(Shutdown::Both),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn take_error(&self) -> std::io::Result<Option<std::io::Error>> {
|
||||||
|
match self {
|
||||||
|
Self::Tcp(stream) => stream.take_error(),
|
||||||
|
#[cfg(unix)]
|
||||||
|
Self::Unix(stream) => stream.take_error(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_timeouts(&self, timeout_ms: Option<u64>) -> std::io::Result<()> {
|
||||||
|
let timeout = timeout_ms.map(Duration::from_millis);
|
||||||
|
match self {
|
||||||
|
Self::Tcp(stream) => {
|
||||||
|
stream.set_read_timeout(timeout)?;
|
||||||
|
stream.set_write_timeout(timeout)?;
|
||||||
|
stream.set_nodelay(true)
|
||||||
|
}
|
||||||
|
#[cfg(unix)]
|
||||||
|
Self::Unix(stream) => {
|
||||||
|
stream.set_read_timeout(timeout)?;
|
||||||
|
stream.set_write_timeout(timeout)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Read for SyncTransport {
|
||||||
|
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||||
|
match self {
|
||||||
|
Self::Tcp(stream) => stream.read(buf),
|
||||||
|
#[cfg(unix)]
|
||||||
|
Self::Unix(stream) => stream.read(buf),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Write for SyncTransport {
|
||||||
|
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||||
|
match self {
|
||||||
|
Self::Tcp(stream) => stream.write(buf),
|
||||||
|
#[cfg(unix)]
|
||||||
|
Self::Unix(stream) => stream.write(buf),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn flush(&mut self) -> std::io::Result<()> {
|
||||||
|
match self {
|
||||||
|
Self::Tcp(stream) => stream.flush(),
|
||||||
|
#[cfg(unix)]
|
||||||
|
Self::Unix(stream) => stream.flush(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct SyncPooledTransport {
|
||||||
|
transport: SyncTransport,
|
||||||
|
broken: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SyncPooledTransport {
|
||||||
|
pub fn new(transport: SyncTransport) -> Self {
|
||||||
|
Self {
|
||||||
|
transport,
|
||||||
|
broken: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn mark_broken(&mut self) {
|
||||||
|
self.broken = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_broken(&self) -> bool {
|
||||||
|
self.broken || self.transport.take_error().ok().flatten().is_some()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn shutdown(&mut self) -> std::io::Result<()> {
|
||||||
|
self.transport.shutdown()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Read for SyncPooledTransport {
|
||||||
|
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||||
|
self.transport.read(buf)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Write for SyncPooledTransport {
|
||||||
|
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||||
|
self.transport.write(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn flush(&mut self) -> std::io::Result<()> {
|
||||||
|
self.transport.flush()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn credentials_bytes(username: Option<&str>, password: Option<&str>) -> Vec<u8> {
|
||||||
|
let username = username.unwrap_or_default();
|
||||||
|
let password = password.unwrap_or_default();
|
||||||
|
let mut bytes = format!("{username}:{password}").into_bytes();
|
||||||
|
bytes.push(CLIENT_CAPABILITY);
|
||||||
|
bytes.push(0);
|
||||||
|
bytes
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sends the credentials to the peer and returns the single byte it answers
/// with.
///
/// # Errors
///
/// Any write, flush, or read failure is returned as [`TransportError::Io`];
/// this includes the peer closing the stream before sending its reply byte.
pub fn perform_handshake<S: Read + Write>(
    stream: &mut S,
    username: Option<&str>,
    password: Option<&str>,
) -> TransportResult<u8> {
    let hello = credentials_bytes(username, password);
    stream.write_all(&hello)?;
    stream.flush()?;

    // The reply is exactly one byte.
    let mut reply = [0_u8];
    stream.read_exact(&mut reply)?;
    let [capability] = reply;
    Ok(capability)
}
|
||||||
|
|
||||||
|
pub fn encode_sync_query(message: &str) -> TransportResult<Vec<u8>> {
|
||||||
|
let _ = i32::try_from(message.len())
|
||||||
|
.map_err(|_| TransportError::InvalidQueryLength(message.len()))?;
|
||||||
|
let value = Value::Vector(Vector::new(
|
||||||
|
Attribute::None,
|
||||||
|
VectorData::Char(bytes::Bytes::copy_from_slice(message.as_bytes())),
|
||||||
|
));
|
||||||
|
|
||||||
|
encode_message(
|
||||||
|
&value,
|
||||||
|
Encoding::LittleEndian,
|
||||||
|
MessageType::Synchronous,
|
||||||
|
Compression::Uncompressed,
|
||||||
|
)
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn extract_q_error(frame_bytes: &[u8]) -> TransportResult<Option<String>> {
|
||||||
|
let frame =
|
||||||
|
Frame::parse(frame_bytes).map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
let body = frame.body();
|
||||||
|
if body.first().copied() != Some(128) {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let message = match body[1..].iter().position(|byte| *byte == 0) {
|
||||||
|
Some(end) => &body[1..1 + end],
|
||||||
|
None => &body[1..],
|
||||||
|
};
|
||||||
|
Ok(Some(String::from_utf8_lossy(message).into_owned()))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_message_header(header_bytes: [u8; HEADER_LEN]) -> TransportResult<MessageHeader> {
|
||||||
|
MessageHeader::from_bytes(header_bytes)
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn validate_response_header(header: MessageHeader) -> TransportResult<()> {
|
||||||
|
if header.message_type() != MessageType::Response {
|
||||||
|
return Err(TransportError::Protocol(format!(
|
||||||
|
"expected a q response frame, received {:?}",
|
||||||
|
header.message_type()
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn validate_response_header_bytes(
|
||||||
|
header_bytes: [u8; HEADER_LEN],
|
||||||
|
) -> TransportResult<MessageHeader> {
|
||||||
|
let header = parse_message_header(header_bytes)?;
|
||||||
|
validate_response_header(header)?;
|
||||||
|
Ok(header)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn validate_response_frame(frame_bytes: &[u8]) -> TransportResult<MessageHeader> {
|
||||||
|
let frame =
|
||||||
|
Frame::parse(frame_bytes).map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
let header = frame.header();
|
||||||
|
validate_response_header(header)?;
|
||||||
|
Ok(header)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn connect_tcp_transport(
|
||||||
|
host: &str,
|
||||||
|
port: u16,
|
||||||
|
username: Option<&str>,
|
||||||
|
password: Option<&str>,
|
||||||
|
timeout_ms: Option<u64>,
|
||||||
|
) -> TransportResult<SyncTransport> {
|
||||||
|
let mut stream = SyncTransport::Tcp(TcpStream::connect((host, port))?);
|
||||||
|
stream.set_timeouts(timeout_ms)?;
|
||||||
|
perform_handshake(&mut stream, username, password)?;
|
||||||
|
Ok(stream)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
pub fn connect_unix_transport(
|
||||||
|
path: &str,
|
||||||
|
username: Option<&str>,
|
||||||
|
password: Option<&str>,
|
||||||
|
timeout_ms: Option<u64>,
|
||||||
|
) -> TransportResult<SyncTransport> {
|
||||||
|
let mut stream = SyncTransport::Unix(UnixStream::connect(path)?);
|
||||||
|
stream.set_timeouts(timeout_ms)?;
|
||||||
|
perform_handshake(&mut stream, username, password)?;
|
||||||
|
Ok(stream)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn request_frame_over<S: Read + Write>(
|
||||||
|
stream: &mut S,
|
||||||
|
payload: &[u8],
|
||||||
|
) -> TransportResult<Vec<u8>> {
|
||||||
|
stream.write_all(payload)?;
|
||||||
|
stream.flush()?;
|
||||||
|
read_frame(stream).map_err(|error| TransportError::Protocol(error.to_string()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sends a payload and reads the response frame, using streaming decompression
|
||||||
|
/// when the response is compressed.
|
||||||
|
///
|
||||||
|
/// For compressed frames, the body is read in chunks and fed to a
|
||||||
|
/// [`StreamingDecompressor`] incrementally, overlapping network I/O with
|
||||||
|
/// decompression work. The returned frame is reconstructed as an
|
||||||
|
/// *uncompressed* frame so callers can decode it normally.
|
||||||
|
///
|
||||||
|
/// For uncompressed frames, this behaves identically to [`request_frame_over`].
|
||||||
|
pub fn request_frame_streaming_over<S: Read + Write>(
|
||||||
|
stream: &mut S,
|
||||||
|
payload: &[u8],
|
||||||
|
) -> TransportResult<Vec<u8>> {
|
||||||
|
stream.write_all(payload)?;
|
||||||
|
stream.flush()?;
|
||||||
|
|
||||||
|
// Read the 8-byte header.
|
||||||
|
let mut header_bytes = [0_u8; HEADER_LEN];
|
||||||
|
stream.read_exact(&mut header_bytes)?;
|
||||||
|
|
||||||
|
let header = parse_message_header(header_bytes)?;
|
||||||
|
let body_len = header.body_len();
|
||||||
|
|
||||||
|
if header.compression() == Compression::Uncompressed {
|
||||||
|
// Fast path: read entire uncompressed body.
|
||||||
|
let mut frame = vec![0_u8; header.size()];
|
||||||
|
frame[..HEADER_LEN].copy_from_slice(&header_bytes);
|
||||||
|
stream.read_exact(&mut frame[HEADER_LEN..])?;
|
||||||
|
return Ok(frame);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compressed frame: read the 4-byte size prefix first.
|
||||||
|
if body_len < 4 {
|
||||||
|
return Err(TransportError::Protocol(
|
||||||
|
"compressed body must be at least 4 bytes for size prefix".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut size_prefix = [0_u8; 4];
|
||||||
|
stream.read_exact(&mut size_prefix)?;
|
||||||
|
|
||||||
|
let mut decompressor = StreamingDecompressor::new(size_prefix, header.encoding())
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
|
||||||
|
// Read the remaining compressed body in chunks.
|
||||||
|
let remaining = body_len - 4;
|
||||||
|
let mut total_read = 0_usize;
|
||||||
|
let mut chunk = [0_u8; 8192];
|
||||||
|
|
||||||
|
while total_read < remaining {
|
||||||
|
let want = (remaining - total_read).min(chunk.len());
|
||||||
|
stream.read_exact(&mut chunk[..want])?;
|
||||||
|
decompressor
|
||||||
|
.feed(&chunk[..want])
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
total_read += want;
|
||||||
|
}
|
||||||
|
|
||||||
|
if !decompressor.is_complete() {
|
||||||
|
return Err(TransportError::Protocol(
|
||||||
|
"streaming decompression did not complete after reading entire body".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let decompressed = decompressor
|
||||||
|
.finish()
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
|
||||||
|
// Reconstruct as an uncompressed frame: header + decompressed body.
|
||||||
|
let new_size = HEADER_LEN + decompressed.len();
|
||||||
|
let new_header = MessageHeader::new(
|
||||||
|
header.encoding(),
|
||||||
|
header.message_type(),
|
||||||
|
Compression::Uncompressed,
|
||||||
|
new_size,
|
||||||
|
)
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
|
||||||
|
let mut frame = Vec::with_capacity(new_size);
|
||||||
|
frame.extend_from_slice(
|
||||||
|
&new_header
|
||||||
|
.to_bytes()
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?,
|
||||||
|
);
|
||||||
|
frame.extend_from_slice(&decompressed);
|
||||||
|
Ok(frame)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn begin_streaming_frame_over<S: Read + Write>(
|
||||||
|
stream: &mut S,
|
||||||
|
payload: &[u8],
|
||||||
|
) -> TransportResult<([u8; HEADER_LEN], usize)> {
|
||||||
|
stream.write_all(payload)?;
|
||||||
|
stream.flush()?;
|
||||||
|
|
||||||
|
let mut header = [0_u8; HEADER_LEN];
|
||||||
|
stream.read_exact(&mut header)?;
|
||||||
|
let message_length = read_message_length(&header)
|
||||||
|
.map_err(|error| TransportError::Protocol(error.to_string()))?;
|
||||||
|
Ok((header, message_length - HEADER_LEN))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct SyncConnection {
|
||||||
|
transport: Option<SyncTransport>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SyncConnection {
|
||||||
|
pub fn connect_tcp(
|
||||||
|
host: &str,
|
||||||
|
port: u16,
|
||||||
|
username: Option<&str>,
|
||||||
|
password: Option<&str>,
|
||||||
|
timeout_ms: Option<u64>,
|
||||||
|
) -> TransportResult<Self> {
|
||||||
|
Ok(Self {
|
||||||
|
transport: Some(connect_tcp_transport(
|
||||||
|
host, port, username, password, timeout_ms,
|
||||||
|
)?),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
pub fn connect_unix(
|
||||||
|
path: &str,
|
||||||
|
username: Option<&str>,
|
||||||
|
password: Option<&str>,
|
||||||
|
timeout_ms: Option<u64>,
|
||||||
|
) -> TransportResult<Self> {
|
||||||
|
Ok(Self {
|
||||||
|
transport: Some(connect_unix_transport(
|
||||||
|
path, username, password, timeout_ms,
|
||||||
|
)?),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn query_frame(&mut self, message: &str) -> TransportResult<Vec<u8>> {
|
||||||
|
let payload = encode_sync_query(message)?;
|
||||||
|
let transport = self.transport.as_mut().ok_or(TransportError::Closed)?;
|
||||||
|
let frame = request_frame_over(transport, &payload)?;
|
||||||
|
validate_response_frame(&frame)?;
|
||||||
|
Ok(frame)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_closed(&self) -> bool {
|
||||||
|
self.transport.is_none()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn close(&mut self) -> TransportResult<()> {
|
||||||
|
let Some(mut transport) = self.transport.take() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
transport.shutdown()?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for SyncConnection {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
let _ = self.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
59
pyproject.toml
Normal file
59
pyproject.toml
Normal file
|
|
@ -0,0 +1,59 @@
|
||||||
|
[build-system]
|
||||||
|
requires = ["maturin>=1.8,<2.0"]
|
||||||
|
build-backend = "maturin"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "qroissant"
|
||||||
|
version = "0.3.0"
|
||||||
|
description = "q/kdb+ IPC client library with Arrow-native Python interoperability"
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
license = "Apache-2.0"
|
||||||
|
license-files = []
|
||||||
|
authors = [{ name = "qroissant contributors" }]
|
||||||
|
keywords = ["kdb", "q", "ipc", "arrow", "pyo3"]
|
||||||
|
classifiers = [
|
||||||
|
"Development Status :: 3 - Alpha",
|
||||||
|
"License :: OSI Approved :: Apache Software License",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3 :: Only",
|
||||||
|
"Programming Language :: Python :: 3.10",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
"Programming Language :: Python :: 3.13",
|
||||||
|
"Programming Language :: Rust",
|
||||||
|
]
|
||||||
|
|
||||||
|
[dependency-groups]
|
||||||
|
dev = [
|
||||||
|
"maturin>=1.8,<2.0",
|
||||||
|
"polars>=1.39.3",
|
||||||
|
"pyarrow>=23.0.1",
|
||||||
|
"pytest>=8.3,<9.0",
|
||||||
|
"ruff>=0.11,<0.12",
|
||||||
|
]
|
||||||
|
docs = [
|
||||||
|
"mkdocs>=1.6,<2.0",
|
||||||
|
"mkdocs-material>=9.6,<10.0",
|
||||||
|
"mkdocs-material-extensions>=1.3,<2.0",
|
||||||
|
"mkdocstrings[python]>=0.28,<1.0",
|
||||||
|
"mkdocs-autorefs>=1.3,<2.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.maturin]
|
||||||
|
manifest-path = "crates/qroissant-python/Cargo.toml"
|
||||||
|
python-source = "python"
|
||||||
|
module-name = "qroissant._native"
|
||||||
|
features = []
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
testpaths = ["tests"]
|
||||||
|
pythonpath = ["python"]
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
target-version = "py310"
|
||||||
|
line-length = 88
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
select = ["E", "F", "I", "UP", "B"]
|
||||||
|
|
||||||
68
python/qroissant/__init__.py
Normal file
68
python/qroissant/__init__.py
Normal file
|
|
@ -0,0 +1,68 @@
|
||||||
|
"""Public Python API for qroissant."""

import importlib

# Every name below except "__native_available__" is looked up on the compiled
# extension module and re-exported from this package.
__all__ = [
    "AsyncConnection",
    "AsyncPool",
    "AsyncRawResponse",
    "Atom",
    "Attribute",
    "Compression",
    "Connection",
    "DecodeError",
    "DecodeOptions",
    "DecodeOptionsBuilder",
    "Dictionary",
    "EncodeField",
    "EncodeFieldBuilder",
    "EncodeOptions",
    "EncodeOptionsBuilder",
    "Encoding",
    "Endpoint",
    "List",
    "ListInterpretation",
    "MessageHeader",
    "MessageType",
    "OperationError",
    "Pool",
    "PoolClosedError",
    "PoolError",
    "PoolMetrics",
    "PoolOptions",
    "Primitive",
    "ProtocolError",
    "QRuntimeError",
    "QroissantError",
    "RawResponse",
    "Shape",
    "StringInterpretation",
    "SymbolInterpretation",
    "Table",
    "TransportError",
    "Type",
    "UnionMode",
    "Value",
    "Vector",
    "__native_available__",
    "FormattingOptions",
    "FormattingOptionsBuilder",
    "RowDisplay",
    "decode",
    "encode",
    "get_formatting_options",
    "reset_formatting_options",
    "set_formatting_options",
]

try:
    _native = importlib.import_module("qroissant._native")
    for _name in __all__:
        if _name == "__native_available__":
            # This flag is defined by this module, not by the extension.
            continue
        globals()[_name] = getattr(_native, _name)
    __native_available__ = True
except ImportError:  # pragma: no cover
    # The extension could not be imported: export only the availability flag
    # so that `from qroissant import *` does not fail on missing names.
    __native_available__ = False
    __all__ = ["__native_available__"]
|
||||||
50
python/qroissant/__init__.pyi
Normal file
50
python/qroissant/__init__.pyi
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
from qroissant._client import AsyncConnection as AsyncConnection
|
||||||
|
from qroissant._client import AsyncPool as AsyncPool
|
||||||
|
from qroissant._client import AsyncRawResponse as AsyncRawResponse
|
||||||
|
from qroissant._client import Connection as Connection
|
||||||
|
from qroissant._client import Pool as Pool
|
||||||
|
from qroissant._client import PoolMetrics as PoolMetrics
|
||||||
|
from qroissant._client import RawResponse as RawResponse
|
||||||
|
from qroissant._config import DecodeOptions as DecodeOptions
|
||||||
|
from qroissant._config import DecodeOptionsBuilder as DecodeOptionsBuilder
|
||||||
|
from qroissant._config import EncodeField as EncodeField
|
||||||
|
from qroissant._config import EncodeFieldBuilder as EncodeFieldBuilder
|
||||||
|
from qroissant._config import EncodeOptions as EncodeOptions
|
||||||
|
from qroissant._config import EncodeOptionsBuilder as EncodeOptionsBuilder
|
||||||
|
from qroissant._config import Endpoint as Endpoint
|
||||||
|
from qroissant._config import ListInterpretation as ListInterpretation
|
||||||
|
from qroissant._config import PoolOptions as PoolOptions
|
||||||
|
from qroissant._config import StringInterpretation as StringInterpretation
|
||||||
|
from qroissant._config import SymbolInterpretation as SymbolInterpretation
|
||||||
|
from qroissant._config import UnionMode as UnionMode
|
||||||
|
from qroissant._errors import DecodeError as DecodeError
|
||||||
|
from qroissant._errors import OperationError as OperationError
|
||||||
|
from qroissant._errors import PoolClosedError as PoolClosedError
|
||||||
|
from qroissant._errors import PoolError as PoolError
|
||||||
|
from qroissant._errors import ProtocolError as ProtocolError
|
||||||
|
from qroissant._errors import QroissantError as QroissantError
|
||||||
|
from qroissant._errors import QRuntimeError as QRuntimeError
|
||||||
|
from qroissant._errors import TransportError as TransportError
|
||||||
|
from qroissant._message import Compression as Compression
|
||||||
|
from qroissant._message import Encoding as Encoding
|
||||||
|
from qroissant._message import MessageHeader as MessageHeader
|
||||||
|
from qroissant._message import MessageType as MessageType
|
||||||
|
from qroissant._repr import FormattingOptions as FormattingOptions
|
||||||
|
from qroissant._repr import FormattingOptionsBuilder as FormattingOptionsBuilder
|
||||||
|
from qroissant._repr import RowDisplay as RowDisplay
|
||||||
|
from qroissant._repr import get_formatting_options as get_formatting_options
|
||||||
|
from qroissant._repr import reset_formatting_options as reset_formatting_options
|
||||||
|
from qroissant._repr import set_formatting_options as set_formatting_options
|
||||||
|
from qroissant._serde import decode as decode
|
||||||
|
from qroissant._serde import encode as encode
|
||||||
|
from qroissant._values import Atom as Atom
|
||||||
|
from qroissant._values import Attribute as Attribute
|
||||||
|
from qroissant._values import Dictionary as Dictionary
|
||||||
|
from qroissant._values import List as List
|
||||||
|
from qroissant._values import Primitive as Primitive
|
||||||
|
from qroissant._values import Shape as Shape
|
||||||
|
from qroissant._values import Table as Table
|
||||||
|
from qroissant._values import Type as Type
|
||||||
|
from qroissant._values import Value as Value
|
||||||
|
from qroissant._values import Vector as Vector
|
||||||
|
|
||||||
453
python/qroissant/_client.pyi
Normal file
453
python/qroissant/_client.pyi
Normal file
|
|
@ -0,0 +1,453 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from types import TracebackType
|
||||||
|
from typing import Literal, overload
|
||||||
|
|
||||||
|
from qroissant._config import DecodeOptions, Endpoint, PoolOptions
|
||||||
|
from qroissant._message import MessageHeader
|
||||||
|
from qroissant._values import Value
|
||||||
|
|
||||||
|
class RawResponse:
    """File-like, read-only view over the raw bytes of a q IPC response.

    Notes
    -----
    A raw query streams its response forward-only and decodes it on demand.

    Thread Safety
    -------------
    **Not thread-safe.** A ``RawResponse`` holds an exclusive lease on the
    connection it came from: that connection cannot be reused until the
    response has been fully consumed or explicitly closed.
    """

    def __enter__(self) -> RawResponse: ...
    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None: ...
    @property
    def closed(self) -> bool:
        """Whether the response has been closed."""
        ...
    @property
    def header(self) -> MessageHeader:
        """The IPC message header of this response."""
        ...
    def close(self) -> None:
        """Close the underlying response stream."""
        ...
    def readable(self) -> bool:
        """Return True if the stream can be read from."""
        ...
    def seekable(self) -> bool:
        """Return whether random access is supported (always False)."""
        ...
    def tell(self) -> int:
        """Return the current position in the stream."""
        ...
    def read(self, size: int | None = None, /) -> bytes:
        """Read bytes from the response payload."""
        ...
    def read1(self, size: int | None = None, /) -> bytes:
        """Read bytes from the response payload with minimal blocking."""
        ...
    def readinto(self, buffer: bytearray | memoryview, /) -> int:
        """Read payload bytes into a caller-provided buffer."""
        ...
    def readinto1(self, buffer: bytearray | memoryview, /) -> int:
        """Read payload bytes into a caller-provided buffer with minimal blocking."""
        ...
    def decode(self, *, options: DecodeOptions | None = None) -> Value:
        """Decode the remaining payload bytes into a qroissant value."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncRawResponse:
    """Asynchronous, read-only view over the raw bytes of a q IPC response."""

    async def __aenter__(self) -> AsyncRawResponse: ...
    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None: ...
    @property
    def closed(self) -> bool:
        """Whether the response has been closed."""
        ...
    @property
    def header(self) -> MessageHeader:
        """The IPC message header of this response."""
        ...
    async def close(self) -> None:
        """Close the underlying response stream."""
        ...
    async def read(self, size: int | None = None, /) -> bytes:
        """Asynchronously read bytes from the response payload."""
        ...
    async def read1(self, size: int | None = None, /) -> bytes:
        """Asynchronously read bytes with minimal blocking."""
        ...
    async def readinto(self, buffer: bytearray | memoryview, /) -> int:
        """Asynchronously read payload bytes into a buffer."""
        ...
    async def readinto1(self, buffer: bytearray | memoryview, /) -> int:
        """Asynchronously read payload bytes into a buffer with minimal blocking."""
        ...
    async def decode(self, *, options: DecodeOptions | None = None) -> Value:
        """Asynchronously decode the remaining payload bytes into a qroissant value."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class PoolMetrics:
    """Point-in-time view of a pool's occupancy, configuration, and lifecycle."""

    @property
    def connections(self) -> int:
        """Total number of connections the pool currently tracks."""
        ...
    @property
    def idle_connections(self) -> int:
        """How many of the tracked connections are currently idle."""
        ...
    @property
    def max_size(self) -> int:
        """The configured maximum pool size."""
        ...
    @property
    def min_idle(self) -> int | None:
        """The configured minimum idle target."""
        ...
    @property
    def initialized(self) -> bool:
        """Whether the underlying pool has been created yet."""
        ...
    @property
    def closed(self) -> bool:
        """Whether the pool has been explicitly closed."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class Connection:
    """Synchronous connection that executes q IPC requests.

    Parameters
    ----------
    endpoint : Endpoint
        Addressing and transport configuration.
    options : DecodeOptions | None, optional
        Default options applied to the wrapper objects decoded from queries.

    Thread Safety
    -------------
    **Not thread-safe.** Use one connection per thread, or use :class:`Pool`
    for safe multi-threaded access.
    """

    def __init__(self, endpoint: Endpoint, *, options: DecodeOptions | None = None) -> None: ...
    def __enter__(self) -> Connection: ...
    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None: ...
    def close(self) -> None:
        """Close the underlying connection."""
        ...
    @overload
    def query(
        self, expr: str, /, *, raw: Literal[False] = False, decode: DecodeOptions | None = None
    ) -> Value: ...
    @overload
    def query(self, expr: str, /, *, raw: Literal[True]) -> RawResponse: ...
    def query(
        self, expr: str, /, *, raw: bool = False, decode: DecodeOptions | None = None
    ) -> Value | RawResponse:
        """Execute a synchronous q expression.

        Parameters
        ----------
        expr : str
            The q expression to evaluate remotely.
        raw : bool, default=False
            When True, return a `RawResponse` stream instead of decoding.
        decode : DecodeOptions | None, optional
            Decoding options for this query only; they override the
            connection defaults.

        Returns
        -------
        Value | RawResponse
            The decoded wrapper value, or a file-like raw response when
            `raw=True`.
        """
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncConnection:
    """Asynchronous connection capable of executing q IPC requests.

    Parameters
    ----------
    endpoint : Endpoint
        Addressing and transport configuration.
    options : DecodeOptions | None, optional
        Default options to apply to decoded queried wrapper objects.

    Thread Safety
    -------------
    **Not thread-safe.** Must be driven from a single async task. Use
    :class:`AsyncPool` for concurrent access.
    """

    def __init__(
        self,
        endpoint: Endpoint,
        *,
        options: DecodeOptions | None = None,
    ) -> None: ...

    # Async context-manager protocol: enables
    # ``async with AsyncConnection(...) as conn:``.
    async def __aenter__(self) -> AsyncConnection: ...

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None: ...

    async def close(self) -> None:
        """Close the underlying connection."""
        ...

    # ``raw`` selects the return type: decoded ``Value`` (default) or
    # ``AsyncRawResponse``. The ``raw=True`` overload takes no ``decode``
    # argument.
    @overload
    async def query(
        self,
        expr: str,
        /,
        *,
        raw: Literal[False] = False,
        decode: DecodeOptions | None = None,
    ) -> Value: ...

    @overload
    async def query(
        self,
        expr: str,
        /,
        *,
        raw: Literal[True],
    ) -> AsyncRawResponse: ...

    async def query(
        self,
        expr: str,
        /,
        *,
        raw: bool = False,
        decode: DecodeOptions | None = None,
    ) -> Value | AsyncRawResponse:
        """Execute an asynchronous q expression.

        Parameters
        ----------
        expr : str
            The q expression to evaluate remotely.
        raw : bool, default=False
            If True, returns an `AsyncRawResponse` stream instead of decoding.
        decode : DecodeOptions | None, optional
            Decoding options for this specific query, overriding connection defaults.

        Returns
        -------
        Value | AsyncRawResponse
            Decoded wrapper value or a file-like raw response if `raw=True`.
        """
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class Pool:
    """Synchronous connection pool for issuing q IPC requests.

    Parameters
    ----------
    endpoint : Endpoint
        Addressing and transport configuration.
    options : DecodeOptions | None, optional
        Default options to apply to decoded queried wrapper objects.
    pool : PoolOptions | None, optional
        Pool lifecycle and retry configuration.

    Thread Safety
    -------------
    **Thread-safe.** Designed for multi-threaded use — each call checks out a
    connection, uses it, and returns it to the pool automatically.
    """

    def __init__(
        self,
        endpoint: Endpoint,
        *,
        options: DecodeOptions | None = None,
        pool: PoolOptions | None = None,
    ) -> None: ...

    # Context-manager protocol: enables ``with Pool(...) as pool:``.
    def __enter__(self) -> Pool: ...

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None: ...

    def close(self) -> None:
        """Close the pool and reject future checkouts."""
        ...

    def prewarm(self) -> PoolMetrics:
        """Create and validate idle connections ahead of the next query."""
        ...

    def metrics(self) -> PoolMetrics:
        """Return a snapshot of occupancy, configuration, and lifecycle state."""
        ...

    # ``raw`` selects the return type: decoded ``Value`` (default) or
    # ``RawResponse``. The ``raw=True`` overload takes no ``decode`` argument.
    @overload
    def query(
        self,
        expr: str,
        /,
        *,
        raw: Literal[False] = False,
        decode: DecodeOptions | None = None,
    ) -> Value: ...

    @overload
    def query(
        self,
        expr: str,
        /,
        *,
        raw: Literal[True],
    ) -> RawResponse: ...

    def query(
        self,
        expr: str,
        /,
        *,
        raw: bool = False,
        decode: DecodeOptions | None = None,
    ) -> Value | RawResponse:
        """Execute a pooled synchronous q expression.

        Parameters
        ----------
        expr : str
            The q expression to evaluate remotely.
        raw : bool, default=False
            If True, returns a `RawResponse` stream instead of decoding.
        decode : DecodeOptions | None, optional
            Decoding options for this specific query, overriding connection defaults.

        Returns
        -------
        Value | RawResponse
            Decoded wrapper value or a file-like raw response if `raw=True`.
        """
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class AsyncPool:
    """Asynchronous connection pool for issuing q IPC requests.

    Parameters
    ----------
    endpoint : Endpoint
        Addressing and transport configuration.
    options : DecodeOptions | None, optional
        Default options to apply to decoded queried wrapper objects.
    pool : PoolOptions | None, optional
        Pool lifecycle and retry configuration.

    Thread Safety
    -------------
    **Thread-safe.** Designed for concurrent use across multiple async tasks.
    """

    def __init__(
        self,
        endpoint: Endpoint,
        *,
        options: DecodeOptions | None = None,
        pool: PoolOptions | None = None,
    ) -> None: ...

    # Async context-manager protocol: enables
    # ``async with AsyncPool(...) as pool:``.
    async def __aenter__(self) -> AsyncPool: ...

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None: ...

    async def close(self) -> None:
        """Close the pool and reject future checkouts."""
        ...

    async def prewarm(self) -> PoolMetrics:
        """Create and validate idle connections ahead of the next query."""
        ...

    async def metrics(self) -> PoolMetrics:
        """Return a snapshot of occupancy, configuration, and lifecycle state."""
        ...

    # ``raw`` selects the return type: decoded ``Value`` (default) or
    # ``AsyncRawResponse``. The ``raw=True`` overload takes no ``decode``
    # argument.
    @overload
    async def query(
        self,
        expr: str,
        /,
        *,
        raw: Literal[False] = False,
        decode: DecodeOptions | None = None,
    ) -> Value: ...

    @overload
    async def query(
        self,
        expr: str,
        /,
        *,
        raw: Literal[True],
    ) -> AsyncRawResponse: ...

    async def query(
        self,
        expr: str,
        /,
        *,
        raw: bool = False,
        decode: DecodeOptions | None = None,
    ) -> Value | AsyncRawResponse:
        """Execute a pooled asynchronous q expression.

        Parameters
        ----------
        expr : str
            The q expression to evaluate remotely.
        raw : bool, default=False
            If True, returns an `AsyncRawResponse` stream instead of decoding.
        decode : DecodeOptions | None, optional
            Decoding options for this specific query, overriding connection defaults.

        Returns
        -------
        Value | AsyncRawResponse
            Decoded wrapper value or a file-like raw response if `raw=True`.
        """
        ...
|
||||||
416
python/qroissant/_config.pyi
Normal file
416
python/qroissant/_config.pyi
Normal file
|
|
@ -0,0 +1,416 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import enum
|
||||||
|
|
||||||
|
from qroissant._values import Attribute, Primitive, Shape
|
||||||
|
|
||||||
|
class Endpoint:
    """Connection destination configuration for a q process.

    Notes
    -----
    An endpoint describes how to connect to a q process, including the transport
    protocol (TCP or Unix socket), address, and authentication credentials.

    Instances are created through the :meth:`tcp` and :meth:`unix` static
    factory methods.
    """

    @staticmethod
    def tcp(
        host: str,
        port: int,
        *,
        username: str | None = None,
        password: str | None = None,
        timeout_ms: int | None = None,
    ) -> Endpoint:
        """Create a TCP endpoint configuration.

        Parameters
        ----------
        host : str
            Hostname or IP address of the q process.
        port : int
            TCP port exposed by the q process.
        username : str | None, optional
            Username used during the q IPC handshake.
        password : str | None, optional
            Password used during the q IPC handshake.
        timeout_ms : int | None, optional
            Connection timeout in milliseconds.

        Returns
        -------
        Endpoint
            TCP endpoint configuration.
        """
        ...

    @staticmethod
    def unix(
        path: str,
        *,
        username: str | None = None,
        password: str | None = None,
        timeout_ms: int | None = None,
    ) -> Endpoint:
        """Create a Unix-domain socket endpoint configuration.

        Parameters
        ----------
        path : str
            Filesystem path to the q Unix domain socket.
        username : str | None, optional
            Username used during the q IPC handshake.
        password : str | None, optional
            Password used during the q IPC handshake.
        timeout_ms : int | None, optional
            Connection timeout in milliseconds.

        Returns
        -------
        Endpoint
            Unix domain socket endpoint configuration.
        """
        ...

    @property
    def scheme(self) -> str:
        """The transport scheme ('tcp' or 'unix')."""
        ...

    @property
    def host(self) -> str | None:
        """The hostname, if applicable."""
        ...

    @property
    def port(self) -> int | None:
        """The port, if applicable."""
        ...

    @property
    def path(self) -> str | None:
        """The socket path, if applicable."""
        ...

    @property
    def username(self) -> str | None:
        """The configured username."""
        ...

    @property
    def password(self) -> str | None:
        """The configured password."""
        ...

    @property
    def timeout_ms(self) -> int | None:
        """The connection timeout in milliseconds."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class PoolOptions:
    """Connection pool configuration shared by sync and async pool types.

    Parameters
    ----------
    max_size : int, default=10
        Maximum number of connections managed by the pool.
    min_idle : int | None, optional
        Minimum number of idle connections to retain when the pool is warmed.
    checkout_timeout_ms : int, default=30000
        Maximum time spent waiting for a pooled connection checkout.
    idle_timeout_ms : int | None, optional
        Maximum idle lifetime for an unused pooled connection.
    max_lifetime_ms : int | None, optional
        Maximum total lifetime for a pooled connection.
    test_on_checkout : bool, default=True
        Whether connections should be validated before they are handed out.
    healthcheck_query : str | None, default="::"
        Optional q expression used to validate live pooled connections.
        Set to ``None`` to disable active q health checks.
    retry_attempts : int, default=0
        Number of retry attempts after the initial pooled query failure.
    retry_backoff_ms : int, default=0
        Delay between retry attempts, expressed in milliseconds.
    """

    # All constructor arguments are keyword-only.
    def __init__(
        self,
        *,
        max_size: int = 10,
        min_idle: int | None = None,
        checkout_timeout_ms: int = 30_000,
        idle_timeout_ms: int | None = None,
        max_lifetime_ms: int | None = None,
        test_on_checkout: bool = True,
        healthcheck_query: str | None = "::",
        retry_attempts: int = 0,
        retry_backoff_ms: int = 0,
    ) -> None: ...

    @property
    def max_size(self) -> int:
        """Configured maximum number of connections managed by the pool."""
        ...

    @property
    def min_idle(self) -> int | None:
        """Configured minimum number of idle connections to retain."""
        ...

    @property
    def checkout_timeout_ms(self) -> int:
        """Configured maximum wait time for a connection checkout, in milliseconds."""
        ...

    @property
    def idle_timeout_ms(self) -> int | None:
        """Configured maximum idle lifetime for an unused connection, in milliseconds."""
        ...

    @property
    def max_lifetime_ms(self) -> int | None:
        """Configured maximum total lifetime for a connection, in milliseconds."""
        ...

    @property
    def test_on_checkout(self) -> bool:
        """Whether connections are validated before being handed out."""
        ...

    @property
    def healthcheck_query(self) -> str | None:
        """q expression used to validate live pooled connections, or ``None`` to disable."""
        ...

    @property
    def retry_attempts(self) -> int:
        """Number of retry attempts after the initial pooled query failure."""
        ...

    @property
    def retry_backoff_ms(self) -> int:
        """Delay between retry attempts, in milliseconds."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class SymbolInterpretation(enum.Enum):
    """Arrow representation used for q symbols."""

    # Member values are elided (``...``) in this stub; only the names are
    # part of the declared interface.
    UTF8 = ...
    LARGE_UTF8 = ...
    UTF8_VIEW = ...
    DICTIONARY = ...
    RAW_BYTES = ...
|
||||||
|
|
||||||
|
|
||||||
|
class ListInterpretation(enum.Enum):
    """Arrow representation used for q lists."""

    # Member values are elided (``...``) in this stub.
    LIST = ...
    LARGE_LIST = ...
    LIST_VIEW = ...
|
||||||
|
|
||||||
|
|
||||||
|
class StringInterpretation(enum.Enum):
    """Arrow representation used for q char data."""

    # Member values are elided (``...``) in this stub.
    UTF8 = ...
    BINARY = ...
|
||||||
|
|
||||||
|
|
||||||
|
class UnionMode(enum.Enum):
    """Arrow union representation used for mixed general lists."""

    # Member values are elided (``...``) in this stub.
    DENSE = ...
    SPARSE = ...
|
||||||
|
|
||||||
|
|
||||||
|
class DecodeOptions:
    """Deserialization and Arrow conversion options.

    Notes
    -----
    Construct instances through :meth:`builder` and finish with
    :meth:`DecodeOptionsBuilder.build`.
    """

    @staticmethod
    def builder() -> DecodeOptionsBuilder:
        """Create a builder initialized with qroissant's default options."""
        ...

    @property
    def list_interpretation(self) -> ListInterpretation:
        """Arrow container type used when projecting q lists."""
        ...

    @property
    def union_mode(self) -> UnionMode:
        """Arrow union encoding used for mixed general lists."""
        ...

    @property
    def string_interpretation(self) -> StringInterpretation:
        """Arrow type used when projecting q char vectors."""
        ...

    @property
    def symbol_interpretation(self) -> SymbolInterpretation:
        """Arrow type used when projecting q symbol vectors and atoms."""
        ...

    @property
    def assume_symbol_utf8(self) -> bool:
        """Whether q symbols are assumed to be valid UTF-8."""
        ...

    @property
    def parallel(self) -> bool:
        """Whether table columns are decoded in parallel using multiple threads."""
        ...

    @property
    def preserve_original_body(self) -> bool:
        """Whether the raw IPC payload bytes are retained on the decoded value."""
        ...

    @property
    def validate_compressed_trailing_bytes(self) -> bool:
        """Whether trailing zero bytes after LZW-decompressed output are validated."""
        ...

    @property
    def temporal_nulls(self) -> bool:
        """Whether temporal null sentinels are mapped to ``None`` in Arrow arrays."""
        ...

    @property
    def treat_infinity_as_null(self) -> bool:
        """Whether ±∞ sentinels are mapped to ``None`` in Arrow arrays."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class DecodeOptionsBuilder:
    """Builder for :class:`DecodeOptions`.

    Every ``with_*`` method is declared to return a
    :class:`DecodeOptionsBuilder`, so calls can be chained before
    :meth:`build`.
    """

    def with_list_interpretation(
        self, value: ListInterpretation, /
    ) -> DecodeOptionsBuilder:
        """Set the Arrow container type for q list projection."""
        ...

    def with_union_mode(self, value: UnionMode, /) -> DecodeOptionsBuilder:
        """Set the Arrow union encoding for mixed general lists."""
        ...

    def with_string_interpretation(
        self, value: StringInterpretation, /
    ) -> DecodeOptionsBuilder:
        """Set the Arrow type for q char vector projection."""
        ...

    def with_symbol_interpretation(
        self, value: SymbolInterpretation, /
    ) -> DecodeOptionsBuilder:
        """Set the Arrow type for q symbol projection."""
        ...

    def with_assume_symbol_utf8(self, value: bool, /) -> DecodeOptionsBuilder:
        """Set whether q symbols are assumed to be valid UTF-8."""
        ...

    def with_parallel(self, value: bool, /) -> DecodeOptionsBuilder:
        """Set whether table columns are decoded in parallel."""
        ...

    def with_preserve_original_body(self, value: bool, /) -> DecodeOptionsBuilder:
        """Set whether the raw IPC payload bytes are retained on decoded values."""
        ...

    def with_validate_compressed_trailing_bytes(
        self, value: bool, /
    ) -> DecodeOptionsBuilder:
        """Set whether trailing zero bytes after LZW decompression are validated."""
        ...

    def with_temporal_nulls(self, value: bool, /) -> DecodeOptionsBuilder:
        """Set whether temporal null sentinels are mapped to ``None`` in Arrow arrays."""
        ...

    def with_treat_infinity_as_null(self, value: bool, /) -> DecodeOptionsBuilder:
        """Set whether ±∞ sentinels are mapped to ``None`` in Arrow arrays."""
        ...

    def build(self) -> DecodeOptions:
        """Finalize the builder into an immutable :class:`DecodeOptions` instance."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class EncodeField:
    """q serialization hints for a single Arrow value or table column."""

    @staticmethod
    def builder() -> EncodeFieldBuilder:
        """Create a builder initialized with empty serialization hints."""
        ...

    @property
    def primitive(self) -> Primitive | None:
        """Forced q primitive type for this field, or ``None`` to infer."""
        ...

    @property
    def shape(self) -> Shape | None:
        """Forced q structural shape for this field, or ``None`` to infer."""
        ...

    @property
    def attribute(self) -> Attribute | None:
        """q attribute to apply to this field, or ``None`` for no attribute."""
        ...

    @property
    def sorted(self) -> bool | None:
        """Whether this field is sorted, or ``None`` to infer from the attribute."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class EncodeFieldBuilder:
    """Builder for :class:`EncodeField`.

    Every ``with_*`` method is declared to return an
    :class:`EncodeFieldBuilder`, so calls can be chained before :meth:`build`.
    """

    def with_primitive(self, value: Primitive, /) -> EncodeFieldBuilder:
        """Force the q primitive type for this field."""
        ...

    def with_shape(self, value: Shape, /) -> EncodeFieldBuilder:
        """Force the q structural shape for this field."""
        ...

    def with_attribute(self, value: Attribute, /) -> EncodeFieldBuilder:
        """Set the q attribute to apply to this field."""
        ...

    def with_sorted(self, value: bool, /) -> EncodeFieldBuilder:
        """Set whether this field is sorted."""
        ...

    def build(self) -> EncodeField:
        """Finalize the builder into immutable per-field encoding hints."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class EncodeOptions:
    """Encoding options for Arrow-backed :func:`qroissant.encode` calls.

    Notes
    -----
    qroissant resolves Arrow inputs in the following order:

    1. qroissant metadata embedded on the Arrow field or schema
    2. per-field overrides attached through :meth:`EncodeOptionsBuilder.with_field`
    3. global defaults on :class:`EncodeOptions`
    4. generic Arrow type inference
    """

    @staticmethod
    def builder() -> EncodeOptionsBuilder:
        """Create a builder initialized with qroissant's default encoding policy."""
        ...

    @property
    def primitive(self) -> Primitive | None:
        """Global fallback q primitive type, or ``None`` to infer per-field."""
        ...

    @property
    def shape(self) -> Shape | None:
        """Global fallback q structural shape, or ``None`` to infer per-field."""
        ...

    @property
    def attribute(self) -> Attribute | None:
        """Global fallback q attribute, or ``None`` for no attribute."""
        ...

    @property
    def strict(self) -> bool:
        """Whether unknown Arrow types raise an error instead of being inferred."""
        ...

    def field(self, name: str, /) -> EncodeField | None:
        """Return per-field encoding hints for the given column name, if any."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class EncodeOptionsBuilder:
    """Builder for :class:`EncodeOptions`.

    Every ``with_*`` method is declared to return an
    :class:`EncodeOptionsBuilder`, so calls can be chained before
    :meth:`build`.
    """

    def with_primitive(self, value: Primitive, /) -> EncodeOptionsBuilder:
        """Set the global fallback q primitive type."""
        ...

    def with_shape(self, value: Shape, /) -> EncodeOptionsBuilder:
        """Set the global fallback q structural shape."""
        ...

    def with_attribute(self, value: Attribute, /) -> EncodeOptionsBuilder:
        """Set the global fallback q attribute."""
        ...

    def with_strict(self, value: bool, /) -> EncodeOptionsBuilder:
        """Set whether unknown Arrow types raise an error instead of being inferred."""
        ...

    def with_field(
        self, name: str, field: EncodeField, /
    ) -> EncodeOptionsBuilder:
        """Attach per-field encoding hints for the given column name."""
        ...

    def build(self) -> EncodeOptions:
        """Finalize the builder into immutable Arrow-to-q encoding options."""
        ...
|
||||||
24
python/qroissant/_errors.pyi
Normal file
24
python/qroissant/_errors.pyi
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
class QroissantError(Exception):
    """Base class for qroissant-specific failures."""


class DecodeError(QroissantError):
    """Raised when q IPC payload decoding fails."""


class ProtocolError(QroissantError):
    """Raised when q IPC framing or message validation fails."""


class TransportError(QroissantError):
    """Raised when transport IO or socket operations fail."""


class OperationError(QroissantError):
    """Raised when a qroissant operation is unsupported."""


class QRuntimeError(QroissantError):
    """Raised when the remote q process returns an error response."""


class PoolError(QroissantError):
    """Raised when connection pool management fails."""


class PoolClosedError(PoolError):
    """Raised when a closed connection pool is used."""
|
||||||
|
|
||||||
43
python/qroissant/_message.pyi
Normal file
43
python/qroissant/_message.pyi
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import enum
|
||||||
|
|
||||||
|
class Encoding(enum.Enum):
    """Endianness of the q IPC message payload."""

    # Member values are elided (``...``) in this stub.
    LITTLE_ENDIAN = ...
    BIG_ENDIAN = ...
|
||||||
|
|
||||||
|
|
||||||
|
class Compression(enum.Enum):
    """Compression mode of the q IPC message payload."""

    # Member values are elided (``...``) in this stub.
    UNCOMPRESSED = ...
    COMPRESSED = ...
    COMPRESSED_LARGE = ...
|
||||||
|
|
||||||
|
|
||||||
|
class MessageType(enum.Enum):
    """IPC message type tag."""

    # Member values are elided (``...``) in this stub.
    ASYNCHRONOUS = ...
    SYNCHRONOUS = ...
    RESPONSE = ...
|
||||||
|
|
||||||
|
|
||||||
|
class MessageHeader:
    """Header information extracted from a q IPC frame."""

    @property
    def encoding(self) -> Encoding:
        """The endianness of the payload."""
        ...

    @property
    def message_type(self) -> MessageType:
        """The message type tag."""
        ...

    @property
    def compression(self) -> Compression:
        """The compression mode."""
        ...

    @property
    def size(self) -> int:
        """The total size of the message frame in bytes."""
        ...
|
||||||
|
|
||||||
67
python/qroissant/_repr.pyi
Normal file
67
python/qroissant/_repr.pyi
Normal file
|
|
@ -0,0 +1,67 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import enum
|
||||||
|
|
||||||
|
class RowDisplay(enum.Enum):
    """Row selection strategy used by qroissant repr formatting."""

    # Member values are elided (``...``) in this stub.
    HEAD = ...
    HEAD_TAIL = ...
|
||||||
|
|
||||||
|
|
||||||
|
class FormattingOptions:
    """Formatting options for user-facing qroissant string representations.

    Notes
    -----
    These options control how qroissant values render through :func:`str` and
    :func:`repr`. Apply them process-wide through
    :func:`qroissant.set_formatting_options`.
    """

    @staticmethod
    def builder() -> FormattingOptionsBuilder:
        """Create a builder initialized with qroissant's default formatting policy."""
        ...

    @property
    def max_rows(self) -> int:
        """Maximum number of rows displayed in table repr."""
        ...

    @property
    def max_columns(self) -> int:
        """Maximum number of columns displayed in table repr."""
        ...

    @property
    def row_display(self) -> RowDisplay:
        """Row selection strategy used when the row limit is exceeded."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class FormattingOptionsBuilder:
    """Builder for :class:`FormattingOptions`.

    Every ``with_*`` method is declared to return a
    :class:`FormattingOptionsBuilder`, so calls can be chained before
    :meth:`build`.
    """

    def with_max_rows(self, value: int, /) -> FormattingOptionsBuilder:
        """Set the maximum number of rows displayed in table repr."""
        ...

    def with_max_columns(self, value: int, /) -> FormattingOptionsBuilder:
        """Set the maximum number of columns displayed in table repr."""
        ...

    def with_row_display(self, value: RowDisplay, /) -> FormattingOptionsBuilder:
        """Set the row selection strategy used when the row limit is exceeded."""
        ...

    def build(self) -> FormattingOptions:
        """Finalize the builder into immutable repr formatting options."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
def get_formatting_options() -> FormattingOptions:
    """Get the current process-wide formatting options."""
    ...
|
||||||
|
|
||||||
|
def set_formatting_options(options: FormattingOptions, /) -> None:
    """Set the process-wide formatting options.

    Parameters
    ----------
    options : FormattingOptions
        Formatting options to install (positional-only).
    """
    ...
|
||||||
|
|
||||||
|
def reset_formatting_options() -> None:
    """Reset the formatting options to their default values."""
    ...
|
||||||
72
python/qroissant/_serde.pyi
Normal file
72
python/qroissant/_serde.pyi
Normal file
|
|
@ -0,0 +1,72 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from qroissant._config import DecodeOptions, EncodeOptions
|
||||||
|
from qroissant._message import Compression, Encoding, MessageType
|
||||||
|
from qroissant._values import Value
|
||||||
|
|
||||||
|
def decode(
    payload: object, /, *, options: DecodeOptions | None = None
) -> Value:
    """Turn a q IPC payload into a typed qroissant value.

    Parameters
    ----------
    payload : object
        The raw q IPC payload. Accepted forms are ``bytes``, a read-only
        contiguous buffer, or an object that exposes such a buffer through
        ``.data``.
    options : DecodeOptions | None, optional
        Options controlling decoding and Arrow projection. The default
        qroissant options apply when this is omitted.

    Returns
    -------
    Value
        The decoded q object, represented by qroissant wrapper types.

    Raises
    ------
    DecodeError
        If the IPC payload cannot be decoded into a valid q value.
    ProtocolError
        If the frame header is malformed or unsupported.
    """
    ...
|
||||||
|
|
||||||
|
|
||||||
|
def encode(
    value: object,
    /,
    *,
    options: EncodeOptions | None = None,
    encoding: Encoding = Encoding.LITTLE_ENDIAN,
    message_type: MessageType = MessageType.ASYNCHRONOUS,
    compression: Compression = Compression.UNCOMPRESSED,
) -> bytes:
    """Turn a qroissant value into q IPC bytes.

    Parameters
    ----------
    value : object
        The object to encode: a qroissant wrapper value, ``None`` (encoded
        as q null), any object exposing a compatible ``serialize`` method,
        or an object implementing the Arrow PyCapsule interface.
    options : EncodeOptions | None, optional
        Arrow-to-q encoding hints, applied when ``value`` is encoded from an
        Arrow array or stream.
    encoding : Encoding, default=Encoding.LITTLE_ENDIAN
        Endianness of the generated IPC payload.
    message_type : MessageType, default=MessageType.ASYNCHRONOUS
        IPC message type tag that is written into the frame header.
    compression : Compression, default=Compression.UNCOMPRESSED
        Compression mode used when encoding the payload.

    Returns
    -------
    bytes
        The encoded q IPC payload.
    """
    ...
|
||||||
|
|
||||||
234
python/qroissant/_values.pyi
Normal file
234
python/qroissant/_values.pyi
Normal file
|
|
@ -0,0 +1,234 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import enum
|
||||||
|
from collections.abc import Iterator
|
||||||
|
|
||||||
|
from qroissant._config import EncodeOptions
|
||||||
|
from qroissant._message import Compression, Encoding, MessageType
|
||||||
|
|
||||||
|
class Attribute(enum.Enum):
    """Attribute that q applies to a value (e.g., sorted, unique, parted, grouped)."""

    # Member values are elided in this stub; only the names are declared.
    NONE = ...
    SORTED = ...
    UNIQUE = ...
    PARTED = ...
    GROUPED = ...
|
||||||
|
|
||||||
|
|
||||||
|
class Shape(enum.Enum):
    """Structural shape a q value can take."""

    # Member values are elided in this stub; only the names are declared.
    ATOM = ...
    VECTOR = ...
    LIST = ...
    DICTIONARY = ...
    TABLE = ...
    UNARY_PRIMITIVE = ...
|
||||||
|
|
||||||
|
|
||||||
|
class Primitive(enum.Enum):
    """Primitive domain that underlies a q value."""

    # Member values are elided in this stub; only the names are declared.
    BOOLEAN = ...
    GUID = ...
    BYTE = ...
    SHORT = ...
    INT = ...
    LONG = ...
    REAL = ...
    FLOAT = ...
    CHAR = ...
    SYMBOL = ...
    TIMESTAMP = ...
    MONTH = ...
    DATE = ...
    DATETIME = ...
    TIMESPAN = ...
    MINUTE = ...
    SECOND = ...
    TIME = ...
    MIXED = ...
|
||||||
|
|
||||||
|
|
||||||
|
class Type:
    """Descriptor of the type of a qroissant value."""

    @property
    def primitive(self) -> Primitive | None:
        """Underlying q primitive; ``None`` if mixed."""
        ...

    @property
    def shape(self) -> Shape:
        """Structural shape of the value."""
        ...

    @property
    def attribute(self) -> Attribute | None:
        """q attribute applied to the value, when there is one."""
        ...

    # NOTE(review): the annotation admits ``None`` but its meaning is not
    # documented in this stub — confirm against the implementation.
    @property
    def sorted(self) -> bool | None:
        """Whether the value is sorted."""
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class Value:
    """Common base class of the decoded qroissant wrapper objects."""

    @property
    def qtype(self) -> Type:
        """Full q type descriptor of this value."""
        ...

    @property
    def primitive(self) -> Primitive | None:
        """Underlying q primitive; ``None`` if mixed."""
        ...

    @property
    def shape(self) -> Shape:
        """Structural shape of the value."""
        ...

    @property
    def attribute(self) -> Attribute | None:
        """q attribute applied to the value, when there is one."""
        ...

    def serialize(
        self,
        *,
        options: EncodeOptions | None = None,
        encoding: Encoding = ...,
        message_type: MessageType = ...,
        compression: Compression = ...,
    ) -> bytes:
        """Encode this value as a q IPC frame.

        Parameters
        ----------
        options : EncodeOptions | None, optional
            Arrow-to-q encoding hints. The default encoding policy applies
            when this is omitted.
        encoding : Encoding, default=Encoding.LITTLE_ENDIAN
            Endianness used for the generated IPC payload.
        message_type : MessageType, default=MessageType.ASYNCHRONOUS
            IPC message type tag that is written into the frame header.
        compression : Compression, default=Compression.UNCOMPRESSED
            Compression mode that is applied to the payload.

        Returns
        -------
        bytes
            The encoded q IPC payload.
        """
        ...
|
||||||
|
|
||||||
|
|
||||||
|
class Atom(Value):
    """A scalar q value (e.g., integer, float, symbol)."""

    def as_py(self) -> object:
        """Return the atom converted to a native Python type."""
        ...

    @property
    def value(self) -> object:
        """Native Python value of the atom (equivalent to ``as_py()``)."""
        ...

    def is_null(self) -> bool:
        """Return ``True`` if this atom holds the q null sentinel for its type."""
        ...

    def is_infinite(self) -> bool:
        """Return ``True`` if this atom is a q infinity sentinel (±∞)."""
        ...

    def __arrow_c_array__(
        self,
        requested_schema: object | None = None,
        /,
    ) -> object:
        """Export the atom as an Arrow array through the PyCapsule Protocol."""
        ...

    def __repr__(self) -> str: ...

    def __str__(self) -> str: ...
|
||||||
|
|
||||||
|
|
||||||
|
class Vector(Value):
    """A homogeneous q list of primitive values."""

    def __len__(self) -> int:
        """Return how many elements the vector holds."""
        ...

    def __iter__(self) -> Iterator[object]:
        """Iterate over the vector's elements."""
        ...

    def __getitem__(self, index: int, /) -> object:
        """Return the element at ``index``."""
        ...

    def to_list(self) -> list[object]:
        """Return the vector converted to a Python list."""
        ...

    def __arrow_c_array__(
        self,
        requested_schema: object | None = None,
        /,
    ) -> object:
        """Export the vector as an Arrow array through the PyCapsule Protocol."""
        ...

    def __repr__(self) -> str: ...

    def __str__(self) -> str: ...
|
||||||
|
|
||||||
|
|
||||||
|
class List(Value):
    """A heterogeneous/mixed q list of general values."""

    def __len__(self) -> int:
        """Return how many elements the list holds."""
        ...

    def __iter__(self) -> Iterator[Value]:
        """Iterate over the list's elements."""
        ...

    def __getitem__(self, index: int, /) -> Value:
        """Return the element at ``index``."""
        ...

    def to_list(self) -> list[Value]:
        """Return the list converted to a Python list of values."""
        ...

    def __arrow_c_array__(
        self,
        requested_schema: object | None = None,
        /,
    ) -> object:
        """Export the list as an Arrow array through the PyCapsule Protocol."""
        ...

    def __repr__(self) -> str: ...

    def __str__(self) -> str: ...
|
||||||
|
|
||||||
|
|
||||||
|
class Dictionary(Value):
    """A q dictionary, mapping keys to values."""

    @property
    def keys(self) -> Value:
        """Keys of the dictionary."""
        ...

    @property
    def values(self) -> Value:
        """Values of the dictionary."""
        ...

    def __len__(self) -> int:
        """Return how many key-value pairs the dictionary holds."""
        ...

    def __arrow_c_array__(
        self,
        requested_schema: object | None = None,
        /,
    ) -> object:
        """Export the dictionary as an Arrow StructArray through the PyCapsule Protocol."""
        ...

    def __repr__(self) -> str: ...

    def __str__(self) -> str: ...
|
||||||
|
|
||||||
|
|
||||||
|
class Table(Value):
    """A q table, representing tabular data."""

    @property
    def columns(self) -> list[str]:
        """Names of the table's columns."""
        ...

    @property
    def num_rows(self) -> int:
        """Number of rows in the table."""
        ...

    @property
    def num_columns(self) -> int:
        """Number of columns in the table."""
        ...

    def column(self, name: str, /) -> Value:
        """Return the column called ``name``."""
        ...

    def __arrow_c_stream__(
        self,
        requested_schema: object | None = None,
        /,
    ) -> object:
        """Export the table as an Arrow stream through the PyCapsule Protocol."""
        ...

    def __repr__(self) -> str: ...

    def __str__(self) -> str: ...
|
||||||
1
python/qroissant/py.typed
Normal file
1
python/qroissant/py.typed
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue