Skip to content
Merged
Show file tree
Hide file tree
Changes from 53 commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
31d109a
Draft RowBinaryWNAT/Native header parser
slvrtrn May 7, 2025
3a66d7a
Add RBWNAT header parser
slvrtrn May 8, 2025
cf72759
RBWNAT deserializer WIP
slvrtrn May 13, 2025
5a60295
RBWNAT deserializer - more types WIP
slvrtrn May 14, 2025
b338d88
RBWNAT deserializer - validation WIP
slvrtrn May 18, 2025
8ae3629
RBWNAT deserializer - validation WIP
slvrtrn May 19, 2025
acced9e
Merge branch 'main' into row-binary-header-check
slvrtrn May 20, 2025
c20af77
RBWNAT deserializer - validation, benches WIP
slvrtrn May 21, 2025
c4a608e
RBWNAT deserializer - improve performance
slvrtrn May 22, 2025
0d416cf
RBWNAT deserializer - clearer error messages on panics
slvrtrn May 23, 2025
65cb92f
Fix clippy and build
slvrtrn May 23, 2025
fbfbd99
Fix core::mem::size_of import
slvrtrn May 23, 2025
1d5c01a
Slightly faster implementation
slvrtrn May 26, 2025
227617e
Add Geo types, more tests
slvrtrn May 27, 2025
986643f
Support root level tuples for fetch
slvrtrn May 28, 2025
b26006e
Add Variant support, improve validation, tests
slvrtrn May 28, 2025
8567200
Fix compile issues, clippy, etc
slvrtrn May 28, 2025
a1181a0
Fix older Rust versions compile issues, docs
slvrtrn May 28, 2025
b77f45d
Merge remote-tracking branch 'origin' into row-binary-header-check
slvrtrn May 29, 2025
04c7a20
Add NYC benchmark
slvrtrn May 29, 2025
1f6c9e6
Add compression to the NYC benchmark
slvrtrn May 29, 2025
9bafc9a
Add more tests
slvrtrn Jun 4, 2025
c53ba74
Support structs with different field order via MapAccess
slvrtrn Jun 4, 2025
00ff574
Add more tests
slvrtrn Jun 4, 2025
bd71a77
Add more tests, `execute_statements` helper
slvrtrn Jun 6, 2025
6ba6abf
More optimal struct name/fields acquisition, cleanup
slvrtrn Jun 7, 2025
fb49a24
Temporarily allow unreachable items
slvrtrn Jun 7, 2025
52d0953
Add chrono feature to RBWNAT tests
slvrtrn Jun 7, 2025
5ffae76
Allow root primitives, rework benchmarks, address (most of) PR feedback
slvrtrn Jun 8, 2025
a922d0d
Add LZ4 feature flag
slvrtrn Jun 8, 2025
90132cb
Support proper validation for `(Row, P1, P2, ...)` fetching
slvrtrn Jun 8, 2025
49af48c
Use Cargo workspaces, update benchmarks and docs
slvrtrn Jun 9, 2025
926213b
Fix examples schema mismatch
slvrtrn Jun 9, 2025
da08827
Bring back `Vec<(K, V)>` for maps, more tests, fix clippy
slvrtrn Jun 9, 2025
1b893a8
Fix mocked select benchmark
slvrtrn Jun 9, 2025
14f8550
Fix mocked insert benchmark
slvrtrn Jun 9, 2025
5509b12
Fix the rest of the examples, add a simple sanity check
slvrtrn Jun 9, 2025
38d771d
Clippy fixes
slvrtrn Jun 9, 2025
446eb7c
Don't use Result as validation always panics
slvrtrn Jun 9, 2025
fc9a49b
Merge remote-tracking branch 'origin' into row-binary-header-check
slvrtrn Jun 9, 2025
19760f3
Bring back Unsupported error kind
slvrtrn Jun 9, 2025
5f51dc7
Remove examples runner from the `it` directory
slvrtrn Jun 9, 2025
8f3f3b2
Ignore an odd test
slvrtrn Jun 9, 2025
d189a78
Add CI workflow dispatch and all PR trigger
slvrtrn Jun 10, 2025
ccfac33
Further optimizations, remove validation_mode, remove schema from mocks
slvrtrn Jun 10, 2025
1544b7b
Make validation slightly faster again
slvrtrn Jun 11, 2025
b7b45c5
Address PR feedback
slvrtrn Jun 17, 2025
bd99890
Merge remote-tracking branch 'origin' into row-binary-header-check
slvrtrn Jun 19, 2025
bcc1e46
Resolve merge conflicts
slvrtrn Jun 19, 2025
a879945
fix cargo fmt
slvrtrn Jun 19, 2025
b094dd0
Fix docs, tests
slvrtrn Jun 19, 2025
c449ee2
Update CHANGELOG.md, README.md
slvrtrn Jun 20, 2025
e1706f4
Update client usage with mocks
slvrtrn Jun 20, 2025
3c08c77
chore: stop using nightly-only features of rustfmt
loyd Jun 21, 2025
244d587
refactor(rowbinary/de): dedup code
loyd Jun 21, 2025
d5af0b8
Address PR feedback
slvrtrn Jun 23, 2025
ee8f37e
Merge remote-tracking branch 'origin' into row-binary-header-check
slvrtrn Jun 23, 2025
6d0e771
Update CHANGELOG.md
slvrtrn Jun 23, 2025
9f495e2
Add missing env variables to docker compose
slvrtrn Jun 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
workflow_dispatch:

env:
CARGO_TERM_COLOR: always
Expand Down
24 changes: 24 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased] - ReleaseDate

### Breaking Changes

- query: `RowBinaryWithNamesAndTypes` is now used by default for query results. This may cause panics if the row struct
definition does not match the database schema. Use `Client::with_validation(false)` to revert to the previous behavior
which uses plain `RowBinary` format for fetching rows. ([#221])
- query: due to `RowBinaryWithNamesAndTypes` format usage, there might be an impact on fetch performance, which largely
depends on how the dataset is defined. If you experience performance issues, consider disabling validation by using
`Client::with_validation(false)`.
- mock: when using `test-util` feature, it is now required to use `Client::with_mock(&mock)` to set up the mock server,
so it properly handles the response format and automatically disables `RowBinaryWithNamesAndTypes` header
parsing and validation. Additionally, it is not required to call `with_url` explicitly.
See the [updated example](./examples/mock.rs).

### Added

- client: added `Client::with_validation` builder method. Validation is enabled by default, meaning that
`RowBinaryWithNamesAndTypes` format will be used to fetch rows from the database. If validation is disabled,
`RowBinary` format will be used, similarly to the previous versions. ([#221]).
- types: a new crate `clickhouse-types` was added to the project workspace. This crate is required for
`RowBinaryWithNamesAndTypes` struct definition validation, as it contains ClickHouse data types AST, as well as
functions and utilities to parse the types out of the ClickHouse server response. ([#221]).

[#221]: https://github.com/ClickHouse/clickhouse-rs/pull/221

## [0.13.3] - 2025-05-29
### Added
- client: added `Client::with_access_token` to support JWT authentication ClickHouse Cloud feature ([#215]).
Expand Down
26 changes: 22 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,21 @@ homepage = "https://clickhouse.com"
license = "MIT OR Apache-2.0"
readme = "README.md"
edition = "2021"
# update `derive/Cargo.toml` and CI if changed
# update `workspace.package.rust-version` below and CI if changed
# TODO: after bumping to v1.80, remove `--precise` in the "msrv" CI job
rust-version = "1.73.0"

[workspace]
members = ["derive", "types"]

[workspace.package]
authors = ["ClickHouse Contributors", "Paul Loyd <pavelko95@gmail.com>"]
repository = "https://github.com/ClickHouse/clickhouse-rs"
homepage = "https://clickhouse.com"
edition = "2021"
license = "MIT OR Apache-2.0"
rust-version = "1.73.0"

[lints.rust]
rust_2018_idioms = { level = "warn", priority = -1 }
unreachable_pub = "warn"
Expand All @@ -26,16 +37,21 @@ undocumented_unsafe_blocks = "warn"
all-features = true
rustdoc-args = ["--cfg", "docsrs"]

[[bench]]
name = "select_nyc_taxi_data"
harness = false
required-features = ["time"]

[[bench]]
name = "select_numbers"
harness = false

[[bench]]
name = "insert"
name = "mocked_insert"
harness = false

[[bench]]
name = "select"
name = "mocked_select"
harness = false

[[example]]
Expand Down Expand Up @@ -98,6 +114,7 @@ rustls-tls-native-roots = [

[dependencies]
clickhouse-derive = { version = "0.2.0", path = "derive" }
clickhouse-types = { version = "0.1.0", path = "types" }

thiserror = "2.0"
serde = "1.0.106"
Expand Down Expand Up @@ -131,6 +148,7 @@ quanta = { version = "0.12", optional = true }
replace_with = { version = "0.1.7" }

[dev-dependencies]
clickhouse-derive = { version = "0.2.0", path = "derive" }
criterion = "0.6"
serde = { version = "1.0.106", features = ["derive"] }
tokio = { version = "1.0.1", features = ["full", "test-util"] }
Expand All @@ -139,6 +157,6 @@ serde_bytes = "0.11.4"
serde_json = "1"
serde_repr = "0.1.7"
uuid = { version = "1", features = ["v4", "serde"] }
time = { version = "0.3.17", features = ["macros", "rand"] }
time = { version = "0.3.17", features = ["macros", "rand", "parsing"] }
fixnum = { version = "0.9.2", features = ["serde", "i32", "i64", "i128"] }
rand = { version = "0.9", features = ["small_rng"] }
160 changes: 110 additions & 50 deletions README.md

Large diffs are not rendered by default.

30 changes: 23 additions & 7 deletions benches/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,41 @@ All cases are run with `cargo bench --bench <case>`.

## With a mocked server

These benchmarks are run against a mocked server, which is a simple HTTP server that responds with a fixed response. This is useful to measure the overhead of the client itself:
* `select` checks throughput of `Client::query()`.
* `insert` checks throughput of `Client::insert()` and `Client::inserter()` (if the `inserter` features is enabled).
These benchmarks are run against a mocked server, which is a simple HTTP server that responds with a fixed response.
This is useful to measure the overhead of the client itself.

### Scenarios

* [mocked_select](mocked_select.rs) checks throughput of `Client::query()`.
* [mocked_insert](mocked_insert.rs) checks throughput of `Client::insert()` and `Client::inserter()`
(requires `inserter` feature).

### How to collect perf data

The crate's code runs on the thread with the name `testee`:

```bash
cargo bench --bench <name> &
perf record -p `ps -AT | grep testee | awk '{print $2}'` --call-graph dwarf,65528 --freq 5000 -g -- sleep 5
perf script > perf.script
```

Then upload the `perf.script` file to [Firefox Profiler](https://profiler.firefox.com).
Then upload the `perf.script` file to [Firefox Profiler].

## With a running ClickHouse server

These benchmarks are run against a real ClickHouse server, so it must be started:

```bash
docker compose up -d
cargo bench --bench <case>
```

Cases:
* `select_numbers` measures time of running a big SELECT query to the `system.numbers_mt` table.
### Scenarios

* [select_numbers.rs](select_numbers.rs) measures time of running a big SELECT query to the `system.numbers_mt` table.
* [select_nyc_taxi_data.rs](select_nyc_taxi_data.rs) measures time of running a fairly large SELECT query (approximately
3 million records) to the `nyc_taxi_data` table using the [NYC taxi dataset].

### How to collect perf data

Expand All @@ -38,4 +48,10 @@ perf record -p `ps -AT | grep <name> | awk '{print $2}'` --call-graph dwarf,6552
perf script > perf.script
```

Then upload the `perf.script` file to [Firefox Profiler](https://profiler.firefox.com).
Then upload the `perf.script` file to [Firefox Profiler].

<!-- links -->

[Firefox Profiler]: https://profiler.firefox.com

[NYC taxi dataset]: https://clickhouse.com/docs/getting-started/example-datasets/nyc-taxi#create-the-table-trips
61 changes: 46 additions & 15 deletions benches/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use std::{
};

use bytes::Bytes;
use clickhouse::error::Result;
use futures::stream::StreamExt;
use http_body_util::BodyExt;
use hyper::{
Expand All @@ -25,35 +26,65 @@ use tokio::{
sync::{mpsc, oneshot},
};

use clickhouse::error::Result;
/// Owns a running benchmark server and the means to stop it.
///
/// Both fields are `Option`s so that they can be moved out with `take()`
/// from a `&mut self` method when the server is shut down.
pub(crate) struct ServerHandle {
/// Join handle of the thread the server runs on; `None` once it has been joined.
handle: Option<thread::JoinHandle<()>>,
/// Sending half of the one-shot shutdown signal; `None` once the signal has been sent.
shutdown_tx: Option<oneshot::Sender<()>>,
}

pub(crate) struct ServerHandle;
impl ServerHandle {
    /// Signals the server to stop and waits for its thread to finish.
    ///
    /// Calling this more than once is harmless: the sender and the join handle
    /// are both taken out of `self` on first use, so later calls do nothing.
    ///
    /// This method is invoked from `Drop`, so it must not panic while the
    /// current thread is already unwinding (a second panic aborts the process).
    ///
    /// # Panics
    ///
    /// Re-raises the panic of the server thread, unless the current thread is
    /// already panicking.
    fn shutdown(&mut self) {
        if let Some(tx) = self.shutdown_tx.take() {
            // A send error only means the receiving half is gone, i.e. the server
            // side has already stopped listening for the signal. That is not an
            // error by itself; a crashed server thread is reported by `join` below.
            let _ = tx.send(());
        }

        if let Some(handle) = self.handle.take() {
            if let Err(payload) = handle.join() {
                // Propagate the server thread's panic with its original payload,
                // but never start a second panic while already unwinding.
                if !std::thread::panicking() {
                    std::panic::resume_unwind(payload);
                }
            }
        }
    }
}

pub(crate) fn start_server<S, F, B>(addr: SocketAddr, serve: S) -> ServerHandle
impl Drop for ServerHandle {
fn drop(&mut self) {
self.shutdown();
}
}

pub(crate) async fn start_server<S, F, B>(addr: SocketAddr, serve: S) -> ServerHandle
where
S: Fn(Request<Incoming>) -> F + Send + Sync + 'static,
F: Future<Output = Response<B>> + Send,
B: Body<Data = Bytes, Error = Infallible> + Send + 'static,
{
let (shutdown_tx, mut shutdown_rx) = oneshot::channel::<()>();
let (ready_tx, ready_rx) = oneshot::channel::<()>();

let serving = async move {
let listener = TcpListener::bind(addr).await.unwrap();
ready_tx.send(()).unwrap();

loop {
let (stream, _) = listener.accept().await.unwrap();

let service =
service::service_fn(|request| async { Ok::<_, Infallible>(serve(request).await) });

// SELECT benchmark doesn't read the whole body, so ignore possible errors.
let _ = conn::http1::Builder::new()
let server_future = conn::http1::Builder::new()
.timer(TokioTimer::new())
.serve_connection(TokioIo::new(stream), service)
.await;
.serve_connection(
TokioIo::new(stream),
service::service_fn(|request| async {
Ok::<_, Infallible>(serve(request).await)
}),
);
tokio::select! {
_ = server_future => {}
_ = &mut shutdown_rx => { break; }
}
}
};

run_on_st_runtime("server", serving);
ServerHandle
let handle = Some(run_on_st_runtime("server", serving));
ready_rx.await.unwrap();

ServerHandle {
handle,
shutdown_tx: Some(shutdown_tx),
}
}

pub(crate) async fn skip_incoming(request: Request<Incoming>) {
Expand Down Expand Up @@ -105,7 +136,7 @@ pub(crate) fn start_runner() -> RunnerHandle {
RunnerHandle { tx }
}

fn run_on_st_runtime(name: &str, f: impl Future + Send + 'static) {
fn run_on_st_runtime(name: &str, f: impl Future + Send + 'static) -> thread::JoinHandle<()> {
let name = name.to_string();
thread::Builder::new()
.name(name.clone())
Expand All @@ -121,5 +152,5 @@ fn run_on_st_runtime(name: &str, f: impl Future + Send + 'static) {
.unwrap()
.block_on(f);
})
.unwrap();
.unwrap()
}
Loading