Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ test-log = { version = "0.2.15", default-features = false, features = [
test-strategy = "0.3.1"
thiserror = "1.0.65"
thiserror-context = "0.1.1"
tikv-jemallocator = "0.6.0"
tokio = "1.36.0"
tokio-stream = "0.1.14"
tokio-test = "0.4.3"
Expand Down
5 changes: 5 additions & 0 deletions linera-service/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ metrics = [
"linera-faucet-server/metrics",
"linera-metrics",
]
jemalloc = ["tikv-jemallocator"]
storage-service = ["linera-storage-service"]

[dependencies]
Expand Down Expand Up @@ -119,6 +120,10 @@ serde_json.workspace = true
stdext = { workspace = true, optional = true }
tempfile.workspace = true
thiserror.workspace = true
tikv-jemallocator = { workspace = true, features = [
"profiling",
"unprefixed_malloc_on_supported_platforms",
], optional = true }
tokio = { workspace = true, features = ["full"] }
tokio-stream.workspace = true
tokio-util.workspace = true
Expand Down
4 changes: 4 additions & 0 deletions linera-service/src/cli/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

#![recursion_limit = "256"]

// Install jemalloc as the program's global allocator, but only when the crate
// is built with the `jemalloc` Cargo feature; without it this item is compiled
// out and the default allocator stays in place.
#[cfg(feature = "jemalloc")]
#[global_allocator]
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

use std::{
collections::{BTreeMap, BTreeSet},
env,
Expand Down
4 changes: 4 additions & 0 deletions linera-service/src/proxy/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
// Copyright (c) Zefchain Labs, Inc.
// SPDX-License-Identifier: Apache-2.0

// Install jemalloc as the program's global allocator, but only when the crate
// is built with the `jemalloc` Cargo feature; without it this item is compiled
// out and the default allocator stays in place.
#[cfg(feature = "jemalloc")]
#[global_allocator]
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

use std::{net::SocketAddr, path::PathBuf, time::Duration};

use anyhow::{anyhow, bail, ensure, Result};
Expand Down
4 changes: 4 additions & 0 deletions linera-service/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
// Copyright (c) Zefchain Labs, Inc.
// SPDX-License-Identifier: Apache-2.0

// Install jemalloc as the program's global allocator, but only when the crate
// is built with the `jemalloc` Cargo feature; without it this item is compiled
// out and the default allocator stays in place.
#[cfg(feature = "jemalloc")]
#[global_allocator]
static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

use std::{
borrow::Cow,
num::NonZeroU16,
Expand Down
72 changes: 51 additions & 21 deletions linera-views/src/backends/rocks_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use std::{
};

use linera_base::ensure;
use rocksdb::{BlockBasedOptions, Cache, DBCompactionStyle};
use rocksdb::{BlockBasedOptions, Cache, DBCompactionStyle, SliceTransform};
use serde::{Deserialize, Serialize};
use sysinfo::{CpuRefreshKind, MemoryRefreshKind, RefreshKind, System};
use tempfile::TempDir;
Expand Down Expand Up @@ -166,25 +166,38 @@ impl RocksDbStoreExecutor {
Ok(entries.into_iter().collect::<Result<_, _>>()?)
}

/// Creates a raw RocksDB iterator already positioned by `seek(prefix)`.
///
/// The iterator is opened with async I/O enabled. When
/// `get_upper_bound_option(prefix)` yields a bound, it is installed as the
/// iterator's upper bound, so iteration stops there rather than at the end of
/// the database.
// NOTE(review): callers appear to rely on that bound being the exclusive end
// of the `prefix` key range — confirm against `get_upper_bound_option`.
fn get_find_prefix_iterator(&self, prefix: &[u8]) -> rocksdb::DBRawIteratorWithThreadMode<DB> {
    let mut options = rocksdb::ReadOptions::default();
    // Async I/O lets RocksDB overlap reads while the iterator advances.
    options.set_async_io(true);

    // No bound is set when the helper returns `None`.
    if let Some(bound) = get_upper_bound_option(prefix) {
        options.set_iterate_upper_bound(bound);
    }

    let mut iterator = self.db.raw_iterator_opt(options);
    iterator.seek(prefix);
    iterator
}

/// Collects the keys found when iterating from `start_key` followed by
/// `key_prefix`, returning each key with that combined prefix stripped
/// (`key[len..]`).
///
/// Returns an error early if `check_key_size` rejects `key_prefix`.
fn find_keys_by_prefix_internal(
&self,
key_prefix: Vec<u8>,
) -> Result<Vec<Vec<u8>>, RocksDbStoreInternalError> {
check_key_size(&key_prefix)?;

let mut prefix = self.start_key.clone();
prefix.extend(key_prefix);
let len = prefix.len();
let mut iter = self.db.raw_iterator();

let mut iter = self.get_find_prefix_iterator(&prefix);
let mut keys = Vec::new();
iter.seek(&prefix);
let mut next_key = iter.key();
while let Some(key) = next_key {
if !key.starts_with(&prefix) {
break;
}
while let Some(key) = iter.key() {
keys.push(key[len..].to_vec());
iter.next();
next_key = iter.key();
}
Ok(keys)
}
Expand All @@ -198,20 +211,13 @@ impl RocksDbStoreExecutor {
let mut prefix = self.start_key.clone();
prefix.extend(key_prefix);
let len = prefix.len();
let mut iter = self.db.raw_iterator();

let mut iter = self.get_find_prefix_iterator(&prefix);
let mut key_values = Vec::new();
iter.seek(&prefix);
let mut next_key = iter.key();
while let Some(key) = next_key {
if !key.starts_with(&prefix) {
break;
}
if let Some(value) = iter.value() {
let key_value = (key[len..].to_vec(), value.to_vec());
key_values.push(key_value);
}
while let Some((key, value)) = iter.item() {
let key_value = (key[len..].to_vec(), value.to_vec());
key_values.push(key_value);
iter.next();
next_key = iter.key();
}
Ok(key_values)
}
Expand Down Expand Up @@ -373,8 +379,32 @@ impl RocksDbStoreInternal {
total_ram / 4,
HYPER_CLOCK_CACHE_BLOCK_SIZE,
));

// Configure bloom filters for prefix iteration optimization
block_options.set_bloom_filter(10.0, false);
block_options.set_whole_key_filtering(false);

// 32KB blocks instead of default 4KB - reduces iterator seeks
block_options.set_block_size(32 * 1024);
// Use table format version 5, which enables the newer, faster Bloom filter implementation
block_options.set_format_version(5);

options.set_block_based_table_factory(&block_options);

// Configure prefix extraction for bloom filter optimization
// Use 8 bytes: ROOT_KEY_DOMAIN (1 byte) + BCS variant (1-2 bytes) + identifier start (4-5 bytes)
let prefix_extractor = SliceTransform::create_fixed_prefix(8);
options.set_prefix_extractor(prefix_extractor);

// 12.5% of memtable size for bloom filter
options.set_memtable_prefix_bloom_ratio(0.125);
// Optimize filters for lookups that mostly hit: skips storing bloom filters for the last LSM level
options.set_optimize_filters_for_hits(true);
// Use memory-mapped files for faster reads
options.set_allow_mmap_reads(true);
// Don't use random access pattern since we do prefix scans
options.set_advise_random_on_open(false);

let db = DB::open(&options, path_buf)?;
let executor = RocksDbStoreExecutor {
db: Arc::new(db),
Expand Down
Loading