1. 概述
本文档汇总了RocksDB在生产环境中的实战经验和最佳实践,涵盖性能调优、配置优化、监控告警、故障排查等方面。这些经验来自于大规模部署和长期运维的实践总结。
2. 性能调优最佳实践
2.1 写入性能优化
2.1.1 MemTable配置优化
// 高写入负载的MemTable配置
Options options;
// 增大MemTable大小,减少刷新频率
options.write_buffer_size = 128 * 1024 * 1024; // 128MB(默认64MB)
// 增加MemTable数量,提供更多写入缓冲
options.max_write_buffer_number = 6; // 6个(默认2个)
// 设置合并阈值,避免过多小文件
options.min_write_buffer_number_to_merge = 2;
// 全局写缓冲区限制,防止内存使用过多
options.db_write_buffer_size = 512 * 1024 * 1024; // 512MB总限制
// 选择合适的MemTable实现
// 对于随机写入,使用默认的SkipList
options.memtable_factory.reset(new SkipListFactory);
// 对于有前缀模式的写入,使用HashSkipList
if (has_prefix_pattern) {
options.prefix_extractor.reset(NewFixedPrefixTransform(4));
options.memtable_factory.reset(NewHashSkipListRepFactory(16));
}
2.1.2 WAL配置优化
// WAL性能优化配置
Options options;
// 异步写入WAL,提高写入吞吐量(但可能丢失最后几个写入)
WriteOptions write_opts;
write_opts.sync = false; // 不强制同步到磁盘
write_opts.disableWAL = false; // 保留WAL以保证持久性
// 批量写入优化
write_opts.no_slowdown = false; // 允许写入减速而不是失败
// WAL文件大小配置
options.max_total_wal_size = 1024 * 1024 * 1024; // 1GB WAL总大小限制
options.wal_ttl_seconds = 0; // WAL文件TTL(0表示无限制)
options.wal_size_limit_mb = 0; // WAL文件大小限制(0表示无限制)
// WAL目录配置(建议使用SSD)
options.wal_dir = "/fast_ssd/rocksdb_wal"; // 将WAL放在快速存储上
2.1.3 批量写入最佳实践
// 批量写入示例
void BatchWriteExample(DB* db) {
const int batch_size = 1000;
const int total_records = 100000;
WriteOptions write_opts;
write_opts.sync = false; // 异步写入
for (int i = 0; i < total_records; i += batch_size) {
WriteBatch batch;
// 预留空间,减少内存重分配
batch.Clear();
for (int j = 0; j < batch_size && (i + j) < total_records; j++) {
std::string key = "key" + std::to_string(i + j);
std::string value = "value" + std::to_string(i + j);
batch.Put(key, value);
}
// 批量提交
Status s = db->Write(write_opts, &batch);
if (!s.ok()) {
// 错误处理
std::cerr << "批量写入失败: " << s.ToString() << std::endl;
break;
}
// 定期同步(每10个批次)
if ((i / batch_size) % 10 == 0) {
WriteOptions sync_opts;
sync_opts.sync = true;
WriteBatch empty_batch;
db->Write(sync_opts, &empty_batch); // 强制同步
}
}
}
2.2 读取性能优化
2.2.1 缓存配置优化
// 读取性能优化的缓存配置
Options options;
// 配置块缓存(建议设置为可用内存的25-50%)
std::shared_ptr<Cache> cache = NewLRUCache(
2 * 1024 * 1024 * 1024, // 2GB缓存
8, // 8个分片,减少锁竞争
false, // 非严格容量限制
0.5 // 50%高优先级池
);
BlockBasedTableOptions table_options;
table_options.block_cache = cache;
table_options.cache_index_and_filter_blocks = true; // 缓存索引和过滤器
table_options.pin_l0_filter_and_index_blocks_in_cache = true; // 固定L0索引
// 配置布隆过滤器
table_options.filter_policy.reset(NewBloomFilterPolicy(
10, // 每个键10位,误判率约1%
false // 使用完整过滤器
));
// 块大小优化(默认4KB,可根据访问模式调整)
table_options.block_size = 16 * 1024; // 16KB,适合范围查询
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
// 表缓存配置
options.max_open_files = 10000; // 增加打开文件数限制
2.2.2 读取选项优化
// 读取性能优化示例
void OptimizedReadExample(DB* db) {
ReadOptions read_opts;
// 基础优化
read_opts.verify_checksums = false; // 跳过校验和验证(提高性能)
read_opts.fill_cache = true; // 填充缓存
// 迭代器优化
read_opts.tailing = false; // 非尾随迭代器
read_opts.readahead_size = 2 * 1024 * 1024; // 2MB预读
// 前缀查询优化
if (use_prefix_seek) {
read_opts.prefix_same_as_start = true; // 前缀优化
read_opts.total_order_seek = false; // 非全序查询
}
// 批量读取示例
std::vector<std::string> keys = {"key1", "key2", "key3", "key4", "key5"};
std::vector<std::string> values(keys.size());
std::vector<Status> statuses(keys.size());
// 使用MultiGet进行批量读取
std::vector<Slice> key_slices;
for (const auto& key : keys) {
key_slices.emplace_back(key);
}
db->MultiGet(read_opts, key_slices, &values, &statuses);
// 处理结果
for (size_t i = 0; i < keys.size(); i++) {
if (statuses[i].ok()) {
std::cout << keys[i] << " = " << values[i] << std::endl;
} else if (statuses[i].IsNotFound()) {
std::cout << keys[i] << " 未找到" << std::endl;
} else {
std::cerr << keys[i] << " 读取错误: " << statuses[i].ToString() << std::endl;
}
}
}
2.3 压缩性能优化
2.3.1 Level压缩配置
// Level压缩性能优化配置
Options options;
// 基础层级配置
options.level_compaction_dynamic_level_bytes = true; // 动态层级大小
options.level0_file_num_compaction_trigger = 4; // L0触发阈值
options.level0_slowdown_writes_trigger = 20; // 写入减速阈值
options.level0_stop_writes_trigger = 36; // 写入停止阈值
// 层级大小配置
options.max_bytes_for_level_base = 512 * 1024 * 1024; // L1: 512MB
options.max_bytes_for_level_multiplier = 8; // 8倍增长
options.target_file_size_base = 128 * 1024 * 1024; // 128MB文件
options.target_file_size_multiplier = 1; // 文件大小不变
// 压缩线程配置
options.max_background_compactions = 8; // 8个压缩线程
options.max_subcompactions = 4; // 4个子压缩
options.max_background_flushes = 2; // 2个刷新线程
// 压缩优先级
options.compaction_pri = kMinOverlappingRatio; // 最小重叠优先
// 压缩算法配置
options.compression = kLZ4Compression; // L0-L2使用LZ4
options.compression_per_level.resize(options.num_levels);
for (int i = 0; i < options.num_levels; i++) {
if (i < 2) {
options.compression_per_level[i] = kLZ4Compression; // 快速压缩
} else {
options.compression_per_level[i] = kZSTDCompression; // 高压缩比
}
}
2.3.2 Universal压缩配置
// Universal压缩配置(适合写入密集型工作负载)
Options options;
options.compaction_style = kCompactionStyleUniversal;
// Universal压缩选项
CompactionOptionsUniversal universal_options;
universal_options.size_ratio = 1; // 1%大小比例
universal_options.min_merge_width = 2; // 最小合并宽度
universal_options.max_merge_width = 10; // 最大合并宽度
universal_options.max_size_amplification_percent = 200; // 200%空间放大
universal_options.compression_size_percent = 50; // 50%压缩阈值
universal_options.stop_style = kCompactionStopStyleTotalSize; // 停止策略
options.compaction_options_universal = universal_options;
// 其他配置
options.level0_file_num_compaction_trigger = 2; // 2个文件触发
options.write_buffer_size = 256 * 1024 * 1024; // 256MB MemTable
options.max_write_buffer_number = 4; // 4个MemTable
3. 配置调优指南
3.1 硬件配置建议
3.1.1 CPU配置
# CPU配置建议
# - 核心数:建议16-32核心,支持高并发
# - 频率:建议2.4GHz以上
# - 缓存:建议L3缓存32MB以上
# RocksDB线程配置
max_background_jobs = min(16, cpu_cores)
max_background_compactions = max_background_jobs * 0.75
max_background_flushes = max_background_jobs * 0.25
max_subcompactions = min(4, cpu_cores / 4)
3.1.2 内存配置
// 内存配置计算公式
size_t total_memory = GetTotalSystemMemory();
size_t rocksdb_memory = total_memory * 0.6; // 60%给RocksDB
// 内存分配建议
size_t block_cache_size = rocksdb_memory * 0.5; // 50%给块缓存
size_t write_buffer_total = rocksdb_memory * 0.25; // 25%给写缓冲
size_t table_cache_size = rocksdb_memory * 0.05; // 5%给表缓存
// 剩余20%给操作系统和其他组件
Options options;
options.db_write_buffer_size = write_buffer_total;
options.write_buffer_size = write_buffer_total / 4; // 单个MemTable大小
// 配置块缓存
auto cache = NewLRUCache(block_cache_size, 8);
BlockBasedTableOptions table_options;
table_options.block_cache = cache;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
3.1.3 存储配置
# 存储配置建议
# SSD配置(推荐)
# - 类型:NVMe SSD > SATA SSD > HDD
# - 容量:建议预留30%空间
# - IOPS:建议10000+ IOPS
# - 延迟:建议<1ms
# 文件系统配置
# - 推荐ext4或xfs
# - 挂载选项:noatime,nobarrier
mount -o noatime,nobarrier /dev/nvme0n1 /data/rocksdb
# 目录结构建议
/data/rocksdb/
├── db/ # 数据库文件(可以是HDD)
├── wal/ # WAL文件(建议SSD)
└── backup/ # 备份文件(可以是HDD)
3.2 操作系统调优
3.2.1 内核参数调优
# /etc/sysctl.conf 配置
# 虚拟内存配置
vm.swappiness = 1 # 减少swap使用
vm.dirty_ratio = 15 # 脏页比例
vm.dirty_background_ratio = 5 # 后台刷新比例
vm.dirty_expire_centisecs = 12000 # 脏页过期时间
# 文件系统配置
fs.file-max = 1000000 # 最大文件句柄数
fs.nr_open = 1000000 # 进程最大文件句柄数
# 网络配置(如果使用网络存储)
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_rmem = 4096 87380 16777216
net.ipv4.tcp_wmem = 4096 65536 16777216
# 应用配置
sysctl -p
3.2.2 进程限制配置
# /etc/security/limits.conf 配置
rocksdb soft nofile 1000000
rocksdb hard nofile 1000000
rocksdb soft nproc 32768
rocksdb hard nproc 32768
rocksdb soft memlock unlimited
rocksdb hard memlock unlimited
# systemd服务配置
# /etc/systemd/system/rocksdb.service
[Unit]
Description=RocksDB Service
After=network.target
[Service]
Type=simple
User=rocksdb
Group=rocksdb
LimitNOFILE=1000000
LimitNPROC=32768
LimitMEMLOCK=infinity
ExecStart=/usr/local/bin/rocksdb_server
Restart=always
[Install]
WantedBy=multi-user.target
4. 监控与告警
4.1 关键指标监控
4.1.1 性能指标
// 性能监控指标收集
class RocksDBMonitor {
public:
struct Metrics {
// 写入指标
uint64_t write_ops_per_sec;
uint64_t write_bytes_per_sec;
double write_latency_p99;
// 读取指标
uint64_t read_ops_per_sec;
uint64_t read_bytes_per_sec;
double read_latency_p99;
// 压缩指标
uint64_t compaction_pending;
uint64_t compaction_running;
double compaction_cpu_usage;
// 内存指标
uint64_t memtable_usage;
uint64_t block_cache_usage;
uint64_t table_cache_usage;
// 磁盘指标
uint64_t sst_file_count;
uint64_t total_sst_size;
uint64_t level0_file_count;
};
Metrics CollectMetrics(DB* db) {
Metrics metrics = {};
// 获取统计信息
std::string stats;
db->GetProperty("rocksdb.stats", &stats);
// 获取内存使用情况
std::string memory_usage;
db->GetProperty("rocksdb.approximate-memory-usage", &memory_usage);
metrics.memtable_usage = std::stoull(memory_usage);
// 获取压缩状态
std::string compaction_pending;
db->GetProperty("rocksdb.compaction-pending", &compaction_pending);
metrics.compaction_pending = std::stoull(compaction_pending);
// 获取文件数量
std::string num_files_at_level0;
db->GetProperty("rocksdb.num-files-at-level0", &num_files_at_level0);
metrics.level0_file_count = std::stoull(num_files_at_level0);
return metrics;
}
void ReportMetrics(const Metrics& metrics) {
// 发送到监控系统(如Prometheus、InfluxDB等)
prometheus_client_.Counter("rocksdb_write_ops_total")
.Increment(metrics.write_ops_per_sec);
prometheus_client_.Gauge("rocksdb_memtable_usage_bytes")
.Set(metrics.memtable_usage);
prometheus_client_.Gauge("rocksdb_level0_files")
.Set(metrics.level0_file_count);
}
};
4.1.2 告警规则配置
# Prometheus告警规则配置
groups:
- name: rocksdb_alerts
rules:
# 写入延迟告警
- alert: RocksDBHighWriteLatency
expr: rocksdb_write_latency_p99 > 100
for: 5m
labels:
severity: warning
annotations:
summary: "RocksDB写入延迟过高"
description: "RocksDB P99写入延迟 {{ $value }}ms 超过100ms阈值"
# Level0文件数量告警
- alert: RocksDBTooManyL0Files
expr: rocksdb_level0_files > 20
for: 2m
labels:
severity: critical
annotations:
summary: "RocksDB Level0文件过多"
description: "Level0文件数量 {{ $value }} 超过20个,可能导致写入阻塞"
# 内存使用告警
- alert: RocksDBHighMemoryUsage
expr: rocksdb_memtable_usage_bytes > 1073741824 # 1GB
for: 5m
labels:
severity: warning
annotations:
summary: "RocksDB内存使用过高"
description: "MemTable内存使用 {{ $value | humanize1024 }} 超过1GB"
# 压缩队列告警
- alert: RocksDBCompactionPending
expr: rocksdb_compaction_pending > 10
for: 10m
labels:
severity: warning
annotations:
summary: "RocksDB压缩队列积压"
description: "待压缩任务数量 {{ $value }} 超过10个"
4.2 日志监控
4.2.1 日志配置
// 日志配置示例
Options options;
// 配置日志级别
options.info_log_level = InfoLogLevel::INFO_LEVEL;
// 配置日志文件
options.db_log_dir = "/var/log/rocksdb";
options.log_file_time_to_roll = 24 * 60 * 60; // 24小时轮转
options.keep_log_file_num = 30; // 保留30个日志文件
options.max_log_file_size = 100 * 1024 * 1024; // 100MB最大日志文件
// 创建自定义日志器
class CustomLogger : public Logger {
public:
void Logv(const InfoLogLevel log_level, const char* format, va_list ap) override {
// 格式化日志消息
char buffer[1024];
vsnprintf(buffer, sizeof(buffer), format, ap);
// 添加时间戳和级别
auto now = std::chrono::system_clock::now();
auto time_t = std::chrono::system_clock::to_time_t(now);
std::ostringstream oss;
oss << std::put_time(std::localtime(&time_t), "%Y-%m-%d %H:%M:%S");
oss << " [" << GetLogLevelString(log_level) << "] " << buffer;
// 输出到文件和控制台
std::cout << oss.str() << std::endl;
log_file_ << oss.str() << std::endl;
log_file_.flush();
}
private:
std::ofstream log_file_;
const char* GetLogLevelString(InfoLogLevel level) {
switch (level) {
case InfoLogLevel::DEBUG_LEVEL: return "DEBUG";
case InfoLogLevel::INFO_LEVEL: return "INFO";
case InfoLogLevel::WARN_LEVEL: return "WARN";
case InfoLogLevel::ERROR_LEVEL: return "ERROR";
case InfoLogLevel::FATAL_LEVEL: return "FATAL";
default: return "UNKNOWN";
}
}
};
// 使用自定义日志器
options.info_log = std::make_shared<CustomLogger>();
4.2.2 关键日志模式
# 重要日志模式和含义
# 1. 压缩相关日志
"Compaction finished" - 压缩完成
"Compaction started" - 压缩开始
"Compaction aborted" - 压缩中止
# 2. 刷新相关日志
"Flushing memtable" - MemTable刷新
"Flush finished" - 刷新完成
# 3. 错误日志
"Corruption" - 数据损坏
"IO error" - IO错误
"No space left" - 磁盘空间不足
# 4. 性能日志
"Write stall" - 写入阻塞
"Slowdown writes" - 写入减速
# 日志分析脚本
#!/bin/bash
LOG_FILE="/var/log/rocksdb/rocksdb.log"
# 统计错误数量
echo "=== 错误统计 ==="
grep -c "ERROR" $LOG_FILE
grep -c "Corruption" $LOG_FILE
grep -c "IO error" $LOG_FILE
# 统计压缩活动
echo "=== 压缩统计 ==="
grep -c "Compaction started" $LOG_FILE
grep -c "Compaction finished" $LOG_FILE
# 统计写入阻塞
echo "=== 写入阻塞统计 ==="
grep -c "Write stall" $LOG_FILE
grep -c "Slowdown writes" $LOG_FILE
5. 故障排查指南
5.1 常见问题诊断
5.1.1 写入性能问题
// 写入性能诊断工具
class WritePerformanceDiagnostic {
public:
struct DiagnosticResult {
bool memtable_full;
bool l0_files_too_many;
bool compaction_pending;
bool disk_io_slow;
bool wal_sync_slow;
std::string recommendation;
};
DiagnosticResult DiagnoseWritePerformance(DB* db) {
DiagnosticResult result = {};
// 检查MemTable状态
std::string memtable_usage;
db->GetProperty("rocksdb.cur-size-all-mem-tables", &memtable_usage);
uint64_t memtable_size = std::stoull(memtable_usage);
std::string write_buffer_size;
db->GetProperty("rocksdb.write-buffer-size", &write_buffer_size);
uint64_t buffer_size = std::stoull(write_buffer_size);
result.memtable_full = (memtable_size > buffer_size * 0.8);
// 检查Level0文件数量
std::string l0_files;
db->GetProperty("rocksdb.num-files-at-level0", &l0_files);
uint64_t l0_count = std::stoull(l0_files);
result.l0_files_too_many = (l0_count > 10);
// 检查压缩状态
std::string compaction_pending;
db->GetProperty("rocksdb.compaction-pending", &compaction_pending);
result.compaction_pending = (std::stoull(compaction_pending) > 0);
// 生成建议
std::ostringstream recommendations;
if (result.memtable_full) {
recommendations << "增加write_buffer_size或max_write_buffer_number; ";
}
if (result.l0_files_too_many) {
recommendations << "增加压缩线程数或调整level0_file_num_compaction_trigger; ";
}
if (result.compaction_pending) {
recommendations << "增加max_background_compactions; ";
}
result.recommendation = recommendations.str();
return result;
}
};
5.1.2 读取性能问题
// 读取性能诊断工具
class ReadPerformanceDiagnostic {
public:
struct CacheStats {
uint64_t block_cache_hit_rate;
uint64_t index_cache_hit_rate;
uint64_t filter_cache_hit_rate;
uint64_t bloom_filter_useful_rate;
};
CacheStats GetCacheStats(DB* db) {
CacheStats stats = {};
// 获取缓存命中率
std::string cache_hit;
std::string cache_miss;
db->GetProperty("rocksdb.block.cache.hit", &cache_hit);
db->GetProperty("rocksdb.block.cache.miss", &cache_miss);
uint64_t hits = std::stoull(cache_hit);
uint64_t misses = std::stoull(cache_miss);
if (hits + misses > 0) {
stats.block_cache_hit_rate = (hits * 100) / (hits + misses);
}
// 获取布隆过滤器统计
std::string bloom_checked;
std::string bloom_useful;
db->GetProperty("rocksdb.bloom.filter.checked", &bloom_checked);
db->GetProperty("rocksdb.bloom.filter.useful", &bloom_useful);
uint64_t checked = std::stoull(bloom_checked);
uint64_t useful = std::stoull(bloom_useful);
if (checked > 0) {
stats.bloom_filter_useful_rate = (useful * 100) / checked;
}
return stats;
}
std::string DiagnoseReadPerformance(const CacheStats& stats) {
std::ostringstream diagnosis;
if (stats.block_cache_hit_rate < 80) {
diagnosis << "块缓存命中率过低(" << stats.block_cache_hit_rate
<< "%),建议增加block_cache大小; ";
}
if (stats.bloom_filter_useful_rate < 50) {
diagnosis << "布隆过滤器效果不佳(" << stats.bloom_filter_useful_rate
<< "%),检查数据访问模式; ";
}
return diagnosis.str();
}
};
5.2 数据恢复策略
5.2.1 备份策略
// 备份管理示例
class RocksDBBackupManager {
public:
RocksDBBackupManager(const std::string& backup_dir)
: backup_dir_(backup_dir) {}
// 创建备份
Status CreateBackup(DB* db, bool flush_before_backup = true) {
BackupEngineOptions backup_options;
backup_options.backup_dir = backup_dir_;
backup_options.share_table_files = true; // 共享表文件,节省空间
backup_options.sync = true; // 同步写入
BackupEngine* backup_engine;
Status s = BackupEngine::Open(Env::Default(), backup_options, &backup_engine);
if (!s.ok()) {
return s;
}
// 创建备份
s = backup_engine->CreateNewBackup(db, flush_before_backup);
delete backup_engine;
return s;
}
// 恢复备份
Status RestoreBackup(uint32_t backup_id, const std::string& db_dir) {
BackupEngineOptions backup_options;
backup_options.backup_dir = backup_dir_;
BackupEngine* backup_engine;
Status s = BackupEngine::Open(Env::Default(), backup_options, &backup_engine);
if (!s.ok()) {
return s;
}
// 恢复备份
s = backup_engine->RestoreDBFromBackup(backup_id, db_dir, db_dir);
delete backup_engine;
return s;
}
// 列出所有备份
std::vector<BackupInfo> GetBackupInfo() {
BackupEngineOptions backup_options;
backup_options.backup_dir = backup_dir_;
BackupEngine* backup_engine;
Status s = BackupEngine::Open(Env::Default(), backup_options, &backup_engine);
if (!s.ok()) {
return {};
}
std::vector<BackupInfo> backup_info;
backup_engine->GetBackupInfo(&backup_info);
delete backup_engine;
return backup_info;
}
// 删除旧备份
Status PurgeOldBackups(uint32_t num_backups_to_keep) {
BackupEngineOptions backup_options;
backup_options.backup_dir = backup_dir_;
BackupEngine* backup_engine;
Status s = BackupEngine::Open(Env::Default(), backup_options, &backup_engine);
if (!s.ok()) {
return s;
}
s = backup_engine->PurgeOldBackups(num_backups_to_keep);
delete backup_engine;
return s;
}
private:
std::string backup_dir_;
};
// 自动备份脚本
void AutoBackupScheduler(DB* db, RocksDBBackupManager* backup_manager) {
std::thread backup_thread([db, backup_manager]() {
while (true) {
// 每天凌晨2点创建备份
auto now = std::chrono::system_clock::now();
auto time_t = std::chrono::system_clock::to_time_t(now);
auto tm = *std::localtime(&time_t);
if (tm.tm_hour == 2 && tm.tm_min == 0) {
Status s = backup_manager->CreateBackup(db);
if (s.ok()) {
std::cout << "备份创建成功" << std::endl;
// 保留最近7天的备份
backup_manager->PurgeOldBackups(7);
} else {
std::cerr << "备份创建失败: " << s.ToString() << std::endl;
}
// 等待一分钟,避免重复备份
std::this_thread::sleep_for(std::chrono::minutes(1));
}
// 每分钟检查一次
std::this_thread::sleep_for(std::chrono::minutes(1));
}
});
backup_thread.detach();
}
5.2.2 数据修复
// 数据修复工具
class RocksDBRepairTool {
public:
// 修复数据库
static Status RepairDatabase(const std::string& db_path,
const Options& options) {
// 使用RocksDB内置的修复功能
return RepairDB(db_path, options);
}
// 检查数据库一致性
static Status CheckConsistency(DB* db) {
// 创建迭代器遍历所有数据
ReadOptions read_opts;
read_opts.verify_checksums = true;
std::unique_ptr<Iterator> iter(db->NewIterator(read_opts));
uint64_t key_count = 0;
uint64_t error_count = 0;
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
key_count++;
// 验证键值对
if (!iter->status().ok()) {
error_count++;
std::cerr << "键值对错误: " << iter->status().ToString() << std::endl;
}
// 每10万条记录报告一次进度
if (key_count % 100000 == 0) {
std::cout << "已检查 " << key_count << " 条记录" << std::endl;
}
}
std::cout << "一致性检查完成: " << key_count << " 条记录, "
<< error_count << " 个错误" << std::endl;
return iter->status();
}
// 压缩所有数据(修复碎片)
static Status CompactAll(DB* db) {
CompactRangeOptions compact_options;
compact_options.change_level = true;
compact_options.target_level = -1; // 压缩到最底层
return db->CompactRange(compact_options, nullptr, nullptr);
}
};
6. 生产环境部署
6.1 容器化部署
6.1.1 Docker配置
# Dockerfile for RocksDB application
FROM ubuntu:20.04
# 安装依赖
RUN apt-get update && apt-get install -y \
build-essential \
cmake \
git \
libgflags-dev \
libsnappy-dev \
zlib1g-dev \
libbz2-dev \
liblz4-dev \
libzstd-dev \
&& rm -rf /var/lib/apt/lists/*
# 创建用户
RUN useradd -m -s /bin/bash rocksdb
# 设置工作目录
WORKDIR /app
# 复制应用程序
COPY --chown=rocksdb:rocksdb . .
# 编译应用程序
RUN make clean && make -j$(nproc)
# 创建数据目录
RUN mkdir -p /data/rocksdb && chown -R rocksdb:rocksdb /data
# 切换用户
USER rocksdb
# 暴露端口
EXPOSE 8080
# 启动命令
CMD ["./rocksdb_server", "--db_path=/data/rocksdb"]
6.1.2 Kubernetes部署
# rocksdb-deployment.yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: rocksdb
labels:
app: rocksdb
spec:
serviceName: rocksdb
replicas: 3
selector:
matchLabels:
app: rocksdb
template:
metadata:
labels:
app: rocksdb
spec:
containers:
- name: rocksdb
image: rocksdb:latest
ports:
- containerPort: 8080
env:
- name: DB_PATH
value: "/data/rocksdb"
- name: WAL_PATH
value: "/wal/rocksdb"
resources:
requests:
memory: "4Gi"
cpu: "2"
limits:
memory: "8Gi"
cpu: "4"
volumeMounts:
- name: data
mountPath: /data
- name: wal
mountPath: /wal
livenessProbe:
httpGet:
path: /health
port: 8080
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /ready
port: 8080
initialDelaySeconds: 5
periodSeconds: 5
volumeClaimTemplates:
- metadata:
name: data
spec:
accessModes: ["ReadWriteOnce"]
storageClassName: "fast-ssd"
resources:
requests:
storage: 100Gi
- metadata:
name: wal
spec:
accessModes: ["ReadWriteOnce"]
storageClassName: "fast-ssd"
resources:
requests:
storage: 20Gi
---
apiVersion: v1
kind: Service
metadata:
name: rocksdb-service
spec:
selector:
app: rocksdb
ports:
- port: 8080
targetPort: 8080
type: ClusterIP
6.2 高可用部署
6.2.1 主从复制
// 简单的主从复制实现
class RocksDBReplication {
public:
// 主节点
class Master {
public:
Master(DB* db) : db_(db) {}
// 写入数据并复制到从节点
Status Write(const WriteOptions& options, WriteBatch* batch) {
// 先写入本地
Status s = db_->Write(options, batch);
if (!s.ok()) {
return s;
}
// 复制到从节点
for (auto& slave : slaves_) {
slave->ReplicateWriteBatch(batch);
}
return Status::OK();
}
void AddSlave(std::shared_ptr<Slave> slave) {
slaves_.push_back(slave);
}
private:
DB* db_;
std::vector<std::shared_ptr<Slave>> slaves_;
};
// 从节点
class Slave {
public:
Slave(DB* db) : db_(db) {}
// 应用主节点的写入
Status ReplicateWriteBatch(WriteBatch* batch) {
WriteOptions options;
options.sync = false; // 异步写入,提高性能
return db_->Write(options, batch);
}
// 只读查询
Status Get(const ReadOptions& options, const Slice& key,
std::string* value) {
return db_->Get(options, key, value);
}
private:
DB* db_;
};
};
6.2.2 负载均衡
// 读写分离负载均衡器
class RocksDBLoadBalancer {
public:
void AddMaster(std::shared_ptr<RocksDBReplication::Master> master) {
master_ = master;
}
void AddSlave(std::shared_ptr<RocksDBReplication::Slave> slave) {
slaves_.push_back(slave);
}
// 写入操作路由到主节点
Status Write(const WriteOptions& options, WriteBatch* batch) {
if (!master_) {
return Status::InvalidArgument("No master available");
}
return master_->Write(options, batch);
}
// 读取操作路由到从节点(轮询)
Status Get(const ReadOptions& options, const Slice& key,
std::string* value) {
if (slaves_.empty()) {
return Status::InvalidArgument("No slaves available");
}
// 轮询选择从节点
size_t index = next_slave_index_.fetch_add(1) % slaves_.size();
return slaves_[index]->Get(options, key, value);
}
private:
std::shared_ptr<RocksDBReplication::Master> master_;
std::vector<std::shared_ptr<RocksDBReplication::Slave>> slaves_;
std::atomic<size_t> next_slave_index_{0};
};
7. 总结
本文档总结了RocksDB在生产环境中的最佳实践,包括:
- 性能调优:从写入、读取、压缩三个维度进行优化
- 配置调优:硬件配置、操作系统调优、参数配置
- 监控告警:关键指标监控、日志分析、告警规则
- 故障排查:常见问题诊断、数据恢复策略
- 生产部署:容器化部署、高可用架构
这些实践经验可以帮助开发者更好地使用RocksDB,在生产环境中获得最佳的性能和稳定性。建议根据具体的业务场景和硬件环境,选择合适的配置和优化策略。