PostgreSQL 16: Hands-On Cases and Best Practices
Document Overview
This document collects real-world production experience with PostgreSQL: usage examples, performance tuning, troubleshooting methods, and development best practices. It covers everything from basic configuration to advanced optimization, helping developers and DBAs quickly master effective PostgreSQL usage.
1. Basic Configuration and Environment Setup
1.1 Production Installation
Requirements
- OS: Linux (Ubuntu 22.04 / CentOS 8)
- CPU: 8+ cores
- RAM: 64GB+
- Disk: SSD/NVMe, RAID 10
Building from Source
# Install build dependencies
sudo apt-get install -y build-essential libreadline-dev zlib1g-dev \
flex bison libxml2-dev libxslt-dev libssl-dev libxml2-utils \
xsltproc libicu-dev pkg-config
# Download the source
wget https://ftp.postgresql.org/pub/source/v16.1/postgresql-16.1.tar.gz
tar -xzvf postgresql-16.1.tar.gz
cd postgresql-16.1
# Configure (enable ICU, OpenSSL, LLVM JIT)
# Note: --enable-debug and --enable-cassert add significant overhead and are
# for development builds only, so they are omitted here for production.
./configure --prefix=/usr/local/pgsql-16 \
--with-openssl \
--with-icu \
--with-llvm
# Build and install
make -j8
sudo make install
# Create the postgres OS user and data directory
sudo useradd -m -s /bin/bash postgres
sudo mkdir -p /data/pgdata
sudo chown postgres:postgres /data/pgdata
Initializing the Cluster
# Switch to the postgres user
sudo su - postgres
# Initialize the data directory
/usr/local/pgsql-16/bin/initdb -D /data/pgdata \
--encoding=UTF8 \
--locale=en_US.UTF-8 \
--data-checksums \
--wal-segsize=64
# Start the server
/usr/local/pgsql-16/bin/pg_ctl -D /data/pgdata -l logfile start
# Create a test database
/usr/local/pgsql-16/bin/createdb testdb
1.2 Core Configuration Parameters
Tuned postgresql.conf
# ===== Connections =====
listen_addresses = '*' # listen on all interfaces
port = 5432
max_connections = 200 # max connections (put a connection pooler in front)
superuser_reserved_connections = 10
# ===== Memory =====
shared_buffers = 16GB # shared buffer pool (~25% of RAM)
effective_cache_size = 48GB # planner hint for OS cache size (~75% of RAM)
work_mem = 64MB # per sort/hash operation
maintenance_work_mem = 2GB # VACUUM, CREATE INDEX, etc.
max_stack_depth = 7MB # must stay below the OS stack limit (ulimit -s)
# ===== WAL =====
wal_level = replica # minimal / replica / logical
fsync = on # never disable in production
synchronous_commit = on # durability vs. latency trade-off
wal_buffers = 16MB # WAL buffer size
checkpoint_timeout = 15min # max time between checkpoints
max_wal_size = 10GB # WAL volume that triggers a checkpoint
min_wal_size = 2GB # minimum WAL kept for recycling
checkpoint_completion_target = 0.9 # spread checkpoint I/O over the interval
# ===== Query Planner =====
random_page_cost = 1.1 # random read cost (SSD: 1.1, HDD: 4.0)
effective_io_concurrency = 200 # concurrent I/O capacity (SSD: 200, HDD: 2)
seq_page_cost = 1.0 # sequential read cost
cpu_tuple_cost = 0.01 # CPU cost per tuple
cpu_index_tuple_cost = 0.005 # CPU cost per index entry
cpu_operator_cost = 0.0025 # CPU cost per operator call
default_statistics_target = 100 # statistics sampling granularity
# ===== Parallel Query =====
max_worker_processes = 8 # background worker pool
max_parallel_workers_per_gather = 4 # max workers per Gather node
max_parallel_workers = 8 # global parallel worker limit
max_parallel_maintenance_workers = 4 # parallelism for maintenance commands
parallel_tuple_cost = 0.1 # cost of shipping a tuple from a worker
parallel_setup_cost = 1000.0 # cost of launching parallel workers
min_parallel_table_scan_size = 8MB # smallest table considered for parallel scan
min_parallel_index_scan_size = 512kB # smallest index considered for parallel scan
# ===== Autovacuum =====
autovacuum = on # enable automatic VACUUM
autovacuum_max_workers = 3 # max concurrent autovacuum workers
autovacuum_naptime = 1min # interval between autovacuum runs
autovacuum_vacuum_threshold = 50 # minimum row changes before VACUUM
autovacuum_vacuum_scale_factor = 0.2 # VACUUM when ~20% of rows changed
autovacuum_vacuum_cost_delay = 2ms # cost-based vacuum delay
autovacuum_vacuum_cost_limit = 200 # cost-based vacuum limit
# ===== Logging =====
logging_collector = on # enable the log collector
log_directory = 'log' # log directory
log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'
log_file_mode = 0600
log_rotation_age = 1d # rotate logs daily
log_rotation_size = 100MB # rotate when a log file reaches 100MB
log_min_duration_statement = 1000 # log statements running longer than 1 s
log_checkpoints = on # log checkpoints
log_connections = on # log connections
log_disconnections = on # log disconnections
log_lock_waits = on # log lock waits
log_temp_files = 0 # log every temp file
log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h '
log_timezone = 'UTC'
# ===== Query statistics (pg_stat_statements) =====
shared_preload_libraries = 'pg_stat_statements' # query statistics extension
pg_stat_statements.max = 10000 # track up to 10,000 distinct statements
pg_stat_statements.track = all # also track nested statements
pg_stat_statements.track_utility = on # track utility (DDL) statements
Securing pg_hba.conf
# TYPE DATABASE USER ADDRESS METHOD
# Local connections (Unix socket)
local all all peer
# Local TCP connections
host all all 127.0.0.1/32 scram-sha-256
host all all ::1/128 scram-sha-256
# Remote connections (restrict to known subnets)
host all all 192.168.1.0/24 scram-sha-256
host all all 10.0.0.0/8 scram-sha-256
# Replication connections
host replication replication 192.168.1.0/24 scram-sha-256
2. Performance Tuning in Practice
2.1 Index Optimization
Case 1: Single-Column Indexes vs. a Composite Index
Scenario: query an orders table whose WHERE clause filters on both user_id and status
-- Create a test table
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
user_id INTEGER NOT NULL,
status VARCHAR(20) NOT NULL,
amount NUMERIC(10,2),
created_at TIMESTAMP DEFAULT NOW()
);
-- Insert test data (1,000,000 rows)
INSERT INTO orders (user_id, status, amount)
SELECT
(random() * 10000)::INTEGER,
CASE (random() * 3)::INTEGER
WHEN 0 THEN 'pending'
WHEN 1 THEN 'paid'
ELSE 'shipped'
END,
(random() * 1000)::NUMERIC(10,2)
FROM generate_series(1, 1000000);
-- Option 1: two single-column indexes
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_orders_status ON orders(status);
-- Query performance
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM orders WHERE user_id = 123 AND status = 'paid';
/*
Sample output (option 1):
Bitmap Heap Scan on orders (cost=25.67..1234.56 rows=50 width=...)
Recheck Cond: ((user_id = 123) AND ((status)::text = 'paid'::text))
Buffers: shared hit=150
-> BitmapAnd (cost=25.67..25.67 rows=50 width=0)
Buffers: shared hit=10
-> Bitmap Index Scan on idx_orders_user_id (cost=0.00..10.00 rows=100 width=0)
Index Cond: (user_id = 123)
-> Bitmap Index Scan on idx_orders_status (cost=0.00..15.00 rows=333333 width=0)
Index Cond: ((status)::text = 'paid'::text)
Planning Time: 0.5 ms
Execution Time: 12.3 ms
*/
-- Option 2: a composite index
DROP INDEX idx_orders_user_id;
DROP INDEX idx_orders_status;
CREATE INDEX idx_orders_user_status ON orders(user_id, status);
-- Query performance
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM orders WHERE user_id = 123 AND status = 'paid';
/*
Sample output (option 2):
Index Scan using idx_orders_user_status on orders (cost=0.42..15.67 rows=50 width=...)
Index Cond: ((user_id = 123) AND ((status)::text = 'paid'::text))
Buffers: shared hit=5
Planning Time: 0.3 ms
Execution Time: 0.8 ms
*/
Conclusion: the composite index is roughly 15x faster (12.3 ms → 0.8 ms) and touches 30x fewer buffers (150 → 5).
Best practices:
- Put the most selective column first (user_id is far more selective than status)
- Order composite-index columns as: equality (WHERE col = ...) predicates first, then ORDER BY columns, then any columns needed only by the SELECT list (see the sketch below)
- Avoid piling up indexes (every extra index adds write overhead)
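A minimal sketch of that column-order rule on the orders table above (the index name and the LIMIT query shape are illustrative, not from the benchmark):
-- Equality columns (user_id, status) lead; the ORDER BY column follows,
-- so the planner can return rows already sorted and stop at the LIMIT.
CREATE INDEX idx_orders_user_status_created
ON orders (user_id, status, created_at DESC);
EXPLAIN (ANALYZE, BUFFERS)
SELECT id, amount
FROM orders
WHERE user_id = 123 AND status = 'paid'
ORDER BY created_at DESC
LIMIT 10;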
Case 2: Partial Indexes
Scenario: queries only ever touch status = 'pending' orders (~10% of the table)
-- Full-table index
CREATE INDEX idx_orders_status_full ON orders(status);
-- Index size: ~20MB
-- Partial index (pending rows only)
CREATE INDEX idx_orders_status_pending ON orders(status) WHERE status = 'pending';
-- Index size: ~2MB
-- Query
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM orders WHERE status = 'pending' ORDER BY created_at DESC LIMIT 10;
/*
Sample output using the partial index:
Index Scan using idx_orders_status_pending on orders (cost=0.29..10.5 rows=10 width=...)
Filter: ((status)::text = 'pending'::text)
Buffers: shared hit=3
Planning Time: 0.1 ms
Execution Time: 0.3 ms
*/
Advantages:
- The index is 90% smaller (20MB → 2MB)
- Queries get faster (fewer index pages to visit)
- VACUUM overhead drops
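When the hot query also sorts, as the "latest pending orders" query above does, the two ideas combine: key the partial index on the sort column so it serves both the predicate and the ordering. A sketch (the index name is illustrative):
-- Index only pending rows, ordered by recency; the LIMIT 10 query can
-- stop after reading roughly 10 index entries.
CREATE INDEX idx_orders_pending_recent
ON orders (created_at DESC)
WHERE status = 'pending';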
2.2 Query Optimization
Case 3: Avoid Implicit Type Casts
-- Bad: the comparison value is NUMERIC, so the indexed INTEGER column is cast.
-- (A bare quoted literal such as '123' is coerced harmlessly; the damage comes
-- from a value or bound parameter whose type forces the COLUMN to be cast.)
EXPLAIN ANALYZE
SELECT * FROM orders WHERE user_id = 123.0; -- user_id is INTEGER
/*
Sample output:
Seq Scan on orders (cost=0.00..25000.00 rows=5000 width=...)
Filter: ((user_id)::numeric = 123.0) -- the column is cast, so the index cannot be used!
Rows Removed by Filter: 995000
Buffers: shared hit=10000
Planning Time: 0.2 ms
Execution Time: 234.5 ms
*/
-- Good: the value's type matches the indexed column
EXPLAIN ANALYZE
SELECT * FROM orders WHERE user_id = 123;
/*
Sample output:
Index Scan using idx_orders_user_id on orders (cost=0.42..8.5 rows=1 width=...)
Index Cond: (user_id = 123)
Buffers: shared hit=4
Planning Time: 0.1 ms
Execution Time: 0.2 ms
*/
Conclusion: the implicit cast makes the query over 1000x slower (234.5 ms → 0.2 ms)!
Case 4: CTE Optimization (WITH Queries)
Before PostgreSQL 12, a CTE was always materialized and acted as an optimization fence. PostgreSQL 12+ inlines a CTE into the outer query when it is non-recursive, free of side effects, and referenced only once.
-- The same query on PostgreSQL < 12: the CTE is always materialized
WITH recent_orders AS (
SELECT * FROM orders WHERE created_at > NOW() - INTERVAL '7 days'
)
SELECT * FROM recent_orders WHERE user_id = 123;
/*
Sample plan (PostgreSQL < 12):
CTE Scan on recent_orders (cost=25000.00..25010.00 rows=1 width=...)
Filter: (user_id = 123)
-> Materialize (cost=0.00..25000.00 rows=10000 width=...)
-> Seq Scan on orders (cost=0.00..20000.00 rows=10000 width=...)
Filter: (created_at > (now() - '7 days'::interval))
Planning Time: 0.5 ms
Execution Time: 150.2 ms
*/
-- The same query on PostgreSQL 12+: the CTE is inlined
WITH recent_orders AS (
SELECT * FROM orders WHERE created_at > NOW() - INTERVAL '7 days'
)
SELECT * FROM recent_orders WHERE user_id = 123;
/*
Sample plan (PostgreSQL 12+, assuming an index on (user_id, created_at)):
Index Scan using idx_orders_user_created on orders (cost=0.42..15.5 rows=1 width=...)
Index Cond: ((user_id = 123) AND (created_at > (now() - '7 days'::interval)))
Planning Time: 0.3 ms
Execution Time: 0.5 ms
*/
-- Or simply write it as a plain WHERE clause
SELECT * FROM orders
WHERE created_at > NOW() - INTERVAL '7 days'
AND user_id = 123;
Conclusion: CTE inlining makes this query ~300x faster (150 ms → 0.5 ms).
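On PostgreSQL 12+ the behavior can also be chosen per CTE with standard syntax: MATERIALIZED restores the old fencing (useful when an expensive CTE is referenced several times), NOT MATERIALIZED requests inlining.
-- Force materialization (pre-12 behavior):
WITH recent_orders AS MATERIALIZED (
SELECT * FROM orders WHERE created_at > NOW() - INTERVAL '7 days'
)
SELECT * FROM recent_orders WHERE user_id = 123;
-- Explicitly request inlining:
WITH recent_orders AS NOT MATERIALIZED (
SELECT * FROM orders WHERE created_at > NOW() - INTERVAL '7 days'
)
SELECT * FROM recent_orders WHERE user_id = 123;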
2.3 Partitioned Tables
Case 5: Range Partitioning by Time
Scenario: a log table partitioned by month, queried for the last 7 days
-- Create the partitioned table
CREATE TABLE logs (
id BIGSERIAL,
user_id INTEGER,
action VARCHAR(50),
created_at TIMESTAMP NOT NULL,
data JSONB
) PARTITION BY RANGE (created_at);
-- Create partitions (one per month of 2024)
CREATE TABLE logs_2024_01 PARTITION OF logs
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE logs_2024_02 PARTITION OF logs
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');
-- ... remaining months
-- Create indexes (automatically created on every partition)
CREATE INDEX idx_logs_created_at ON logs(created_at);
CREATE INDEX idx_logs_user_id ON logs(user_id);
-- Query (partition pruning is automatic)
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM logs WHERE created_at >= '2024-01-15' AND created_at < '2024-01-22';
/*
Append (cost=0.42..100.5 rows=1000 width=...)
-> Index Scan using logs_2024_01_created_at_idx on logs_2024_01 (cost=0.42..100.5 rows=1000 width=...)
Index Cond: ((created_at >= '2024-01-15'::timestamp) AND (created_at < '2024-01-22'::timestamp))
Planning Time: 0.5 ms
Execution Time: 5.2 ms
*/
Advantages:
- Only the relevant partition (logs_2024_01) is scanned, never the whole table
- Historical partitions can be moved to cheaper storage (e.g., HDD tablespaces)
- Old data can be removed instantly by detaching and dropping its partition (no VACUUM needed)
Best practices:
- Pick a partition key that queries filter on: a timestamp column, region ID, etc.
- Keep the partition count moderate (e.g., under ~100; very many partitions slows planning)
- Automate maintenance: create future partitions ahead of time with a script (see the sketch below)
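A minimal sketch of such a maintenance script, assuming the logs_YYYY_MM naming convention used above; it could run monthly from cron or pg_cron:
-- Create next month's partition if it does not already exist
DO $$
DECLARE
    next_month date := date_trunc('month', now()) + interval '1 month';
    part_name  text := format('logs_%s', to_char(next_month, 'YYYY_MM'));
BEGIN
    EXECUTE format(
        'CREATE TABLE IF NOT EXISTS %I PARTITION OF logs FOR VALUES FROM (%L) TO (%L)',
        part_name, next_month, next_month + interval '1 month');
END $$;
-- Retiring old data is instant (note PostgreSQL has no DROP PARTITION syntax;
-- detach the partition, then drop it as a plain table):
ALTER TABLE logs DETACH PARTITION logs_2024_01;
DROP TABLE logs_2024_01;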
3. Transactions and Concurrency Control
3.1 MVCC and Transaction Isolation Levels
Case 6: Non-Repeatable Reads under Read Committed
-- Session 1
BEGIN;
SELECT * FROM accounts WHERE id = 1; -- balance = 1000
-- wait for session 2 to commit
SELECT * FROM accounts WHERE id = 1; -- balance = 900 (non-repeatable read!)
COMMIT;
-- Session 2
BEGIN;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
COMMIT;
Conclusion: under Read Committed, two reads inside one transaction can see different results.
Case 7: Phantom Protection under Repeatable Read
-- Session 1 (Repeatable Read)
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SELECT COUNT(*) FROM orders WHERE user_id = 123; -- result: 10
-- wait for session 2 to commit
SELECT COUNT(*) FROM orders WHERE user_id = 123; -- result: still 10 (no phantoms!)
COMMIT;
-- Session 2
BEGIN;
INSERT INTO orders (user_id, status, amount) VALUES (123, 'paid', 100);
COMMIT;
Conclusion: under Repeatable Read, the transaction works against the snapshot taken at its first query and is unaffected by concurrently committed changes.
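The trade-off is that concurrent writes can now fail instead of silently interleaving: if a Repeatable Read transaction updates a row that another transaction changed after the snapshot was taken, PostgreSQL aborts it and the application must retry. A sketch:
-- Session 1 (Repeatable Read): snapshot taken at the first query
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SELECT balance FROM accounts WHERE id = 1;
-- Session 2 meanwhile commits: UPDATE accounts SET balance = balance - 100 WHERE id = 1;
-- Session 1: this update conflicts with the snapshot and fails
UPDATE accounts SET balance = balance + 50 WHERE id = 1;
-- ERROR: could not serialize access due to concurrent update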
3.2 Deadlock Detection and Prevention
Case 8: A Deadlock Scenario and Its Diagnosis
-- Session 1
BEGIN;
UPDATE accounts SET balance = balance - 100 WHERE id = 1; -- locks id=1
-- now waits for the lock on id=2
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
-- Session 2
BEGIN;
UPDATE accounts SET balance = balance - 50 WHERE id = 2; -- locks id=2
-- now waits for the lock on id=1 (deadlock!)
UPDATE accounts SET balance = balance + 50 WHERE id = 1;
/*
Error reported to session 2:
ERROR: deadlock detected
DETAIL: Process 12345 waits for ShareLock on transaction 67890; blocked by process 12346.
Process 12346 waits for ShareLock on transaction 67891; blocked by process 12345.
HINT: See server log for query details.
CONTEXT: while updating tuple (0,1) in relation "accounts"
*/
Prevention strategies:
- Lock in a consistent order: always touch rows in ascending primary-key order. Note that UPDATE has no ORDER BY clause, so the ordering must be imposed by taking the row locks explicitly first.
- Use SELECT ... FOR UPDATE to acquire the locks up front, in order:
BEGIN;
SELECT * FROM accounts WHERE id IN (1, 2) ORDER BY id FOR UPDATE;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
COMMIT;
- Lower deadlock_timeout so deadlocks are detected sooner (it does not prevent them):
SET deadlock_timeout = '100ms';
3.3 Long-Transaction Hygiene
Case 9: Why Long Transactions Hurt
-- A long-running transaction
BEGIN;
SELECT pg_backend_pid(); -- grab the backend PID
-- long idle period (e.g., waiting for user input)
-- ... 10 minutes later ...
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
COMMIT;
Problems:
- VACUUM is blocked: dead tuples cannot be removed, so tables bloat
- The connection is held for the transaction's whole lifetime, starving the pool
- Locks are held, so other transactions queue behind it and concurrency suffers
Diagnosis:
-- Find long-running transactions
SELECT pid, now() - xact_start AS duration, state, query
FROM pg_stat_activity
WHERE state != 'idle'
AND xact_start IS NOT NULL
ORDER BY duration DESC
LIMIT 10;
-- Terminate a long-running transaction
SELECT pg_terminate_backend(12345); -- 12345 is the PID
Best practices:
- Keep transactions short (ideally < 1 second)
- Never run slow work (e.g., external API calls) inside a transaction
- Set idle_in_transaction_session_timeout so abandoned transactions are cleaned up automatically (see the sketch below):
SET idle_in_transaction_session_timeout = '5min';
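These limits can also be pinned per role so every new session inherits them; a sketch using the app_user role from section 7 (the values are illustrative):
-- Safety limits applied to all of app_user's future sessions
ALTER ROLE app_user SET statement_timeout = '30s'; -- cap any single statement
ALTER ROLE app_user SET idle_in_transaction_session_timeout = '1min'; -- kill abandoned transactions
ALTER ROLE app_user SET lock_timeout = '5s'; -- fail fast instead of queuing on locks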
4. High Availability and Replication
4.1 Streaming Replication
Primary configuration
# postgresql.conf
wal_level = replica
max_wal_senders = 5
wal_keep_size = 1GB # retain 1GB of WAL (protects a briefly lagging standby)
archive_mode = on
archive_command = 'test ! -f /archive/%f && cp %p /archive/%f'
# pg_hba.conf
host replication replication 192.168.1.0/24 scram-sha-256
Standby setup
# Initialize the standby with pg_basebackup
pg_basebackup -h 192.168.1.10 -U replication -D /data/pgdata_standby \
-X stream -P -R
# -X stream: stream WAL while the backup runs
# -P: show progress
# -R: write standby.signal and append connection settings to postgresql.auto.conf
standby.signal (standby marker file)
# empty file; its presence makes the instance start as a standby
postgresql.auto.conf (standby settings)
primary_conninfo = 'host=192.168.1.10 port=5432 user=replication password=xxx'
restore_command = 'cp /archive/%f %p'
Start the standby
pg_ctl -D /data/pgdata_standby start
Verifying replication
On the primary:
-- WAL sender status
SELECT * FROM pg_stat_replication;
/*
pid | usename | application_name | state | sync_state | replay_lag
-----|--------------|------------------|-----------|------------|------------
1234 | replication | walreceiver | streaming | async | 00:00:00.1
*/
On the standby:
-- Replication delay
SELECT now() - pg_last_xact_replay_timestamp() AS replication_lag;
/*
replication_lag
-----------------
00:00:00.234
*/
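The timestamp-based query above keeps growing during idle periods even when nothing is lost, so it is worth cross-checking lag in bytes on the primary:
-- On the primary: lag per standby, in bytes of WAL
SELECT application_name,
pg_wal_lsn_diff(pg_current_wal_lsn(), sent_lsn) AS send_lag_bytes,
pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn) AS replay_lag_bytes
FROM pg_stat_replication;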
4.2 Synchronous Replication and Failover
Synchronous replication
On the primary:
synchronous_standby_names = 'standby1' # name of the synchronous standby
synchronous_commit = on
On the standby:
cluster_name = 'standby1' # used as the standby's application_name
Failover
- Detect the primary failure:
pg_isready -h 192.168.1.10 # output: no response
- Promote the standby (run on the standby):
pg_ctl promote -D /data/pgdata_standby
# or from SQL: SELECT pg_promote();
- Verify the standby is now a primary:
SELECT pg_is_in_recovery(); -- f (false) means this node is now a primary
- Repoint the application: update its connection settings to the new primary (192.168.1.11)
Automatic Failover with Patroni
Patroni is a high-availability solution built on etcd/Consul/ZooKeeper that provides automatic failure detection and failover.
# patroni.yml
scope: postgres-cluster
name: node1
restapi:
listen: 0.0.0.0:8008
connect_address: 192.168.1.10:8008
etcd:
host: 192.168.1.100:2379
bootstrap:
dcs:
ttl: 30
loop_wait: 10
retry_timeout: 10
maximum_lag_on_failover: 1048576
postgresql:
use_pg_rewind: true
parameters:
wal_level: replica
hot_standby: on
max_wal_senders: 5
synchronous_commit: on
5. Monitoring and Diagnostics
5.1 Key Performance Indicators
Live activity
-- Currently active sessions
SELECT pid, usename, application_name, state,
now() - query_start AS query_duration,
substring(query, 1, 50) AS query
FROM pg_stat_activity
WHERE state != 'idle'
ORDER BY query_duration DESC;
-- Lock waits
SELECT blocked_locks.pid AS blocked_pid,
blocked_activity.usename AS blocked_user,
blocking_locks.pid AS blocking_pid,
blocking_activity.usename AS blocking_user,
blocked_activity.query AS blocked_statement,
blocking_activity.query AS blocking_statement
FROM pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
JOIN pg_catalog.pg_locks blocking_locks
ON blocking_locks.locktype = blocked_locks.locktype
AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
AND blocking_locks.pid != blocked_locks.pid
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.granted;
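On PostgreSQL 9.6+ the same question ("who is blocking whom") has a much shorter answer via pg_blocking_pids():
-- Compact alternative to the join above
SELECT pid,
pg_blocking_pids(pid) AS blocked_by,
wait_event_type,
state,
query
FROM pg_stat_activity
WHERE cardinality(pg_blocking_pids(pid)) > 0;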
Slow-query analysis
-- Enable the pg_stat_statements extension
CREATE EXTENSION pg_stat_statements;
-- Top 10 statements by mean execution time
SELECT query,
calls,
total_exec_time / 1000 AS total_time_sec,
mean_exec_time / 1000 AS mean_time_sec,
max_exec_time / 1000 AS max_time_sec,
stddev_exec_time / 1000 AS stddev_time_sec,
rows
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;
-- Reset the statistics
SELECT pg_stat_statements_reset();
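The same view can rank statements by I/O instead of time, which surfaces queries that hammer the disk even when each call is fast (column names per the PostgreSQL 13+ view definition):
-- Top 10 statements by blocks read from disk
SELECT query,
calls,
shared_blks_read,
shared_blks_hit
FROM pg_stat_statements
ORDER BY shared_blks_read DESC
LIMIT 10;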
Buffer cache hit ratios
-- Table hit ratio (should be > 99%)
SELECT
schemaname,
relname,
heap_blks_read,
heap_blks_hit,
CASE WHEN (heap_blks_hit + heap_blks_read) = 0 THEN 0
ELSE heap_blks_hit * 100.0 / (heap_blks_hit + heap_blks_read)
END AS hit_ratio
FROM pg_statio_user_tables
ORDER BY heap_blks_read DESC
LIMIT 10;
-- Index hit ratio
SELECT
schemaname,
relname,
indexrelname,
idx_blks_read,
idx_blks_hit,
CASE WHEN (idx_blks_hit + idx_blks_read) = 0 THEN 0
ELSE idx_blks_hit * 100.0 / (idx_blks_hit + idx_blks_read)
END AS hit_ratio
FROM pg_statio_user_indexes
ORDER BY idx_blks_read DESC
LIMIT 10;
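Hit ratios say nothing about whether an index is used at all; indexes with no scans still pay write and VACUUM overhead and are candidates for removal:
-- Indexes never scanned since the last statistics reset
SELECT schemaname,
relname,
indexrelname,
idx_scan,
pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexrelid) DESC;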
Table bloat detection
-- Detect bloat (dead-tuple ratio)
SELECT
schemaname,
relname,
n_live_tup AS live_tuples,
n_dead_tup AS dead_tuples,
CASE WHEN n_live_tup = 0 THEN 0
ELSE n_dead_tup * 100.0 / n_live_tup
END AS dead_ratio,
last_autovacuum,
last_autoanalyze
FROM pg_stat_user_tables
WHERE n_dead_tup > 1000
ORDER BY dead_ratio DESC
LIMIT 10;
-- Manual VACUUM
VACUUM (VERBOSE, ANALYZE) orders;
5.2 Log Analysis
Analyzing logs with pgBadger
# Install pgBadger
sudo apt-get install pgbadger
# Analyze a log file
pgbadger /var/log/postgresql/postgresql-2024-01-15.log -o report.html
# Open report.html in a browser; it includes the top 10 slow queries,
# error statistics, session statistics, lock waits, and more
Real-time monitoring tools
- pg_top: a top-like monitor for PostgreSQL
pg_top -h localhost -U postgres -d testdb
- pgAdmin: graphical administration tool
- Grafana + Prometheus + postgres_exporter: monitoring dashboards
6. Backup and Recovery
6.1 Physical Backups (pg_basebackup)
# Full base backup
pg_basebackup -h localhost -U postgres -D /backup/pgdata_$(date +%Y%m%d) \
-Ft -z -Xs -P
# -Ft: tar format
# -z: gzip compression
# -Xs: stream WAL during the backup
# -P: show progress
6.2 Logical Backups (pg_dump)
# Dump a single database
pg_dump -h localhost -U postgres -Fc -f testdb_$(date +%Y%m%d).dump testdb
# -Fc: custom (compressed) format; restore with pg_restore
# Dump all databases
pg_dumpall -h localhost -U postgres -f all_dbs_$(date +%Y%m%d).sql
6.3 Point-in-Time Recovery (PITR)
Configure WAL archiving
# postgresql.conf
wal_level = replica
archive_mode = on
archive_command = 'test ! -f /archive/%f && cp %p /archive/%f'
Recovery steps
- Stop the server
- Clear the data directory
- Restore the base backup
- Set the recovery target
- Start the server
# 1. Stop the server
pg_ctl stop -D /data/pgdata
# 2. Clear the data directory
rm -rf /data/pgdata/*
# 3. Restore the base backup
tar -xzf /backup/pgdata_20240115.tar.gz -C /data/pgdata
# 4. Configure recovery: the recovery settings go in postgresql.conf;
#    recovery.signal itself must be an EMPTY marker file
cat >> /data/pgdata/postgresql.conf << EOF
restore_command = 'cp /archive/%f %p'
recovery_target_time = '2024-01-15 14:30:00'
recovery_target_action = 'promote'
EOF
touch /data/pgdata/recovery.signal
# 5. Start the server
pg_ctl start -D /data/pgdata
# WAL is replayed automatically up to the target time
7. Security Best Practices
7.1 Users and Privileges
-- Create a read-only user
CREATE ROLE readonly_user LOGIN PASSWORD 'secure_password';
-- Allow connecting
GRANT CONNECT ON DATABASE testdb TO readonly_user;
-- Grant SELECT on existing tables
GRANT USAGE ON SCHEMA public TO readonly_user;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO readonly_user;
-- Automatically grant SELECT on tables created later
ALTER DEFAULT PRIVILEGES IN SCHEMA public
GRANT SELECT ON TABLES TO readonly_user;
-- Create an application user (read/write)
CREATE ROLE app_user LOGIN PASSWORD 'secure_password';
GRANT CONNECT ON DATABASE testdb TO app_user;
GRANT USAGE, CREATE ON SCHEMA public TO app_user;
GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO app_user;
GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO app_user;
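A common companion hardening step: before PostgreSQL 15, any role may create objects in the public schema by default, so revoke that (and the default CONNECT privilege) explicitly:
-- Lock down defaults inherited via PUBLIC
REVOKE CREATE ON SCHEMA public FROM PUBLIC;
REVOKE CONNECT ON DATABASE testdb FROM PUBLIC;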
7.2 SSL Connections
Server configuration
# postgresql.conf
ssl = on
ssl_cert_file = '/etc/postgresql/server.crt'
ssl_key_file = '/etc/postgresql/server.key'
ssl_ca_file = '/etc/postgresql/ca.crt'
Client connection
psql "host=192.168.1.10 dbname=testdb user=postgres sslmode=verify-full"
7.3 Row-Level Security (RLS)
-- Enable RLS
ALTER TABLE orders ENABLE ROW LEVEL SECURITY;
-- Policy: users can only see their own orders
CREATE POLICY orders_user_policy ON orders
FOR SELECT
USING (user_id = current_setting('app.user_id')::INTEGER);
-- The application sets the current user ID
SET app.user_id = 123;
-- Queries are filtered automatically
SELECT * FROM orders; -- returns only rows with user_id = 123
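Two caveats worth noting: the table owner (and superusers) bypass RLS unless it is forced, and SET is session-wide, so pooled connections should prefer a transaction-local set_config(). A sketch:
-- Apply policies to the table owner as well
ALTER TABLE orders FORCE ROW LEVEL SECURITY;
-- Transaction-scoped alternative to SET (third argument = is_local)
BEGIN;
SELECT set_config('app.user_id', '123', true);
SELECT * FROM orders; -- filtered for this transaction only
COMMIT;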
Summary
This document covered the core practices for running PostgreSQL in production:
- Environment setup and configuration tuning
- Performance tuning (indexes, queries, partitioning)
- Transactions and concurrency control
- High availability and replication
- Monitoring and diagnostics
- Backup and recovery
- Security best practices
With these cases and best practices, developers and DBAs can quickly master effective PostgreSQL usage and build stable, high-performance database systems.