graph LR
    A[Code Commit] --> B[Code Check]
    B --> C[Unit Tests]
    C --> D[Security Scan]
    D --> E[Build Image]
    E --> F[Deploy to Test Environment]
    F --> G[Integration Tests]
    G --> H[Deploy to Production]
    H --> I[Monitoring Verification]
    B --> J[Code Quality Gate]
    C --> K[Test Coverage Check]
    D --> L[Vulnerability Detection]
    G --> M[Smoke Tests]
    I --> N[Rollback Mechanism]
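Several of these stages reduce to probing a health endpoint: the smoke tests after integration testing, and the monitoring verification after the production rollout. A minimal Go sketch of such a probe, assuming a /health endpoint that returns 200 when the deployment is sound (the URL, retry count, and interval are illustrative assumptions):

package main

import (
    "fmt"
    "net/http"
    "os"
    "time"
)

// probeHealth polls the health endpoint until it responds with 200 OK
// or the retry budget is exhausted.
func probeHealth(url string, retries int, interval time.Duration) error {
    client := &http.Client{Timeout: 5 * time.Second}
    for i := 1; i <= retries; i++ {
        resp, err := client.Get(url)
        if err == nil {
            resp.Body.Close()
            if resp.StatusCode == http.StatusOK {
                return nil
            }
        }
        fmt.Printf("attempt %d/%d: service not healthy yet\n", i, retries)
        time.Sleep(interval)
    }
    return fmt.Errorf("service failed health check after %d attempts", retries)
}

func main() {
    // Hypothetical endpoint; substitute the environment under test.
    if err := probeHealth("https://staging.example.com/health", 10, 15*time.Second); err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1) // a non-zero exit fails the stage and trips the rollback path
    }
}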
graph TB
    A[Disaster Strikes] --> B[Assess Impact Scope]
    B --> C{Data Center Available?}
    C -->|Yes| D[Local Recovery]
    C -->|No| E[Off-site Recovery]
    D --> F[Start Standby Systems]
    E --> G[Activate DR Site]
    F --> H[Restore Database]
    G --> H
    H --> I[Restore Application Services]
    I --> J[Verify System Functionality]
    J --> K[Switch User Traffic]
    K --> L[Monitor System Status]
    L --> M[Recovery Complete]
#!/bin/bash
# scripts/disaster-recovery.sh
set -e

# Configuration
DR_SITE_HOST="dr.example.com"
DR_DATABASE_URL="postgresql://user:pass@dr-db:5432/newapi"
DR_BACKUP_PATH="/dr/backups"
HEALTH_CHECK_URL="http://dr.example.com/health"
DNS_FAILOVER_SCRIPT="/scripts/dns-failover.sh"

# Logging helper
log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1" | tee -a /var/log/disaster-recovery.log
}
# Check the status of the disaster recovery site
check_dr_site() {
    log "Checking disaster recovery site status..."
    if curl -f -s "$HEALTH_CHECK_URL" > /dev/null; then
        log "DR site is healthy"
        return 0
    else
        log "DR site is not responding"
        return 1
    fi
}
# Activate the disaster recovery site
activate_dr_site() {
    log "Activating disaster recovery site..."

    # Start the DR database
    ssh "$DR_SITE_HOST" "docker-compose -f /opt/newapi/docker-compose-dr.yml up -d db"

    # Wait for the database to come up
    sleep 30

    # Restore the latest backup. The backup job (below) produces
    # gzip-compressed pg_dump custom-format archives named newapi_backup_*.sql.gz.
    LATEST_BACKUP=$(ssh "$DR_SITE_HOST" "ls -t $DR_BACKUP_PATH/newapi_backup_*.sql.gz 2>/dev/null | head -1")
    if [ -n "$LATEST_BACKUP" ]; then
        log "Restoring from backup: $LATEST_BACKUP"
        ssh "$DR_SITE_HOST" "gunzip -c '$LATEST_BACKUP' | pg_restore -d '$DR_DATABASE_URL'"
    else
        log "No backup found for restoration"
        exit 1
    fi

    # Start the application services
    ssh "$DR_SITE_HOST" "docker-compose -f /opt/newapi/docker-compose-dr.yml up -d app"

    # Wait for the application to come up
    sleep 60
    log "DR site activated successfully"
}
# DNS failover
perform_dns_failover() {
    log "Performing DNS failover..."
    if [ -x "$DNS_FAILOVER_SCRIPT" ]; then
        "$DNS_FAILOVER_SCRIPT" "$DR_SITE_HOST"
        log "DNS failover completed"
    else
        log "DNS failover script not found or not executable"
    fi
}

# Verify the recovery result
verify_recovery() {
    log "Verifying disaster recovery..."

    # Poll the application health endpoint
    for i in {1..10}; do
        if curl -f -s "$HEALTH_CHECK_URL" > /dev/null; then
            log "Application is healthy after recovery"
            return 0
        fi
        log "Waiting for application to become healthy... ($i/10)"
        sleep 30
    done

    log "Application health check failed after recovery"
    return 1
}

# Main flow
main() {
    log "Starting disaster recovery process..."

    # Check the DR site; activate it if it is down
    if ! check_dr_site; then
        log "DR site check failed, attempting to activate..."
        activate_dr_site
    fi

    # Perform DNS failover
    perform_dns_failover

    # Verify the recovery result
    if verify_recovery; then
        log "Disaster recovery completed successfully"
        exit 0
    else
        log "Disaster recovery failed"
        exit 1
    fi
}

# Run the main flow
main "$@"
// Run one scheduled backup cycle.
// (The original opening of this method was truncated; the name and
// timeout below are assumptions reconstructed from the surrounding code.)
func (bm *BackupManager) RunScheduledBackup() {
    ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute)
    defer cancel()

    bm.logger.Info("Starting scheduled backup")
    if err := bm.BackupDatabase(ctx); err != nil {
        bm.logger.WithError(err).Error("Backup failed")
        bm.notifyFailure(err)
        return
    }

    if err := bm.CleanupOldBackups(); err != nil {
        bm.logger.WithError(err).Warn("Failed to cleanup old backups")
    }

    bm.logger.Info("Backup completed successfully")
    bm.notifySuccess()
}
// Back up the database
func (bm *BackupManager) BackupDatabase(ctx context.Context) error {
    // Create the backup directory
    if err := os.MkdirAll(bm.config.BackupDir, 0755); err != nil {
        return fmt.Errorf("failed to create backup directory: %w", err)
    }

    // Generate the backup file name
    timestamp := time.Now().Format("20060102_150405")
    backupFile := filepath.Join(bm.config.BackupDir, fmt.Sprintf("newapi_backup_%s.sql", timestamp))
    compressedFile := backupFile + ".gz"

    // Run pg_dump (custom format, so pg_restore can be used for recovery)
    cmd := exec.CommandContext(ctx, "pg_dump", bm.config.DatabaseURL,
        "--verbose", "--clean", "--no-owner", "--no-privileges",
        "--format=custom", "--file="+backupFile)
    if err := cmd.Run(); err != nil {
        return fmt.Errorf("pg_dump failed: %w", err)
    }

    // Compress the backup file
    if err := bm.compressFile(backupFile, compressedFile); err != nil {
        return fmt.Errorf("failed to compress backup: %w", err)
    }

    // Remove the uncompressed file
    os.Remove(backupFile)

    // Upload to S3 (if configured)
    if bm.config.S3Bucket != "" {
        if err := bm.uploadToS3(compressedFile); err != nil {
            bm.logger.WithError(err).Warn("Failed to upload backup to S3")
        }
    }

    return nil
}
// Compress a file with gzip
func (bm *BackupManager) compressFile(src, dst string) error {
    cmd := exec.Command("gzip", "-c", src)
    output, err := os.Create(dst)
    if err != nil {
        return err
    }
    defer output.Close()
    cmd.Stdout = output
    return cmd.Run()
}

// Upload a backup to S3
func (bm *BackupManager) uploadToS3(filePath string) error {
    fileName := filepath.Base(filePath)
    s3Key := fmt.Sprintf("database-backups/%s", fileName)
    cmd := exec.Command("aws", "s3", "cp", filePath, fmt.Sprintf("s3://%s/%s", bm.config.S3Bucket, s3Key))
    return cmd.Run()
}
// Clean up backups older than the retention window
func (bm *BackupManager) CleanupOldBackups() error {
    cutoff := time.Now().AddDate(0, 0, -bm.config.RetentionDays)
    return filepath.Walk(bm.config.BackupDir, func(path string, info os.FileInfo, err error) error {
        if err != nil {
            return err
        }
        if !info.IsDir() && info.ModTime().Before(cutoff) {
            bm.logger.WithField("file", path).Info("Removing old backup")
            return os.Remove(path)
        }
        return nil
    })
}
// Notify on success
func (bm *BackupManager) notifySuccess() {
    if bm.config.NotifyWebhook != "" {
        // Send a success notification
        // (webhook delivery left to the implementation)
    }
}

// Notify on failure
func (bm *BackupManager) notifyFailure(err error) {
    if bm.config.NotifyWebhook != "" {
        // Send a failure notification
        // (webhook delivery left to the implementation)
    }
}
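The notification hooks above are intentionally left as stubs. One way to flesh them out, sketched under the assumption that NotifyWebhook accepts a JSON POST (the payload field names are illustrative, not a defined contract; this also requires the bytes, encoding/json, and net/http imports):

// sendWebhook posts a small JSON payload to the configured webhook URL.
// Hypothetical helper: payload shape and timeout are assumptions.
func (bm *BackupManager) sendWebhook(status, message string) {
    payload, err := json.Marshal(map[string]string{
        "service": "newapi-backup",
        "status":  status,
        "message": message,
        "time":    time.Now().Format(time.RFC3339),
    })
    if err != nil {
        bm.logger.WithError(err).Warn("Failed to encode webhook payload")
        return
    }
    client := &http.Client{Timeout: 10 * time.Second}
    resp, err := client.Post(bm.config.NotifyWebhook, "application/json", bytes.NewReader(payload))
    if err != nil {
        bm.logger.WithError(err).Warn("Failed to deliver webhook notification")
        return
    }
    defer resp.Body.Close()
}

With this in place, notifySuccess would call bm.sendWebhook("success", "Backup completed") and notifyFailure would call bm.sendWebhook("failure", err.Error()).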
graph TB
    A[Performance Optimization] --> B[Application-Layer Optimization]
    A --> C[Database Optimization]
    A --> D[System-Layer Optimization]
    A --> E[Network Optimization]
    B --> B1[Code Optimization]
    B --> B2[Memory Management]
    B --> B3[Concurrency Optimization]
    B --> B4[Caching Strategy]
    C --> C1[Query Optimization]
    C --> C2[Index Optimization]
    C --> C3[Connection Pooling]
    C --> C4[Database and Table Sharding]
    D --> D1[CPU Optimization]
    D --> D2[Memory Optimization]
    D --> D3[I/O Optimization]
    D --> D4[Container Optimization]
    E --> E1[Load Balancing]
    E --> E2[CDN Acceleration]
    E --> E3[Transfer Compression]
    E --> E4[Connection Reuse]
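Of the database-side items, connection pooling is usually the cheapest win. A minimal sketch of pool tuning with Go's standard database/sql (the limits are illustrative starting points, not measured values for this workload):

package main

import (
    "database/sql"
    "log"
    "time"

    _ "github.com/lib/pq" // PostgreSQL driver (assumed; any database/sql driver works)
)

func main() {
    db, err := sql.Open("postgres", "postgresql://user:pass@localhost:5432/newapi")
    if err != nil {
        log.Fatal(err)
    }
    defer db.Close()

    // Cap total connections below the server's max_connections,
    // leaving headroom for migrations and ad-hoc sessions.
    db.SetMaxOpenConns(50)
    // Keep a warm pool so bursts do not pay connection setup cost.
    db.SetMaxIdleConns(25)
    // Recycle connections periodically so server or load-balancer
    // restarts do not strand stale sockets.
    db.SetConnMaxLifetime(30 * time.Minute)
    db.SetConnMaxIdleTime(5 * time.Minute)

    if err := db.Ping(); err != nil {
        log.Fatal(err)
    }
}

The right ceiling depends on the PostgreSQL max_connections setting and how many application instances share the database, so treat these numbers as a starting point to tune under load.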
-- Database performance optimization script
-- scripts/optimize-database.sql

-- Composite indexes
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_users_email_status ON users(email, status) WHERE status = 1;
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_users_created_at ON users(created_at DESC);
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tokens_user_id_status ON tokens(user_id, status) WHERE status = 1;
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_tokens_created_at ON tokens(created_at DESC);
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_channels_status_type ON channels(status, type);
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_logs_user_id_created_at ON logs(user_id, created_at DESC);
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_logs_created_at_type ON logs(created_at DESC, type);
-- Partial indexes (smaller and faster for hot subsets)
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_active_users ON users(id) WHERE status = 1;
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_active_channels ON channels(id) WHERE status = 1;
-- Expression index
CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_users_email_lower ON users(LOWER(email));
-- Partitioned table design
CREATE TABLE IF NOT EXISTS logs_partitioned (
    LIKE logs INCLUDING ALL
) PARTITION BY RANGE (created_at);
-- Function to create monthly partitions automatically
CREATE OR REPLACE FUNCTION create_monthly_partition(table_name text, start_date date)
RETURNS void AS $$
DECLARE
    partition_name text;
    end_date date;
BEGIN
    partition_name := table_name || '_' || to_char(start_date, 'YYYY_MM');
    end_date := start_date + interval '1 month';
    EXECUTE format('CREATE TABLE IF NOT EXISTS %I PARTITION OF %I FOR VALUES FROM (%L) TO (%L)',
        partition_name, table_name, start_date, end_date);
END;
$$ LANGUAGE plpgsql;
-- Create partitions for the previous, current, and next month
SELECT create_monthly_partition('logs_partitioned', date_trunc('month', CURRENT_DATE - interval '1 month'));
SELECT create_monthly_partition('logs_partitioned', date_trunc('month', CURRENT_DATE));
SELECT create_monthly_partition('logs_partitioned', date_trunc('month', CURRENT_DATE + interval '1 month'));
-- Refresh table statistics
ANALYZE users;
ANALYZE tokens;
ANALYZE channels;
ANALYZE logs;
-- Query optimization guidelines:
-- 1. Avoid SELECT *; list only the columns you need
-- 2. Use LIMIT to bound result sets
-- 3. Use JOINs judiciously and make sure join columns are indexed
-- 4. Avoid wrapping indexed columns in functions inside WHERE clauses
--    (or add a matching expression index, as with idx_users_email_lower above)
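These guidelines map directly onto application code. A short Go sketch of a compliant query through database/sql, assuming the logs table has id, type, and created_at columns as the indexes above suggest:

package store

import (
    "context"
    "database/sql"
    "time"
)

// LogEntry mirrors only the columns we actually need; the field set is
// an assumption based on the indexes defined in this script.
type LogEntry struct {
    ID        int64
    Type      int
    CreatedAt time.Time
}

// recentLogs follows the guidelines: explicit columns instead of SELECT *,
// a parameterized filter served by idx_logs_user_id_created_at, and a LIMIT.
func recentLogs(ctx context.Context, db *sql.DB, userID int64) ([]LogEntry, error) {
    rows, err := db.QueryContext(ctx,
        `SELECT id, type, created_at
         FROM logs
         WHERE user_id = $1
         ORDER BY created_at DESC
         LIMIT 100`, userID)
    if err != nil {
        return nil, err
    }
    defer rows.Close()

    var entries []LogEntry
    for rows.Next() {
        var e LogEntry
        if err := rows.Scan(&e.ID, &e.Type, &e.CreatedAt); err != nil {
            return nil, err
        }
        entries = append(entries, e)
    }
    return entries, rows.Err()
}

Because the filter and sort match the composite index's column order, the query can read the newest 100 rows straight off the index instead of scanning and sorting the table.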