openGemini icon indicating copy to clipboard operation
openGemini copied to clipboard

源文件数据手动恢复之后出现bug

Open guoguo0123456 opened this issue 1 month ago • 1 comments

Describe the bug(Bug 描述)

原 meta 目录下的文件损坏,使用 shell 脚本恢复数据,并顺便将版本号从 1.3.1 升级到 1.4.1。升级和恢复成功之后,数据可以正常插入和查询;但数据插入运行一段时间(约一天)后,服务自动停止运行。重启之后服务可以成功启动,也可以正常插入,但执行查询时会直接报错,随后服务停止。

To Reproduce(Bug 复现步骤)

No response

Expected behavior(期望结果)

No response

Screenshots(屏幕截图)

Image

Logs(完整的错误日志)

{"level":"error","time":"2025-11-19T09:20:00.089545552+08:00","msg":"retry retryCreatePipelineExecutor err ","hostname":"127.0.0.1:8400","error":"measurement not found","stmt":"SELECT * FROM db1850."1850".timepricedata WHERE f_deviceid = '1866' ORDER BY time DESC LIMIT 1","errno":"00414012","location":"coordinator/statement_executor.go:1056","repeated":1} {"level":"error","time":"2025-11-19T09:20:00.089609435+08:00","msg":"execute select statement 400 error","hostname":"127.0.0.1:8400","stmt":"SELECT * FROM db1850."1850".timepricedata WHERE f_deviceid = '1866' ORDER BY time DESC LIMIT 1","error":"measurement not found","duration":0.000095791,"errno":"00414012","location":"coordinator/statement_executor.go:223","repeated":1} {"level":"error","time":"2025-11-19T09:20:00.093157027+08:00","msg":"ctx send fail","hostname":"127.0.0.1:8400","query":"SELECT * FROM db1850."1850".timepricedata WHERE f_deviceid = '1866' ORDER BY time DESC LIMIT 1","error":"measurement not found","errno":"40414012","location":"query/executor.go:541","repeated":1} {"level":"error","time":"2025-11-19T09:20:00.093579213+08:00","msg":"retry retryCreatePipelineExecutor err ","hostname":"127.0.0.1:8400","error":"measurement not found","stmt":"SELECT * FROM db1850."1850".timepricedata WHERE f_deviceid = '1870' ORDER BY time DESC LIMIT 1","errno":"00414012","location":"coordinator/statement_executor.go:1056","repeated":1} {"level":"error","time":"2025-11-19T09:20:00.093603759+08:00","msg":"execute select statement 400 error","hostname":"127.0.0.1:8400","stmt":"SELECT * FROM db1850."1850".timepricedata WHERE f_deviceid = '1870' ORDER BY time DESC LIMIT 1","error":"measurement not found","duration":0.000038938,"errno":"00414012","location":"coordinator/statement_executor.go:223","repeated":1} {"level":"error","time":"2025-11-19T09:20:00.100949946+08:00","msg":"ctx send fail","hostname":"127.0.0.1:8400","query":"SELECT * FROM db1850."1850".timepricedata WHERE f_deviceid = '1870' ORDER BY time DESC LIMIT 
1","error":"measurement not found","errno":"40414012","location":"query/executor.go:541","repeated":1} {"level":"error","time":"2025-11-19T09:20:00.101381963+08:00","msg":"retry retryCreatePipelineExecutor err ","hostname":"127.0.0.1:8400","error":"measurement not found","stmt":"SELECT * FROM db1850."1850".timepricedata WHERE f_deviceid = '1871' ORDER BY time DESC LIMIT 1","errno":"00414012","location":"coordinator/statement_executor.go:1056","repeated":1} {"level":"error","time":"2025-11-19T09:20:00.101414058+08:00","msg":"execute select statement 400 error","hostname":"127.0.0.1:8400","stmt":"SELECT * FROM db1850."1850".timepricedata WHERE f_deviceid = '1871' ORDER BY time DESC LIMIT 1","error":"measurement not found","duration":0.000043026,"errno":"00414012","location":"coordinator/statement_executor.go:223","repeated":1} {"level":"error","time":"2025-11-19T09:20:00.154271658+08:00","msg":"ctx send fail","hostname":"127.0.0.1:8400","query":"SELECT * FROM db1850."1850".timepricedata WHERE f_deviceid = '1871' ORDER BY time DESC LIMIT 1","error":"measurement not found","errno":"40414012","location":"query/executor.go:541","repeated":1} {"level":"error","time":"2025-11-19T09:20:00.154733165+08:00","msg":"retry retryCreatePipelineExecutor err ","hostname":"127.0.0.1:8400","error":"measurement not found","stmt":"SELECT * FROM db1850."1850".timepricedata WHERE f_deviceid = '2384' ORDER BY time DESC LIMIT 1","errno":"00414012","location":"coordinator/statement_executor.go:1056","repeated":1} {"level":"error","time":"2025-11-19T09:20:00.154763608+08:00","msg":"execute select statement 400 error","hostname":"127.0.0.1:8400","stmt":"SELECT * FROM db1850."1850".timepricedata WHERE f_deviceid = '2384' ORDER BY time DESC LIMIT 1","error":"measurement not found","duration":0.000049308,"errno":"00414012","location":"coordinator/statement_executor.go:223","repeated":1} {"level":"error","time":"2025-11-19T09:20:00.199592913+08:00","msg":"ctx send 
fail","hostname":"127.0.0.1:8400","query":"SELECT * FROM db1850."1850".timepricedata WHERE f_deviceid = '2384' ORDER BY time DESC LIMIT 1","error":"measurement not found","errno":"40414012","location":"query/executor.go:541","repeated":1}

No response

Additional context(其他的一些补充内容)

shell手动恢复原数据脚本

#!/bin/bash
#
# openGemini data recovery: configuration.
# The path settings below can be overridden from the environment; the defaults
# are the locations this script was originally written for.

# Source shard directory and the directory migrated source shards are moved to.
SEARCH_ROOT="${SEARCH_ROOT:-/usr/local/openGemini/recory/data/db1850/0/1850}"
SEARCH_BACKUP_DIR="${SEARCH_BACKUP_DIR:-${SEARCH_ROOT}/backup}"

# Target shard directory and the directory replaced target shards are moved to.
TARGET_ROOT="${TARGET_ROOT:-/usr/local/openGemini/data/data/db1850/0/1850}"
TARGET_BACKUP_DIR="${TARGET_BACKUP_DIR:-${TARGET_ROOT}/backup}"

# index sub-directories (derived from the roots above).
SEARCH_INDEX_DIR="${SEARCH_ROOT}/index"
TARGET_INDEX_DIR="${TARGET_ROOT}/index"
SEARCH_INDEX_BACKUP_DIR="${SEARCH_BACKUP_DIR}/index"
TARGET_INDEX_BACKUP_DIR="${TARGET_BACKUP_DIR}/index"

# Log file; the timestamp makes the name unique per run.
LOG_FILE="${LOG_FILE:-/var/log/openGemini_migration_$(date +%Y%m%d_%H%M%S).log}"

# Colour escape sequences (interpreted later by `echo -e`).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Logging helpers: each message is printed to stdout and appended to LOG_FILE.

# Print a message prefixed with a green timestamp.
# Arguments: $1 - message text
# Globals:   GREEN, NC, LOG_FILE (read)
log() {
    echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1" | tee -a "$LOG_FILE"
}

# Print an error message (red timestamp plus "ERROR:" tag) to stdout and
# append it to LOG_FILE.
# Arguments: $1 - message text
# Globals:   RED, NC, LOG_FILE (read)
error_log() {
    echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] ERROR:${NC} $1" | tee -a "$LOG_FILE"
}

# Print a warning message (yellow timestamp plus "WARNING:" tag) to stdout and
# append it to LOG_FILE.
# Arguments: $1 - message text
# Globals:   YELLOW, NC, LOG_FILE (read)
warn_log() {
    echo -e "${YELLOW}[$(date +'%Y-%m-%d %H:%M:%S')] WARNING:${NC} $1" | tee -a "$LOG_FILE"
}

# Print an informational message (blue timestamp plus "INFO:" tag) to stdout
# and append it to LOG_FILE.
# Arguments: $1 - message text
# Globals:   BLUE, NC, LOG_FILE (read)
info_log() {
    echo -e "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')] INFO:${NC} $1" | tee -a "$LOG_FILE"
}

# Verify that the source and target roots exist and create the four backup
# directories. Exits the script with status 1 if a root is missing or a backup
# directory cannot be created.
# Globals: SEARCH_ROOT, TARGET_ROOT, SEARCH_BACKUP_DIR, TARGET_BACKUP_DIR,
#          SEARCH_INDEX_BACKUP_DIR, TARGET_INDEX_BACKUP_DIR (read)
check_directories() {
    log "检查目录..."

    if [ ! -d "$SEARCH_ROOT" ]; then
        error_log "源目录不存在: $SEARCH_ROOT"
        exit 1
    fi

    if [ ! -d "$TARGET_ROOT" ]; then
        error_log "目标目录不存在: $TARGET_ROOT"
        exit 1
    fi

    # Create the backup directories.
    mkdir -p "$SEARCH_BACKUP_DIR" || { error_log "无法创建备份目录: $SEARCH_BACKUP_DIR"; exit 1; }
    mkdir -p "$TARGET_BACKUP_DIR" || { error_log "无法创建备份目录: $TARGET_BACKUP_DIR"; exit 1; }
    mkdir -p "$SEARCH_INDEX_BACKUP_DIR" || { error_log "无法创建索引备份目录: $SEARCH_INDEX_BACKUP_DIR"; exit 1; }
    mkdir -p "$TARGET_INDEX_BACKUP_DIR" || { error_log "无法创建索引备份目录: $TARGET_INDEX_BACKUP_DIR"; exit 1; }

    log "目录检查完成"
}

# Extract the time range from a shard folder name.
# Arguments: $1 - folder name shaped like N_timestamp1_timestamp2_N or
#                 N_timestamp1_timestamp2
# Outputs:   "timestamp1_timestamp2" on stdout; nothing when the name does
#            not contain both timestamps
# Returns:   0 on success, 1 when the name cannot be parsed
extract_time_range() {
    local folder_name="$1"
    local prefix start_time end_time rest

    # Split on '_' and keep the two middle fields.
    IFS='_' read -r prefix start_time end_time rest <<< "$folder_name"

    # Emit nothing for malformed names so callers' empty-string check fires.
    if [ -z "$start_time" ] || [ -z "$end_time" ]; then
        return 1
    fi

    echo "${start_time}_${end_time}"
}

# Split a four-part folder name (N_timestamp1_timestamp2_N) into its fields.
# Arguments: $1 - folder name
# Outputs:   "prefix|start_time|end_time|suffix" on stdout; missing fields are
#            left empty
extract_folder_parts() {
    local folder_name="$1"
    local prefix start_time end_time suffix rest

    # 'rest' absorbs any fields beyond the fourth so 'suffix' holds only field 4.
    IFS='_' read -r prefix start_time end_time suffix rest <<< "$folder_name"

    echo "$prefix|$start_time|$end_time|$suffix"
}

# Build the new four-part folder name: the target folder's prefix and suffix
# combined with the source folder's two timestamps.
# Arguments: $1 - source folder name
#            $2 - target folder name
# Outputs:   "targetPrefix_sourceStart_sourceEnd_targetSuffix" on stdout
build_new_folder_name() {
    local search_folder_name="$1"
    local target_folder_name="$2"
    local search_parts target_parts
    local search_start search_end target_prefix target_suffix
    local unused

    # Timestamps come from the source folder (fields 2 and 3).
    search_parts=$(extract_folder_parts "$search_folder_name")
    IFS='|' read -r unused search_start search_end unused <<< "$search_parts"

    # Prefix and suffix come from the target folder (fields 1 and 4).
    target_parts=$(extract_folder_parts "$target_folder_name")
    IFS='|' read -r target_prefix unused unused target_suffix unused <<< "$target_parts"

    echo "${target_prefix}_${search_start}_${search_end}_${target_suffix}"
}

# Split a three-part index folder name (N_timestamp1_timestamp2) into fields.
# Arguments: $1 - index folder name
# Outputs:   "prefix|start_time|end_time" on stdout; missing fields are left
#            empty
extract_index_parts() {
    local folder_name="$1"
    local prefix start_time end_time rest

    # 'rest' absorbs any fields beyond the third so 'end_time' holds only field 3.
    IFS='_' read -r prefix start_time end_time rest <<< "$folder_name"

    echo "$prefix|$start_time|$end_time"
}

# Build the new three-part index folder name: the target folder's prefix
# combined with the source folder's two timestamps.
# Arguments: $1 - source index folder name
#            $2 - target index folder name
# Outputs:   "targetPrefix_sourceStart_sourceEnd" on stdout
build_new_index_name() {
    local search_folder_name="$1"
    local target_folder_name="$2"
    local search_parts target_parts
    local search_start search_end target_prefix
    local unused

    # Timestamps come from the source folder (fields 2 and 3).
    search_parts=$(extract_index_parts "$search_folder_name")
    IFS='|' read -r unused search_start search_end unused <<< "$search_parts"

    # Prefix comes from the target folder (field 1).
    target_parts=$(extract_index_parts "$target_folder_name")
    IFS='|' read -r target_prefix unused <<< "$target_parts"

    echo "${target_prefix}_${search_start}_${search_end}"
}

# Migrate one source index folder into the target index directory.
# Finds the target index folder with the same time range, moves it to the
# target index backup directory, copies the source folder in under the new
# name, then moves the source folder to the source index backup directory.
# Arguments: $1 - path of the source index folder
# Globals:   TARGET_INDEX_DIR, TARGET_INDEX_BACKUP_DIR,
#            SEARCH_INDEX_BACKUP_DIR, LOG_FILE (read)
# Outputs:   progress via info_log/warn_log/error_log; appends an "INDEX|..."
#            record to "$LOG_FILE.success" on success
# Returns:   0 on success, 1 when the folder is skipped or the migration fails
process_index_folders() {
    local search_folder="$1"
    local search_folder_name
    search_folder_name=$(basename "$search_folder")

    info_log "  处理index文件夹: $search_folder_name"

    # Extract the time range.
    local time_range
    time_range=$(extract_time_range "$search_folder_name")

    if [ -z "$time_range" ]; then
        warn_log "  无法提取时间范围: $search_folder_name,跳过"
        return 1
    fi

    info_log "    时间范围: $time_range"

    # Look for the matching folder in the target index directory.
    local target_folder
    target_folder=$(find "$TARGET_INDEX_DIR" -maxdepth 1 -type d -name "*_${time_range}" 2>/dev/null | head -n 1)

    if [ -z "$target_folder" ]; then
        warn_log "    未找到匹配的目标index文件夹,时间范围: $time_range"
        return 1
    fi

    local target_folder_name
    target_folder_name=$(basename "$target_folder")
    info_log "    找到目标index文件夹: $target_folder_name"

    # Build the new folder name.
    local new_folder_name
    new_folder_name=$(build_new_index_name "$search_folder_name" "$target_folder_name")
    info_log "    新index文件夹名称: $new_folder_name"

    # An empty name would make cp copy into TARGET_INDEX_DIR itself.
    if [ -z "$new_folder_name" ]; then
        error_log "    无法构建新index文件夹名称: $search_folder_name"
        return 1
    fi

    local new_folder_path="$TARGET_INDEX_DIR/$new_folder_name"

    # Refuse to copy onto an unrelated existing folder: cp -r would nest the
    # source inside it, and the rollback below would delete it.
    if [ "$new_folder_name" != "$target_folder_name" ] && [ -e "$new_folder_path" ]; then
        error_log "    目标目录中已存在同名index文件夹: $new_folder_name"
        return 1
    fi

    # Back up the target folder.
    info_log "    备份目标index文件夹到: $TARGET_INDEX_BACKUP_DIR/$target_folder_name"
    if ! mv "$target_folder" "$TARGET_INDEX_BACKUP_DIR/"; then
        error_log "    备份目标index文件夹失败: $target_folder_name"
        return 1
    fi

    # Copy the source folder into the target directory under the new name.
    info_log "    复制源index文件夹到目标目录: $TARGET_INDEX_DIR/$new_folder_name"
    if ! cp -r "$search_folder" "$new_folder_path"; then
        error_log "    复制源index文件夹失败,尝试恢复目标文件夹"
        # Drop the partial copy before restoring: the new name is usually
        # identical to the backed-up folder's name, so moving the backup back
        # while the partial copy exists would nest it inside that copy.
        rm -rf -- "${new_folder_path:?}"
        if ! mv "$TARGET_INDEX_BACKUP_DIR/$target_folder_name" "$TARGET_INDEX_DIR/"; then
            error_log "    恢复目标index文件夹失败: $target_folder_name"
        fi
        return 1
    fi

    # Move the source folder to the backup directory. A failure here is only
    # reported: the copy has already succeeded.
    info_log "    移动源index文件夹到备份目录: $SEARCH_INDEX_BACKUP_DIR/$search_folder_name"
    if ! mv "$search_folder" "$SEARCH_INDEX_BACKUP_DIR/"; then
        error_log "    移动源index文件夹失败,但复制已完成"
    fi

    info_log "    ✓ 成功处理index文件夹:"
    info_log "      源文件夹: $search_folder_name"
    info_log "      目标文件夹(已备份): $target_folder_name"
    info_log "      新文件夹: $new_folder_name"

    echo "INDEX|$search_folder_name|$target_folder_name|$new_folder_name|$time_range|$(date)" >> "$LOG_FILE.success"

    return 0
}

# Migrate one source data folder into the target directory.
# Finds the target folder with the same time range, moves it to the target
# backup directory, copies the source folder in under the new name, then moves
# the source folder to the source backup directory.
# Arguments: $1 - path of the source data folder
# Globals:   TARGET_ROOT, TARGET_BACKUP_DIR, SEARCH_BACKUP_DIR, LOG_FILE (read)
# Outputs:   progress via log/warn_log/error_log; appends a "FOLDER|..."
#            record to "$LOG_FILE.success" on success
# Returns:   0 on success, 1 when the folder is skipped or the migration fails
process_folders() {
    local search_folder="$1"
    local search_folder_name
    search_folder_name=$(basename "$search_folder")

    log "处理源文件夹: $search_folder_name"

    # Extract the time range.
    local time_range
    time_range=$(extract_time_range "$search_folder_name")

    if [ -z "$time_range" ]; then
        warn_log "无法提取时间范围: $search_folder_name,跳过"
        return 1
    fi

    log "  时间范围: $time_range"

    # Look for the matching folder in the target directory.
    local target_folder
    target_folder=$(find "$TARGET_ROOT" -maxdepth 1 -type d -name "*_${time_range}_*" | head -n 1)

    if [ -z "$target_folder" ]; then
        warn_log "  未找到匹配的目标文件夹,时间范围: $time_range"
        return 1
    fi

    local target_folder_name
    target_folder_name=$(basename "$target_folder")
    log "  找到目标文件夹: $target_folder_name"

    # Build the new folder name.
    local new_folder_name
    new_folder_name=$(build_new_folder_name "$search_folder_name" "$target_folder_name")
    log "  新文件夹名称: $new_folder_name"

    # An empty name would make cp copy into TARGET_ROOT itself.
    if [ -z "$new_folder_name" ]; then
        error_log "  无法构建新文件夹名称: $search_folder_name"
        return 1
    fi

    local new_folder_path="$TARGET_ROOT/$new_folder_name"

    # Refuse to copy onto an unrelated existing folder: cp -r would nest the
    # source inside it, and the rollback below would delete it.
    if [ "$new_folder_name" != "$target_folder_name" ] && [ -e "$new_folder_path" ]; then
        error_log "  目标目录中已存在同名文件夹: $new_folder_name"
        return 1
    fi

    # Back up the target folder.
    log "  备份目标文件夹到: $TARGET_BACKUP_DIR/$target_folder_name"
    if ! mv "$target_folder" "$TARGET_BACKUP_DIR/"; then
        error_log "  备份目标文件夹失败: $target_folder_name"
        return 1
    fi

    # Copy the source folder into the target directory under the new name.
    log "  复制源文件夹到目标目录: $TARGET_ROOT/$new_folder_name"
    if ! cp -r "$search_folder" "$new_folder_path"; then
        error_log "  复制源文件夹失败,尝试恢复目标文件夹"
        # Drop the partial copy before restoring: the new name is usually
        # identical to the backed-up folder's name, so moving the backup back
        # while the partial copy exists would nest it inside that copy.
        rm -rf -- "${new_folder_path:?}"
        if ! mv "$TARGET_BACKUP_DIR/$target_folder_name" "$TARGET_ROOT/"; then
            error_log "  恢复目标文件夹失败: $target_folder_name"
        fi
        return 1
    fi

    # Move the source folder to the backup directory. A failure here is only
    # reported: the migration itself has already succeeded.
    log "  移动源文件夹到备份目录: $SEARCH_BACKUP_DIR/$search_folder_name"
    if ! mv "$search_folder" "$SEARCH_BACKUP_DIR/"; then
        error_log "  移动源文件夹失败,但复制已完成"
    fi

    log "  ✓ 成功处理:"
    log "    源文件夹: $search_folder_name"
    log "    目标文件夹(已备份): $target_folder_name"
    log "    新文件夹: $new_folder_name"

    echo "FOLDER|$search_folder_name|$target_folder_name|$new_folder_name|$time_range|$(date)" >> "$LOG_FILE.success"

    return 0
}

# Entry point: log the configuration, validate the directories, migrate every
# data folder and every index folder, then print a summary.
# Globals: SEARCH_ROOT, TARGET_ROOT, SEARCH_BACKUP_DIR, TARGET_BACKUP_DIR,
#          SEARCH_INDEX_DIR, TARGET_INDEX_DIR, LOG_FILE (read)
# Exits:   0 when every folder was migrated, 1 when at least one failed
main() {
    log "========== 开始数据迁移 =========="
    log "源目录: $SEARCH_ROOT"
    log "目标目录: $TARGET_ROOT"
    log "源备份目录: $SEARCH_BACKUP_DIR"
    log "目标备份目录: $TARGET_BACKUP_DIR"
    log "源索引目录: $SEARCH_INDEX_DIR"
    log "目标索引目录: $TARGET_INDEX_DIR"
    log "日志文件: $LOG_FILE"
    echo ""

    # Validate directories (exits on failure).
    check_directories

    # Counters.
    local total_folders=0
    local success_folders=0
    local failed_folders=0

    local total_index=0
    local success_index=0
    local failed_index=0

    local search_folder folder_name

    # ========== Data folders ==========
    log "========== 开始处理数据文件夹 =========="

    for search_folder in "$SEARCH_ROOT"/*; do
        # Skip the index and backup directories.
        folder_name=$(basename "$search_folder")
        if [ "$folder_name" = "index" ] || [ "$folder_name" = "backup" ]; then
            continue
        fi

        if [ -d "$search_folder" ]; then
            total_folders=$((total_folders + 1))

            if process_folders "$search_folder"; then
                success_folders=$((success_folders + 1))
            else
                failed_folders=$((failed_folders + 1))
            fi

            echo ""
        fi
    done

    # ========== Index folders ==========
    log "========== 开始处理index文件夹 =========="

    if [ -d "$SEARCH_INDEX_DIR" ] && [ -d "$TARGET_INDEX_DIR" ]; then
        for search_folder in "$SEARCH_INDEX_DIR"/*; do
            if [ -d "$search_folder" ]; then
                total_index=$((total_index + 1))

                if process_index_folders "$search_folder"; then
                    success_index=$((success_index + 1))
                else
                    failed_index=$((failed_index + 1))
                fi

                echo ""
            fi
        done
    else
        warn_log "index目录不存在,跳过index文件夹处理"
        if [ ! -d "$SEARCH_INDEX_DIR" ]; then
            warn_log "源index目录不存在: $SEARCH_INDEX_DIR"
        fi
        if [ ! -d "$TARGET_INDEX_DIR" ]; then
            warn_log "目标index目录不存在: $TARGET_INDEX_DIR"
        fi
    fi

    # Summary.
    log "========== 迁移完成 =========="
    log ""
    log "数据文件夹统计:"
    log "  总计: $total_folders"
    log "  成功: $success_folders"
    log "  失败: $failed_folders"
    log ""
    log "Index文件夹统计:"
    log "  总计: $total_index"
    log "  成功: $success_index"
    log "  失败: $failed_index"
    log ""
    log "详细日志: $LOG_FILE"
    log "成功记录: $LOG_FILE.success"

    # List each migrated item from the success records written by the
    # process_* functions (pipe-separated fields).
    if [ -f "$LOG_FILE.success" ]; then
        log ""
        log "成功迁移的项目:"
        local kind search_name target_name new_name time_range timestamp
        while IFS='|' read -r kind search_name target_name new_name time_range timestamp; do
            if [ "$kind" = "FOLDER" ]; then
                log "  [数据文件夹] $search_name → $new_name (替换 $target_name)"
            elif [ "$kind" = "INDEX" ]; then
                log "  [Index文件夹] $search_name → $new_name (替换 $target_name)"
            fi
        done < "$LOG_FILE.success"
    fi

    local total_failed=$((failed_folders + failed_index))
    if [ "$total_failed" -gt 0 ]; then
        warn_log "部分项目处理失败,请检查日志"
        exit 1
    fi

    log ""
    log "所有迁移任务已成功完成!"
    exit 0
}

# Run the main function, forwarding any command-line arguments.
main "$@"

No response

guoguo0123456 avatar Nov 19 '25 06:11 guoguo0123456

[data.compact]
correct-time-disorder = true

数据文件由于各种原因,文件之间出现了时间乱序。启用上述配置项后,内核会自动修正这种乱序。该配置项可能会带来额外的 CPU 和内存开销,建议开启一段时间,待业务稳定后再重新关闭。

fx408 avatar Nov 19 '25 07:11 fx408

解决了,就close吧

vicky-run avatar Nov 20 '25 11:11 vicky-run