From 329d52f89f0680b74bd6aaaba401decca62f8857 Mon Sep 17 00:00:00 2001 From: yinjianm Date: Tue, 28 Apr 2026 01:44:07 +0800 Subject: [PATCH] =?UTF-8?q?fix(api):=20=E4=BF=AE=E5=A4=8D=E5=A2=99?= =?UTF-8?q?=E6=A3=80=E6=B5=8B=E4=BB=BB=E5=8A=A1=E8=B6=85=E6=97=B6=E5=8D=A0?= =?UTF-8?q?=E7=94=A8=E4=B8=8E=E7=8A=B6=E6=80=81=E5=B1=95=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 为超过 5 分钟未领取或未上报的 pending/checking 任务自动标记失败,避免长期占用 active 状态并阻塞新检测 同时区分前端“等待节点领取”和“检测中”展示, 补充跳过原因提示,并更新相关测试与文档 --- .helloagents/CHANGELOG.md | 7 ++ .helloagents/modules/node-gfw-check.md | 4 +- admin-frontend/src/utils/nodes.ts | 14 +++- admin-frontend/src/views/nodes/NodesView.vue | 5 +- app/Console/Commands/SyncServerGfwChecks.php | 5 +- app/Services/ServerGfwCheckService.php | 67 ++++++++++++++++++-- tests/Unit/ServerGfwCheckServiceTest.php | 19 +++++- 7 files changed, 104 insertions(+), 17 deletions(-) diff --git a/.helloagents/CHANGELOG.md b/.helloagents/CHANGELOG.md index 5361a9a..7dc09ae 100644 --- a/.helloagents/CHANGELOG.md +++ b/.helloagents/CHANGELOG.md @@ -1,5 +1,12 @@ # CHANGELOG +## [0.6.4] - 2026-04-28 + +### 修复 +- **[node-gfw-check]**: 修复墙检测任务卡在 `pending/checking` 后会长期占用 active 状态的问题;超过 5 分钟未被节点端领取或未上报的任务会标记为检测失败,管理端区分展示“等待节点领取”和“检测中”。同时修正 mi-node 的 ping 成功判定,避免正常可达但平均延迟解析不到时被误判为超时 — by yinjianm + - 类型: 快速修改(无方案包) + - 文件: app/Services/ServerGfwCheckService.php, app/Console/Commands/SyncServerGfwChecks.php, admin-frontend/src/utils/nodes.ts, admin-frontend/src/views/nodes/NodesView.vue, E:/code/go/mi-node/internal/gfwcheck/gfwcheck.go + ## [0.6.3] - 2026-04-28 ### 新增 diff --git a/.helloagents/modules/node-gfw-check.md b/.helloagents/modules/node-gfw-check.md index ea32f78..93276e3 100644 --- a/.helloagents/modules/node-gfw-check.md +++ b/.helloagents/modules/node-gfw-check.md @@ -13,7 +13,7 @@ - 子节点列表展示继承父节点最新 `gfw_check`,并返回 `inherited=true` 与 `source_node_id` - `server_gfw_checks.status` 使用 `pending / checking / normal / blocked / partial / failed / skipped` - 管理端 `POST server/manage/checkGfw` 接收 `{ ids: number[] }`,响应中区分 `started` 与 `skipped` -- 后端定时命令 `sync:server-gfw-checks` 会自动为 `gfw_check_enabled=1` 的父节点创建检测任务;已有 `pending/checking` 任务时跳过,避免重复检测 +- 后端定时命令 `sync:server-gfw-checks` 会自动为 `gfw_check_enabled=1` 的父节点创建检测任务;已有未超时的 `pending/checking` 任务时跳过,超过 5 分钟未领取或未上报的任务会自动标记为 `failed` - 节点端 `GET server/gfw/task` 只向父节点返回待执行任务;节点端 `POST server/gfw/report` 必须校验 `check_id` 归属当前节点 - `v2_server.gfw_check_enabled` 控制节点是否参与自动墙检测与墙状态自动显隐;父节点开启时会自动创建检测任务,子节点不独立检测但可单独关闭随父节点自动隐藏 / 恢复 - `blocked` 结果会自动隐藏仍开启墙检测托管且当前显示中的父节点及其子节点,并设置 `gfw_auto_hidden=1` @@ -21,7 +21,7 @@ - `sync:server-auto-online` 会把最新墙状态 `blocked` 和未恢复的 `gfw_auto_hidden` 作为显示否决条件,防止自动上线重新发布疑似被墙节点 - 当前检测方向只做节点服务器主动 ping 国内三网目标;后续墙内探测 IP 可在同一任务模型中扩展 - 参考脚本中的 Telegram 通知、chat_id、bot token 和自动安装依赖逻辑不得进入项目实现 -- mi-node 使用 Go 原生 runner 调用系统 `ping`,按三网目标并发检测并结构化上报 `summary / operator_summary / raw_result` +- mi-node 使用 Go 原生 runner 调用系统 `ping`,按三网目标并发检测并结构化上报 `summary / operator_summary / raw_result`;ping 命令成功即视为目标可达,平均延迟解析失败不再把正常可达误判为超时 - Docker runtime 镜像需要提供 `ping`,当前通过 Alpine `iputils` 满足 ## 依赖关系 diff --git a/admin-frontend/src/utils/nodes.ts b/admin-frontend/src/utils/nodes.ts index 49dc05f..14dcaa0 100644 --- a/admin-frontend/src/utils/nodes.ts +++ b/admin-frontend/src/utils/nodes.ts @@ -120,10 +120,20 @@ export function getNodeGfwMeta(node: AdminNodeItem): NodeGfwMeta { } } - if (status === 'pending' || status === 'checking') { + if (status === 'pending') { + return { + label: `${inheritedPrefix}等待节点领取`, + searchText: `${inherited ? '随父节点 继承 ' : ''}等待节点领取 等待检测 gfw pending`, + tagType: 'primary', + tone: 'checking', + inherited, + } + } + + if (status === 'checking') { return { label: `${inheritedPrefix}检测中`, - searchText: `${inherited ? '随父节点 继承 ' : ''}检测中 等待检测 gfw checking pending`, + searchText: `${inherited ? '随父节点 继承 ' : ''}检测中 正在检测 gfw checking`, tagType: 'primary', tone: 'checking', inherited, diff --git a/admin-frontend/src/views/nodes/NodesView.vue b/admin-frontend/src/views/nodes/NodesView.vue index cb07d88..c5032c0 100644 --- a/admin-frontend/src/views/nodes/NodesView.vue +++ b/admin-frontend/src/views/nodes/NodesView.vue @@ -376,7 +376,8 @@ async function handleCheckGfw(ids: number[], label: string) { if (started > 0) { ElMessage.success(`${label}已发起墙状态检测,${started} 个父节点等待上报`) } else if (skipped > 0) { - ElMessage.info('所选节点均为子节点,墙状态随父节点显示') + const reason = response.data?.skipped?.[0]?.reason + ElMessage.info(reason || '所选节点暂未发起新的墙状态检测') } else { ElMessage.info('没有可检测的节点') } @@ -666,7 +667,7 @@ watch( - + diff --git a/app/Console/Commands/SyncServerGfwChecks.php b/app/Console/Commands/SyncServerGfwChecks.php index bdc6ab1..bed8194 100644 --- a/app/Console/Commands/SyncServerGfwChecks.php +++ b/app/Console/Commands/SyncServerGfwChecks.php @@ -19,11 +19,12 @@ class SyncServerGfwChecks extends Command ); $this->info(sprintf( - 'Server GFW checks synced: total=%d started=%d skipped=%d active=%d', + 'Server GFW checks synced: total=%d started=%d skipped=%d active=%d expired=%d', $result['total'], count($result['started']), count($result['skipped']), - $result['active'] + $result['active'], + $result['expired'] ?? 0 )); return self::SUCCESS; diff --git a/app/Services/ServerGfwCheckService.php b/app/Services/ServerGfwCheckService.php index 437e1da..13ea596 100644 --- a/app/Services/ServerGfwCheckService.php +++ b/app/Services/ServerGfwCheckService.php @@ -8,6 +8,8 @@ use Illuminate\Support\Collection; class ServerGfwCheckService { + private const ACTIVE_TASK_TIMEOUT_SECONDS = 300; + private const TASK_STATUS = [ ServerGfwCheck::STATUS_PENDING, ServerGfwCheck::STATUS_CHECKING, @@ -17,6 +19,8 @@ class ServerGfwCheckService { $ids = array_values(array_unique(array_filter(array_map('intval', $ids)))); $servers = Server::whereIn('id', $ids)->get()->keyBy('id'); + $this->expireStaleActiveTasks($ids); + $activeLookup = $this->activeTaskServerLookup($ids); $started = []; $skipped = []; @@ -46,6 +50,15 @@ class ServerGfwCheckService continue; } + if (isset($activeLookup[(int) $server->id])) { + $skipped[] = [ + 'id' => $id, + 'status' => ServerGfwCheck::STATUS_SKIPPED, + 'reason' => '已有检测任务等待节点领取或上报', + ]; + continue; + } + $check = $this->createCheck($server, $adminUserId); $started[] = [ 'id' => $server->id, @@ -74,12 +87,8 @@ class ServerGfwCheckService } $servers = $query->get(); - $activeServerIds = ServerGfwCheck::whereIn('server_id', $servers->pluck('id')) - ->whereIn('status', self::TASK_STATUS) - ->pluck('server_id') - ->map(fn ($id) => (int) $id) - ->all(); - $activeLookup = array_flip($activeServerIds); + $expired = $this->expireStaleActiveTasks($servers->pluck('id')); + $activeLookup = $this->activeTaskServerLookup($servers->pluck('id')); $started = []; $skipped = []; @@ -105,7 +114,8 @@ class ServerGfwCheckService 'started' => $started, 'skipped' => $skipped, 'total' => $servers->count(), - 'active' => count($activeServerIds), + 'active' => count($activeLookup), + 'expired' => $expired, ]; } @@ -169,6 +179,8 @@ class ServerGfwCheckService return null; } + $this->expireStaleActiveTasks([$node->id]); + $check = ServerGfwCheck::where('server_id', $node->id) ->whereIn('status', self::TASK_STATUS) ->orderByDesc('id') @@ -185,6 +197,47 @@ class ServerGfwCheckService return $this->formatTask($check->refresh()); } + private function activeTaskServerLookup($serverIds): array + { + $ids = collect($serverIds) + ->map(fn ($id) => (int) $id) + ->filter() + ->unique() + ->values(); + + if ($ids->isEmpty()) { + return []; + } + + return array_flip(ServerGfwCheck::whereIn('server_id', $ids) + ->whereIn('status', self::TASK_STATUS) + ->pluck('server_id') + ->map(fn ($id) => (int) $id) + ->all()); + } + + private function expireStaleActiveTasks($serverIds): int + { + $ids = collect($serverIds) + ->map(fn ($id) => (int) $id) + ->filter() + ->unique() + ->values(); + + if ($ids->isEmpty()) { + return 0; + } + + return ServerGfwCheck::whereIn('server_id', $ids) + ->whereIn('status', self::TASK_STATUS) + ->where('updated_at', '<=', now()->subSeconds(self::ACTIVE_TASK_TIMEOUT_SECONDS)) + ->update([ + 'status' => ServerGfwCheck::STATUS_FAILED, + 'error_message' => '墙检测任务超时:节点端未领取或未上报结果', + 'checked_at' => time(), + ]); + } + public function reportResult(Server $node, array $payload): bool { $checkId = (int) ($payload['check_id'] ?? 0); diff --git a/tests/Unit/ServerGfwCheckServiceTest.php b/tests/Unit/ServerGfwCheckServiceTest.php index bd4297f..1741249 100644 --- a/tests/Unit/ServerGfwCheckServiceTest.php +++ b/tests/Unit/ServerGfwCheckServiceTest.php @@ -16,6 +16,7 @@ class ServerGfwCheckServiceTest extends TestCase { $eligible = $this->makeServer(['name' => 'eligible-parent']); $active = $this->makeServer(['name' => 'active-parent']); + $stale = $this->makeServer(['name' => 'stale-parent']); $this->makeServer([ 'name' => 'disabled-parent', 'gfw_check_enabled' => false, @@ -29,17 +30,31 @@ class ServerGfwCheckServiceTest extends TestCase 'server_id' => $active->id, 'status' => ServerGfwCheck::STATUS_PENDING, ]); + $staleCheck = ServerGfwCheck::create([ + 'server_id' => $stale->id, + 'status' => ServerGfwCheck::STATUS_PENDING, + ]); + $staleCheck->forceFill([ + 'created_at' => now()->subMinutes(10), + 'updated_at' => now()->subMinutes(10), + ])->save(); $result = app(ServerGfwCheckService::class)->startAutomaticChecks(); - $this->assertSame(2, $result['total']); + $this->assertSame(3, $result['total']); $this->assertSame(1, $result['active']); - $this->assertSame([$eligible->id], array_column($result['started'], 'id')); + $this->assertSame(1, $result['expired']); + $this->assertSame([$eligible->id, $stale->id], array_column($result['started'], 'id')); $this->assertCount(1, $result['skipped']); $this->assertDatabaseHas('server_gfw_checks', [ 'server_id' => $eligible->id, 'status' => ServerGfwCheck::STATUS_PENDING, ]); + $this->assertDatabaseHas('server_gfw_checks', [ + 'id' => $staleCheck->id, + 'status' => ServerGfwCheck::STATUS_FAILED, + 'error_message' => '墙检测任务超时:节点端未领取或未上报结果', + ]); } public function test_report_result_hides_blocked_nodes_and_restores_only_auto_hidden_nodes(): void