feat: 增加自动识别文件编码的功能

Related to #5
This commit is contained in:
Baobhan Sith
2025-05-03 16:12:28 +08:00
parent 026ed949fb
commit 2e34f4e1df
3 changed files with 82 additions and 3 deletions
+12 -1
View File
@@ -4899,6 +4899,15 @@
"integrity": "sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==", "integrity": "sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==",
"license": "MIT" "license": "MIT"
}, },
"node_modules/jschardet": {
"version": "3.1.4",
"resolved": "https://registry.npmjs.org/jschardet/-/jschardet-3.1.4.tgz",
"integrity": "sha512-/kmVISmrwVwtyYU40iQUOp3SUPk2dhNCMsZBQX0R1/jZ8maaXJ/oZIzUOiyOqcgtLnETFKYChbJ5iDC/eWmFHg==",
"license": "LGPL-2.1+",
"engines": {
"node": ">=0.1.90"
}
},
"node_modules/json-stable-stringify": { "node_modules/json-stable-stringify": {
"version": "1.3.0", "version": "1.3.0",
"resolved": "https://registry.npmjs.org/json-stable-stringify/-/json-stable-stringify-1.3.0.tgz", "resolved": "https://registry.npmjs.org/json-stable-stringify/-/json-stable-stringify-1.3.0.tgz",
@@ -8799,7 +8808,9 @@
"express-session": "^1.18.1", "express-session": "^1.18.1",
"i18next": "^25.0.0", "i18next": "^25.0.0",
"i18next-fs-backend": "^2.6.0", "i18next-fs-backend": "^2.6.0",
"iconv-lite": "^0.6.3",
"ipaddr.js": "^1.9.1", "ipaddr.js": "^1.9.1",
"jschardet": "^3.1.4",
"multer": "^1.4.5-lts.2", "multer": "^1.4.5-lts.2",
"nodemailer": "^6.10.1", "nodemailer": "^6.10.1",
"qrcode": "^1.5.4", "qrcode": "^1.5.4",
@@ -8831,7 +8842,7 @@
}, },
"packages/frontend": { "packages/frontend": {
"name": "@nexus-terminal/frontend", "name": "@nexus-terminal/frontend",
"version": "0.2.2", "version": "0.2.4",
"dependencies": { "dependencies": {
"@fortawesome/fontawesome-free": "^6.7.2", "@fortawesome/fontawesome-free": "^6.7.2",
"@hcaptcha/vue3-hcaptcha": "^1.3.0", "@hcaptcha/vue3-hcaptcha": "^1.3.0",
+2
View File
@@ -23,7 +23,9 @@
"express-session": "^1.18.1", "express-session": "^1.18.1",
"i18next": "^25.0.0", "i18next": "^25.0.0",
"i18next-fs-backend": "^2.6.0", "i18next-fs-backend": "^2.6.0",
"iconv-lite": "^0.6.3",
"ipaddr.js": "^1.9.1", "ipaddr.js": "^1.9.1",
"jschardet": "^3.1.4",
"multer": "^1.4.5-lts.2", "multer": "^1.4.5-lts.2",
"nodemailer": "^6.10.1", "nodemailer": "^6.10.1",
"qrcode": "^1.5.4", "qrcode": "^1.5.4",
+68 -2
View File
@@ -2,6 +2,8 @@ import { Client, SFTPWrapper, Stats, WriteStream } from 'ssh2'; // Import WriteS
import { WebSocket } from 'ws'; import { WebSocket } from 'ws';
import { ClientState } from '../websocket'; // 导入统一的 ClientState import { ClientState } from '../websocket'; // 导入统一的 ClientState
import * as pathModule from 'path'; // +++ Import path module +++ import * as pathModule from 'path'; // +++ Import path module +++
import * as jschardet from 'jschardet'; // +++ Import jschardet +++
import * as iconv from 'iconv-lite'; // +++ Import iconv-lite +++
// +++ Define local interface for readdir results +++ // +++ Define local interface for readdir results +++
interface SftpDirEntry { interface SftpDirEntry {
@@ -214,8 +216,72 @@ export class SftpService {
}); });
// End-of-stream handler for an SFTP file read.
// Once the whole file has been buffered in `fileData`, detect its text
// encoding, decode it to a JavaScript string and send it to the client in a
// 'sftp:readfile:success' message. If detection/decoding throws, a
// 'sftp:readfile:error' message is sent instead.
// NOTE(review): `fileData` is assumed to be a Node.js Buffer holding the full
// file contents — confirm against the 'data' handler outside this block.
readStream.on('end', () => {
    // Nothing to do when an earlier stream error was already flagged.
    if (errorOccurred) {
        return;
    }

    console.log(`[SFTP ${sessionId}] readFile ${path} success, size: ${fileData.length} bytes (ID: ${requestId}). Detecting encoding...`);

    let contentUtf8: string;
    try {
        // 1. Detect encoding.
        // jschardet reports `encoding: null` when it cannot decide (e.g. an
        // empty file), so normalise a missing result to '' / 0 instead of
        // calling toLowerCase() on null and failing the whole read.
        const detection = jschardet.detect(fileData);
        const detectedEncoding = detection && detection.encoding ? detection.encoding.toLowerCase() : '';
        const confidence = detection && typeof detection.confidence === 'number' ? detection.confidence : 0;
        console.log(`[SFTP ${sessionId}] Detected encoding for ${path}: ${detectedEncoding || 'unknown'} (confidence: ${confidence})`);

        // Simplified-Chinese family: GB18030 is a superset of GBK/GB2312, so it
        // is used for all three.
        const gbFamilyEncodings = ['gbk', 'gb2312', 'gb18030'];
        // Traditional-Chinese encodings are NOT byte-compatible with GB18030
        // and must be decoded with their own codec when one is available.
        const traditionalChineseEncodings = ['big5', 'euc-tw'];

        // Shared fallback: use the detected encoding when iconv-lite supports
        // it, otherwise interpret the bytes as UTF-8.
        const decodeAsDetectedOrUtf8 = (): string => {
            if (detectedEncoding !== '' && iconv.encodingExists(detectedEncoding)) {
                const decoded = iconv.decode(fileData, detectedEncoding);
                console.log(`[SFTP ${sessionId}] Falling back to decoding ${path} as originally detected ${detectedEncoding}.`);
                return decoded;
            }
            const decoded = fileData.toString('utf8');
            console.log(`[SFTP ${sessionId}] Falling back to decoding ${path} as UTF-8.`);
            return decoded;
        };

        // 2. Decode to a string.
        if (detectedEncoding === 'utf-8' || detectedEncoding === 'ascii') {
            contentUtf8 = fileData.toString('utf8');
            console.log(`[SFTP ${sessionId}] Decoded ${path} as UTF-8/ASCII.`);
        } else if (gbFamilyEncodings.includes(detectedEncoding)) {
            contentUtf8 = iconv.decode(fileData, 'gb18030');
            console.log(`[SFTP ${sessionId}] Decoded ${path} from detected Chinese encoding (${detectedEncoding}) as gb18030.`);
        } else if (traditionalChineseEncodings.includes(detectedEncoding)) {
            // Prefer the real codec; keep the previous GB18030 behaviour only
            // when iconv-lite has no codec for the detected name.
            const codec = iconv.encodingExists(detectedEncoding) ? detectedEncoding : 'gb18030';
            contentUtf8 = iconv.decode(fileData, codec);
            console.log(`[SFTP ${sessionId}] Decoded ${path} from detected Chinese encoding (${detectedEncoding}) as ${codec}.`);
        } else if (detectedEncoding === '' || confidence < 0.90) {
            // Unknown or low-confidence detection: try GB18030 first (common
            // for Chinese Windows ANSI files), then fall back.
            console.warn(`[SFTP ${sessionId}] Low confidence detection (${detectedEncoding || 'unknown'}, ${confidence}) for ${path}. Attempting GB18030 decode first.`);
            try {
                contentUtf8 = iconv.decode(fileData, 'gb18030');
                // U+FFFD in the output means some bytes were not valid GB18030.
                if (contentUtf8.includes('\uFFFD')) {
                    console.warn(`[SFTP ${sessionId}] GB18030 decoding resulted in replacement characters. Falling back to original detection (${detectedEncoding || 'unknown'}) or UTF-8.`);
                    contentUtf8 = decodeAsDetectedOrUtf8();
                } else {
                    console.log(`[SFTP ${sessionId}] Decoded ${path} as GB18030 due to low confidence detection.`);
                }
            } catch (gbkError) {
                console.warn(`[SFTP ${sessionId}] Error decoding as GB18030, falling back to original detection (${detectedEncoding || 'unknown'}) or UTF-8:`, gbkError);
                contentUtf8 = decodeAsDetectedOrUtf8();
            }
        } else if (iconv.encodingExists(detectedEncoding)) {
            // High confidence, non-Chinese encoding that iconv-lite supports.
            contentUtf8 = iconv.decode(fileData, detectedEncoding);
            console.log(`[SFTP ${sessionId}] Decoded ${path} from ${detectedEncoding} to UTF-8 using iconv-lite (high confidence).`);
        } else {
            console.warn(`[SFTP ${sessionId}] Unsupported or unknown encoding detected for ${path}: ${detectedEncoding}. Falling back to UTF-8.`);
            contentUtf8 = fileData.toString('utf8'); // Final fallback
        }
    } catch (decodeError: unknown) {
        console.error(`[SFTP ${sessionId}] Error detecting/decoding file ${path} (ID: ${requestId}):`, decodeError);
        // Report the failure to the client and stop; no success message follows.
        const reason = decodeError instanceof Error ? decodeError.message : String(decodeError);
        state.ws.send(JSON.stringify({ type: 'sftp:readfile:error', path: path, payload: `文件编码检测或转换失败: ${reason}`, requestId: requestId }));
        return;
    }

    // 3. Send the decoded text to the frontend as a plain string.
    state.ws.send(JSON.stringify({ type: 'sftp:readfile:success', path: path, payload: { content: contentUtf8 }, requestId: requestId }));
});
} catch (error: any) { } catch (error: any) {