feat:修改类型结构

This commit is contained in:
zyronon
2025-08-06 00:31:01 +08:00
parent fe158b9566
commit 6a330e574f
47 changed files with 467332 additions and 52 deletions

224427
js_node/save/normalList.json Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

179158
js_node/save/unnormalList.json Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -28,8 +28,8 @@ async function crawlWord(val, page,) {
try {
await page.goto(url, {waitUntil: 'networkidle', timeout: 15000});
const titleEl = await page.locator('.title').first();
data.word = await titleEl.evaluate(el => el.firstChild?.nodeValue || '');
// const titleEl = await page.locator('.title').first();
// data.word = await titleEl.evaluate(el => el.firstChild?.nodeValue || '');
const phones = await page.$$('.per-phone .phonetic');
if (phones[0]) data.phonetic0 = (await phones[0].textContent())?.trim() || '';
@@ -142,15 +142,20 @@ async function crawlWord(val, page,) {
let removeList = raw.slice()
const resultMap = new Map();
let newFileName = file.replaceAll('.json', '-fetch.json')
const newRaw = JSON.parse(fs.readFileSync(newFileName, 'utf-8'));
newRaw.map(word => {
resultMap.set(word.word, word);
})
try {
const newRaw = JSON.parse(fs.readFileSync(newFileName, 'utf-8'));
console.log('已保存:', newRaw.length);
newRaw.map(word => {
resultMap.set(word.word, word);
})
} catch (e) {
}
for (let i = 0; i < raw.length; i++) {
let word = raw[i];
console.log(`爬取:${file}${word.word},进度:${resultMap.size} / ${raw.length};时间:${dayjs().format('YYYY-MM-DD HH:mm:ss')}`)
console.log(`爬取:${file}${word.word},进度:${i} / ${raw.length};时间:${dayjs().format('YYYY-MM-DD HH:mm:ss')}`)
const result = await crawlWord(word, page);
if (result) {
resultMap.set(word.word, result);