feat: update dict

This commit is contained in:
王念超
2024-06-12 18:35:34 +08:00
parent 0dd415c2f3
commit 0a6ccda581
72 changed files with 109 additions and 486 deletions

View File

@@ -1,65 +0,0 @@
let path = require("path");
let fs = require("fs");
// Existing translation entries; formatDict() looks words up here by `name`
// and appends every entry it had not seen before.
const str = fs.readFileSync("../public/dicts/en/common/translate/en2zh_CN.json", "utf8");
const translateDict = JSON.parse(str);
// Entries added during this run; written to ts.json at the end.
const ts = [];
console.log(translateDict.length);
const pathName = "../public/dicts/en/common";
// let pathName = "./d";
const dirs = fs.readdirSync(pathName);
dirs.forEach((dictName) => {
  // Skip the translation files themselves. Both checks have to hold: with
  // `||` the condition was true for every name, so nothing was filtered out.
  if (!dictName.includes('en2zh_CN.json') && !dictName.includes('ts.json')) {
    const dictPath = path.join(pathName, dictName);
    // console.log('d', dictPath)
    formatDict(dictPath);
  }
});
fs.writeFileSync(
  "../public/dicts/en/common/translate/ts.json",
  JSON.stringify(ts, null, 2)
);
console.log(ts.length);
/**
 * Collects the entries of one dictionary file that are missing from the
 * shared translation list.
 *
 * Reads the JSON array stored at `path`. Every entry whose `name` is not yet
 * present in `translateDict` is padded with default `name`/`trans`/`usphone`/
 * `ukphone` fields and appended to both `translateDict` and `ts`. The
 * dictionary file itself is not modified.
 *
 * @param {string} path - Location of the dictionary JSON file.
 * @returns {void} Failures are logged and the file is skipped.
 */
function formatDict(path) {
  try {
    const dicts = JSON.parse(fs.readFileSync(path, "utf8"));
    // Index the known names once per file instead of scanning translateDict
    // for every single entry.
    const knownNames = new Set(translateDict.map((w) => w.name));
    dicts.forEach((v) => {
      if (knownNames.has(v.name)) return;
      if (!v.trans) {
        // Surface new entries that carry no translation at all.
        console.log(path, v);
      }
      const data = {
        "name": "",
        "trans": [],
        usphone: '',
        ukphone: '',
        ...v
      };
      translateDict.push(data);
      ts.push(data);
      // Keep the index in step with translateDict so a word repeated inside
      // the same file is only added once.
      knownNames.add(data.name);
    });
  } catch (e) {
    // Best effort: an unreadable or malformed input (for example a directory
    // path) is skipped, but the reason is reported instead of being dropped.
    console.warn('skip', path, e && e.message);
  }
}

View File

@@ -1,103 +0,0 @@
let path = require("path");
let fs = require("fs");
// Single file name used only by the disabled call at the bottom.
const fileName = 'CET4_1.json';
// Directory that is scanned for input files, and the directory formatDict()
// writes its results into.
const read = './dict/';
const save = "./format/";
// Convert every entry found in the input directory.
const dirs = fs.readdirSync(read);
dirs.forEach((dictName) => formatDict(read, dictName));
// formatDict(read, fileName)
/**
 * Converts one raw dictionary file (one JSON object per line) into a JSON
 * array of simplified word entries and writes it to `save + name`.
 *
 * @param {string} path - Directory prefix of the input file; concatenated with `name` as-is.
 * @param {string} name - File name, used for both the input and the output file.
 * @returns {void} Errors are caught and logged; a file that fails is not written.
 */
function formatDict(path, name) {
  try {
    // Accented letters are folded to plain ASCII in the raw text before parsing.
    const replace = [
      ['é', 'e'], ['ê', 'e'], ['è', 'e'], ['ë', 'e'], ['à', 'a'], ['â', 'a'], ['ç', 'c'], ['î', 'i'], ['ï', 'i'], ['ô', 'o'], ['ù', 'u'], ['û', 'u'], ['ü', 'u'], ['ÿ', 'y']
    ];
    let str = fs.readFileSync(path + name, "utf8");
    for (const [from, to] of replace) {
      str = str.replaceAll(from, to);
    }

    const newObj = [];
    for (const line of str.split('\n')) {
      // Skip blank lines, including whitespace-only ones such as the "\r"
      // left by an empty line in a CRLF file; JSON.parse would throw on
      // those and abort the whole file.
      if (!line.trim()) continue;
      const item = JSON.parse(line);
      const id = item.wordRank;
      const word = item.headWord;
      const detail = item.content.word.content;
      const data = {
        id,
        word,
        trans: [],
        phonetic0: detail.usphone,
        phonetic1: detail.ukphone,
        sentences: [],
        relWords: [],
        phrases: [],
        synos: [],
        memory: ''
      };
      // Every section below is optional in the input and keeps its default when absent.
      if (detail.trans) {
        data.trans = detail.trans.map((s) => ({
          pos: s.pos,
          cn: s.tranCn,
          en: s.tranOther
        }));
      }
      if (detail.sentence) {
        data.sentences = detail.sentence.sentences.map((s) => ({
          v: s.sCn,
          tran: s.sContent
        }));
      }
      if (detail.remMethod) {
        data.memory = detail.remMethod.val;
      }
      if (detail.relWord) {
        data.relWords = detail.relWord.rels.map((s) => ({
          "pos": s.pos,
          "ws": s.words.map((a) => ({ w: a.hwd, tran: a.tran }))
        }));
      }
      if (detail.phrase) {
        data.phrases = detail.phrase.phrases.map((s) => ({
          v: s.pCn,
          tran: s.pContent
        }));
      }
      if (detail.syno) {
        data.synos = detail.syno.synos.map((s) => ({
          pos: s.pos,
          tran: s.tran,
          ws: s.hwds
        }));
      }
      newObj.push(data);
    }
    fs.writeFileSync(save + name, JSON.stringify(newObj, null, 2));
    // fs.writeFileSync(save + name.replace('.json', '.min.json'), JSON.stringify(newObj));
    console.log(name, newObj.length);
  } catch (e) {
    console.log('err', name, e);
  }
}

View File

@@ -1,58 +0,0 @@
let path = require("path");
let fs = require("fs");
// Translation entries, and a second file that is presumably just their words
// in the same order (formatDict() uses an index from one to read the other).
const fileName = '../public/translate/en2zh_CN.json';
const fileNameWords = '../public/translate/en2zh_CN.words.json';
// Input directory for the (currently disabled) batch run and output directory.
const read = '../public/dicts/en/word/common/';
const save = "./format2/";
// One record per processed dictionary listing its words that had no match;
// formatDict() appends to it.
const not = [];
const words = JSON.parse(fs.readFileSync(fileNameWords, "utf8"));
const allWords = JSON.parse(fs.readFileSync(fileName, "utf8"));
// Batch run over every file in `read` (disabled):
// const dirs = fs.readdirSync(read)
// dirs.forEach(dictName => {
// formatDict(read, dictName)
// })
// formatDict(read, '2024HongBao_T1.json')
// Write the report, keeping only dictionaries that have unmatched words.
const withMissingWords = not.filter((entry) => entry.list.length);
fs.writeFileSync(save + 'not.json', JSON.stringify(withMissingWords, null, 2));
/**
 * Merges translation data into one dictionary file.
 *
 * Each entry of the JSON array at `path + name` is looked up by `word` in
 * `words`; on a hit the entry is overlaid with `allWords` at the same index,
 * otherwise it is kept unchanged and its word is recorded as missing. The
 * result is written to `save + name` and a per-file record is pushed to `not`.
 *
 * @param {string} path - Directory prefix of the input file; concatenated with `name` as-is.
 * @param {string} name - File name, used for both input and output.
 * @returns {void} Errors are caught and logged.
 */
function formatDict(path, name) {
  try {
    const list = JSON.parse(fs.readFileSync(path + name, "utf8"));
    // word -> first index in `words`, built once so each lookup is O(1)
    // instead of a linear findIndex per entry.
    const indexByWord = new Map();
    words.forEach((w, i) => {
      if (!indexByWord.has(w)) indexByWord.set(w, i);
    });
    const notListCurrent = {
      name,
      list: []
    };
    const newObj = list.map((a) => {
      const rIndex = indexByWord.get(a.word);
      if (rIndex === undefined) {
        notListCurrent.list.push(a.word);
        return a;
      }
      // Fields from the translation entry win over the dictionary's own.
      return {
        ...a,
        ...allWords[rIndex]
      };
    });
    not.push(notListCurrent);
    fs.writeFileSync(save + name, JSON.stringify(newObj, null, 2));
    // fs.writeFileSync(save + name.replace('.json', '.min.json'), JSON.stringify(newObj));
    console.log(`当前字典${name},长度:${list.length},没有翻译的:${notListCurrent.list.length}`);
  } catch (e) {
    console.log('err', name, e);
  }
}

View File

@@ -1,122 +0,0 @@
const { log } = require("console");
const fs = require("fs");
// One-off clean-up of ./public/dicts/coca20000.json, rewritten in place:
// every translation string is split so that each "int." marker starts its
// own array element.
//
// Earlier passes used to live here as commented-out code: splitting the
// strings on other part-of-speech markers (vt., vi., pron., adj., adv., num.,
// interj., art., aux., conj., prep., v., n.), re-joining pieces that contain
// "& ", and dropping entries with a duplicate `name`.
try {
  const dicts = JSON.parse(fs.readFileSync("./public/dicts/coca20000.json", "utf8"));
  console.log(dicts.length);
  dicts.forEach((entry) => {
    entry.trans = entry.trans.flatMap((text) =>
      // "^" is only a temporary split marker; empty pieces are discarded.
      text.replaceAll("int.", `^int.`).split("^").filter((piece) => piece)
    );
  });
  fs.writeFileSync(
    "./public/dicts/coca20000.json",
    JSON.stringify(dicts, null, 2)
  );
} catch (err) {
  console.error(err);
}

View File

@@ -1,42 +0,0 @@
let path = require("path");
let fs = require("fs");
// Parsed translation list; not read again by the code in this script.
const str = fs.readFileSync("../public/dicts/en/common/translate/en2zh_CN.json", "utf8");
const translateDict = JSON.parse(str);
const pathName = "../public/dicts/en/common";
// let pathName = "./d";
const dirs = fs.readdirSync(pathName);
dirs.forEach((dictName) => {
  // Never rewrite the translation files themselves. Both checks have to
  // hold: with `||` the condition was true for every name, so nothing was
  // ever excluded.
  if (!dictName.includes('en2zh_CN.json') && !dictName.includes('ts.json')) {
    const dictPath = path.join(pathName, dictName);
    // console.log('d', dictPath)
    formatDict(dictPath);
  }
});
/**
 * Rewrites one dictionary file in place so that it only keeps the word names.
 *
 * Every entry of the JSON array at `path` is replaced by
 * `{ name, trans: [], usphone: '', ukphone: '' }`, and entries whose `name`
 * already occurred earlier in the file are dropped.
 *
 * @param {string} path - Location of the dictionary JSON file; it is overwritten.
 * @returns {void} Failures are logged and the file is left untouched.
 */
function formatDict(path) {
  try {
    const dicts = JSON.parse(fs.readFileSync(path, "utf8"));
    // De-duplicate by name. A Set of the freshly built objects (as before)
    // never removes anything because every object is a distinct reference.
    const seenNames = new Set();
    const newDicts = [];
    dicts.forEach((v) => {
      if (seenNames.has(v.name)) return;
      seenNames.add(v.name);
      newDicts.push({
        "name": v.name,
        "trans": [],
        usphone: '',
        ukphone: '',
      });
    });
    fs.writeFileSync(
      path,
      JSON.stringify(newDicts, null, 2)
    );
  } catch (e) {
    // Best effort: an unreadable or malformed input (for example a directory
    // path) is skipped, but the reason is reported instead of being dropped.
    console.warn('skip', path, e && e.message);
  }
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,58 +0,0 @@
let path = require("path");
let fs = require("fs");
// Translation entries, and a second file that is presumably just their words
// in the same order (formatDict() uses an index from one to read the other).
const fileName = '../public/translate/en2zh_CN.json';
const fileNameWords = '../public/translate/en2zh_CN.words.json';
// Input directory for the (currently disabled) batch run and output directory.
const read = '../format/';
const save = "./format/";
// One record per processed dictionary listing its words that had no match;
// formatDict() appends to it.
const not = [];
const words = JSON.parse(fs.readFileSync(fileNameWords, "utf8"));
const allWords = JSON.parse(fs.readFileSync(fileName, "utf8"));
// Batch run over every file in `read` (disabled):
// const dirs = fs.readdirSync(read)
// dirs.forEach(dictName => {
// formatDict(read, dictName)
// })
// formatDict(read, '2024HongBao_T1.json')
// Write the report, keeping only dictionaries that have unmatched words.
const withMissingWords = not.filter((entry) => entry.list.length);
fs.writeFileSync(save + 'not.json', JSON.stringify(withMissingWords, null, 2));
/**
 * Merges translation data into one dictionary file.
 *
 * Each entry of the JSON array at `path + name` is looked up by `word` in
 * `words`; on a hit the entry is overlaid with `allWords` at the same index,
 * otherwise it is kept unchanged and its word is recorded as missing. The
 * result is written to `save + name` and a per-file record is pushed to `not`.
 *
 * @param {string} path - Directory prefix of the input file; concatenated with `name` as-is.
 * @param {string} name - File name, used for both input and output.
 * @returns {void} Errors are caught and logged.
 */
function formatDict(path, name) {
  try {
    const list = JSON.parse(fs.readFileSync(path + name, "utf8"));
    // word -> first index in `words`, built once so each lookup is O(1)
    // instead of a linear findIndex per entry.
    const indexByWord = new Map();
    words.forEach((w, i) => {
      if (!indexByWord.has(w)) indexByWord.set(w, i);
    });
    const notListCurrent = {
      name,
      list: []
    };
    const newObj = list.map((a) => {
      const rIndex = indexByWord.get(a.word);
      if (rIndex === undefined) {
        notListCurrent.list.push(a.word);
        return a;
      }
      // Fields from the translation entry win over the dictionary's own.
      return {
        ...a,
        ...allWords[rIndex]
      };
    });
    not.push(notListCurrent);
    fs.writeFileSync(save + name, JSON.stringify(newObj, null, 2));
    // fs.writeFileSync(save + name.replace('.json', '.min.json'), JSON.stringify(newObj));
    console.log(`当前字典${name},长度:${list.length},没有翻译的:${notListCurrent.list.length}`);
  } catch (e) {
    console.log('err', name, e);
  }
}

View File

@@ -1,32 +0,0 @@
let path = require("path");
let fs = require("fs");
// Not read by the code in this script.
const fileName = '../public/translate/en2zh_CN.json';
const fileNameWords = '../public/translate/en2zh_CN.words.json';
// Input directory that is scanned, and output directory for the results.
const read = './format/';
const save = "./format2/";
// Output file names collected by formatDict(), one per processed input file.
const standardDictNames = [];
const dirs = fs.readdirSync(read);
dirs.forEach((dictName) => formatDict(read, dictName));
// formatDict(read, 'BEC_2.min.json')
const nameIndex = JSON.stringify(standardDictNames, null, 2);
fs.writeFileSync(save + 'standardDictNames.json', nameIndex);
/**
 * Re-emits one JSON file in two flavours under `save`: pretty-printed as
 * `<name>.json` and compact as `<name>.min.json`. A ".min" in the incoming
 * file name is stripped first, and the resulting name is recorded in
 * `standardDictNames`.
 *
 * @param {string} path - Directory prefix of the input file; concatenated with `name` as-is.
 * @param {string} name - Input file name.
 * @returns {void} Errors are caught and logged.
 */
function formatDict(path, name) {
  try {
    const list = JSON.parse(fs.readFileSync(path + name, "utf8"));
    const standardName = name.replace('.min', '');
    const minName = standardName.replace('.json', '.min.json');
    standardDictNames.push(standardName);
    const pretty = JSON.stringify(list, null, 2);
    fs.writeFileSync(save + standardName, pretty);
    const compact = JSON.stringify(list);
    fs.writeFileSync(save + minName, compact);
  } catch (e) {
    console.log('err', name, e);
  }
}

View File

@@ -2,6 +2,40 @@ let path = require("path");
let fs = require("fs");
const axios = require('axios')
// Raw JSON text of the entries to send.
// NOTE(review): `str` is declared twice on consecutive lines, which is a
// redeclaration error if both lines are really present. Presumably this view
// shows the old and the new line of a diff together; confirm which one the
// file actually keeps.
let str = fs.readFileSync('./save/allNew.min.json', "utf8");
let str = fs.readFileSync('./failWord.txt', "utf8");
// Words whose request failed; filled by fail().
let failList = []
// Parsed entries; test() sends one request per element.
let list = JSON.parse(str)
// Print the first parsed entry.
console.log('str',JSON.parse(str)[0])
/**
 * Resolves after the given delay.
 *
 * @param {number} val - Delay handed to setTimeout.
 * @returns {Promise<void>}
 */
async function sleep(val) {
  await new Promise((done) => {
    setTimeout(done, val);
  });
}
/**
 * Records one failed word and rewrites ./fail.txt with the complete list so far.
 *
 * @param {*} word - Value appended to `failList`.
 * @returns {void}
 */
function fail(word) {
  // push() returns the new length, which is the running failure count.
  const failedCount = failList.push(word);
  console.log('失败:', failedCount);
  const snapshot = JSON.stringify(failList, null, 2);
  fs.writeFileSync('./fail.txt', snapshot);
}
/**
 * Posts every entry of `list` to the local test endpoint, starting one
 * request every 100 ms, and records the entry's word through fail() when the
 * response is not successful or the request errors.
 *
 * The requests themselves are not awaited, so the returned promise settles
 * once the last request has been started, not once it has completed.
 *
 * @returns {Promise<void>}
 */
async function test() {
  for (let index = 0; index < list.length; index += 1) {
    const entry = list[index];
    console.log('进度', index);
    await sleep(100);
    const request = axios({
      url: 'http://localhost/index.php/v1/support/test',
      method: 'post',
      data: entry
    });
    request
      .then((response) => {
        if (!response.data.success) {
          fail(entry.word);
        }
      })
      // Also reached when the handler above throws (e.g. no `data` in the response).
      .catch(() => {
        fail(entry.word);
      });
  }
}
// Start the run. The promise returned by test() is neither awaited nor given a rejection handler.
test()

38
js_node/test2.cjs Normal file
View File

@@ -0,0 +1,38 @@
let path = require("path");
let fs = require("fs");
const axios = require('axios')
// Both inputs are read before either is parsed: the full entry list, and the
// list of failed words that test() checks each entry's `word` against.
const str = fs.readFileSync('./save/allNew.min.json', "utf8");
const failStr = fs.readFileSync('./fail.txt', "utf8");
const failList = JSON.parse(failStr);
const list = JSON.parse(str);
/**
 * Resolves after the given delay.
 *
 * @param {number} val - Delay handed to setTimeout.
 * @returns {Promise<void>}
 */
async function sleep(val) {
  await new Promise((done) => {
    setTimeout(done, val);
  });
}
// Everything handed to fail() so far; written out again after every addition.
const s = [];
/**
 * Records one value and rewrites ./failWord.txt with the complete list so far.
 *
 * @param {*} word - Value appended to `s` (test() passes the whole entry, not just its word).
 * @returns {void}
 */
function fail(word) {
  // push() returns the new length, which is the running count.
  const count = s.push(word);
  console.log('失败:', count);
  const snapshot = JSON.stringify(s, null, 2);
  fs.writeFileSync('./failWord.txt', snapshot);
}
// console.log('failList',failList)
/**
 * Passes every entry of `list` whose `word` is contained in `failList` to
 * fail(), in list order.
 *
 * @returns {Promise<void>} Resolves once all entries have been checked.
 */
async function test() {
  // A Set gives O(1) membership checks instead of scanning failList once per entry.
  const failedWords = new Set(failList);
  for (let i = 0; i < list.length; i++) {
    // for (let i = 0; i < 3000; i++) {
    const v = list[i];
    if (failedWords.has(v.word)) {
      console.log('进度', v.word);
      fail(v);
    }
  }
}
// Start the run. The promise returned by test() is neither awaited nor given a rejection handler.
test()

View File

@@ -0,0 +1,35 @@
let path = require("path");
let fs = require("fs");
// Directory that is scanned for input files, and the directory formatDict()
// writes its results into.
const read = './dict/';
const save = "./res/";
// Process every entry found in the input directory.
const dirs = fs.readdirSync(read);
dirs.forEach((dictName) => formatDict(read, dictName));
// formatDict(read, 'BEC_2.json')
/**
 * Extracts the `headWord` of every line of a raw dictionary file (one JSON
 * object per line) and writes them as a compact JSON array to `save`, using
 * the input name with ".json" replaced by "_word.json".
 *
 * @param {string} path - Directory prefix of the input file; concatenated with `name` as-is.
 * @param {string} name - Input file name.
 * @returns {void} Errors are caught and logged; a file that fails is not written.
 */
function formatDict(path, name) {
  try {
    const raw = fs.readFileSync(path + name, "utf8");
    const newObj = raw
      .split('\n')
      // Skip blank lines, including whitespace-only ones such as the "\r"
      // left by an empty line in a CRLF file; JSON.parse would throw on
      // those and abort the whole file.
      .filter((line) => line.trim())
      .map((line) => JSON.parse(line).headWord);
    // fs.writeFileSync(save + name, JSON.stringify(newObj, null, 2));
    fs.writeFileSync(save + name.replace('.json', '_word.json'), JSON.stringify(newObj));
    console.log(name, newObj.length);
  } catch (e) {
    console.log('err', name, e);
  }
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.