优化词典

This commit is contained in:
zyronon
2023-10-17 18:58:44 +08:00
parent c3960735ff
commit 8e332e1463
205 changed files with 442 additions and 2041707 deletions

43
node/checkRepeat.cjs Normal file
View File

@@ -0,0 +1,43 @@
let path = require("path");
let fs = require("fs");
// Load and parse the master translation table. Nothing else in this script
// reads `translateDict`; the statement still fails fast if the file is
// missing or is not valid JSON.
const str = fs.readFileSync("../public/dicts/en/cn/translate.json", "utf8");
const translateDict = JSON.parse(str);
const pathName = "../public/dicts/en/cn";
// Running list of entries merged from every dictionary file.
// formatDict() reassigns this binding, so it has to stay `let`.
let newDicts = [];
// Visit every entry of the dictionary folder except the translation table
// itself. No file/directory check is made here: whatever cannot be read as
// JSON is reported by formatDict().
for (const dictName of fs.readdirSync(pathName)) {
  if (dictName.includes('translate.json')) {
    continue;
  }
  const dictPath = path.join(pathName, dictName);
  console.log('d', dictPath);
  formatDict(dictPath);
}
/**
 * Merge one dictionary file into the module-level `newDicts` list and drop
 * repeated entries, logging the sizes involved so repeats can be spotted.
 *
 * Three numbers are logged: the size of the file just read, the size of the
 * merged list before de-duplication, and its size afterwards.
 *
 * Entries are compared by their `name` property when they are objects that
 * have one, and by value otherwise. A plain `Set` compares objects by
 * reference, so it can never collapse two word objects parsed from JSON;
 * keying on `name` is what makes repeats detectable for object entries while
 * leaving the behaviour for plain string entries unchanged.
 * NOTE(review): assumes `name` is the identifying field of an entry — confirm
 * against the dictionary files.
 *
 * @param {string} path - Location of the JSON dictionary file to merge.
 * @returns {void} Read/parse failures are logged, not thrown.
 */
function formatDict(path) {
  try {
    const str = fs.readFileSync(path, "utf8");
    const dicts = JSON.parse(str);
    const merged = newDicts.concat(dicts);
    console.log(dicts.length);
    console.log(merged.length);
    // Keep the first occurrence of every key, preserving the original order.
    const seen = new Set();
    newDicts = merged.filter((entry) => {
      const hasName = typeof entry === "object" && entry !== null && "name" in entry;
      const key = hasName ? entry.name : entry;
      if (seen.has(key)) {
        return false;
      }
      seen.add(key);
      return true;
    });
    console.log(newDicts.length);
  } catch (e) {
    console.log('err', e)
  }
}

122
node/formatBigDict.cjs Normal file
View File

@@ -0,0 +1,122 @@
const { log } = require("console");
const fs = require("fs");
// One-off clean-up pass over ./public/dicts/coca20000.json (path is relative
// to the current working directory). The file is rewritten in place.
//
// Earlier passes that used to live here as commented-out code appear to have:
//   - removed "," from translations and split them before the part-of-speech
//     tags vt. vi. pron. adj. adv. num. interj. art. aux. conj. prep.,
//     re-joining a piece containing "& " with the piece that follows it;
//   - split before "v." and "n." while keeping "adv." and "pron." intact;
//   - dropped entries whose `name` had already been seen.
// Only the "int." pass below is still active.
try {
  const raw = fs.readFileSync("./public/dicts/coca20000.json", "utf8");
  const dicts = JSON.parse(raw);
  console.log(dicts.length);

  dicts.forEach((entry) => {
    // Start a new translation item at every "int." tag: mark each tag with
    // "^", cut on the marker, and discard the empty pieces that produces.
    entry.trans = entry.trans.flatMap((text) =>
      text.replaceAll("int.", `^int.`).split("^").filter(Boolean)
    );
  });

  const serialized = JSON.stringify(dicts, null, 2);
  fs.writeFileSync("./public/dicts/coca20000.json", serialized);
} catch (err) {
  console.error(err);
}

56
node/formatDictSimple.cjs Normal file
View File

@@ -0,0 +1,56 @@
let path = require("path");
let fs = require("fs");
// Master translation table; formatDict() appends to it every word it has not
// seen yet.
const str = fs.readFileSync("../public/dicts/en/cn/translate.json", "utf8");
const translateDict = JSON.parse(str);
// Words found in the dictionary files that were missing from the table.
const ts = [];
console.log(translateDict.length);
const pathName = "../public/dicts/en/cn";
// Feed every entry of the dictionary folder, except the translation table
// itself, through formatDict().
fs.readdirSync(pathName)
  .filter((dictName) => !dictName.includes('translate.json'))
  .map((dictName) => path.join(pathName, dictName))
  .forEach((dictPath) => {
    formatDict(dictPath);
  });
// Persist the newly discovered words next to the dictionaries, then report
// how many there were.
const tsJson = JSON.stringify(ts, null, 2);
fs.writeFileSync("../public/dicts/en/cn/ts.json", tsJson);
console.log(ts.length);
/**
 * Collect the words of one dictionary file that the translation table does
 * not contain yet.
 *
 * Every entry whose `name` is not already present in the module-level
 * `translateDict` is appended to both `translateDict` and `ts`, so a word is
 * collected once even if it shows up again later in the same file or in
 * another file.
 *
 * @param {string} path - Location of the JSON dictionary file to scan.
 * @returns {void} A file that cannot be read or parsed is reported on stderr
 *   and skipped; the remaining files are still processed by the caller.
 */
function formatDict(path) {
  try {
    const str = fs.readFileSync(path, "utf8");
    const dicts = JSON.parse(str);
    // Index the known names once per file so each membership test is O(1)
    // instead of a linear scan of translateDict for every entry.
    const knownNames = new Set(translateDict.map((w) => w.name));
    dicts.forEach((v) => {
      if (knownNames.has(v.name)) {
        return;
      }
      knownNames.add(v.name);
      translateDict.push(v);
      ts.push(v);
    });
  } catch (e) {
    // Keep going on a bad file, but say which file was skipped and why
    // instead of swallowing the failure.
    console.error('err', path, e);
  }
}