This commit is contained in:
zyronon
2023-12-03 23:40:24 +08:00
parent 20165db2f3
commit f839e54617
10 changed files with 100 additions and 71 deletions

View File

@@ -46,7 +46,7 @@ export function splitEnArticle(text: string): { sections: Sentence[][], newText:
let doc = nlp.tokenize(rowSection)
let sentences = doc.json()
// console.log('--')
console.log('ss', sentences)
// console.log('ss', sentences)
sentences.map(sentenceRow => {
let sentence: Sentence = {
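// It has no spaces, so when the data is edited line by line and then merged, all spaces are lost and the library cannot split sentences correctly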
@@ -138,15 +138,20 @@ export function splitEnArticle(text: string): { sections: Sentence[][], newText:
break
// For cases like “' -- ”: the space has to be preserved, so a placeholder is used, because every symbol sets nextSpace of the preceding character to false
case ' ':
console.log('sentence', sentence)
sentence.words[sentence.words.length - 1].nextSpace = true
let word3 = cloneDeep({
...DefaultArticleWord,
name: 'placeholder',
isSymbol: true,
nextSpace: false,
});
sentence.words.push(word3)
// console.log('sentence', sentence)
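// Case: “The clock has stopped!' I looked at my watch.”
// When the ' in stopped!' is detected and the opening quote is not in the current sentence, the current sentence's words are merged into the previous sentence. The current sentence's words are then empty, which causes an error
// So check that words is non-empty first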
if (sentence.words.length) {
sentence.words[sentence.words.length - 1].nextSpace = true
let word3 = cloneDeep({
...DefaultArticleWord,
name: 'placeholder',
isSymbol: true,
nextSpace: false,
});
sentence.words.push(word3)
}
break
default:
// console.log('post', post)
@@ -183,11 +188,7 @@ export function splitEnArticle(text: string): { sections: Sentence[][], newText:
let post: string = v.post
// Check whether post is empty or just a space: a normal word is followed by a single space, and that case needs no handling.
if (post && post !== ' ') {
try {
checkSymbol(post)
} catch (e) {
console.log('err', v)
}
checkSymbol(post)
}
}
})
@@ -197,19 +198,19 @@ export function splitEnArticle(text: string): { sections: Sentence[][], newText:
})
})
// sections = sections.filter(sectionItem => sectionItem.length)
// sections.map((sectionItem, a) => {
// sectionItem.map((sentenceItem, b) => {
// sentenceItem.text = sentenceItem.words.reduce((previousValue: string, currentValue) => {
// previousValue += currentValue.name + (currentValue.nextSpace ? ' ' : '')
// return previousValue
// }, '')
// })
// })
sections = sections.filter(sectionItem => sectionItem.length)
sections.map((sectionItem, a) => {
sectionItem.map((sentenceItem, b) => {
sentenceItem.text = sentenceItem.words.reduce((previousValue: string, currentValue) => {
previousValue += currentValue.name + (currentValue.nextSpace ? ' ' : '')
return previousValue
}, '')
})
})
// console.log(sections)
// console.timeEnd()
console.log('sections', sections)
// console.log('sections', sections)
return {
newText: text,
sections
@@ -278,11 +279,12 @@ export function isArticle(type: DictType): boolean {
export function getTranslateText(article: Article) {
if (article.useTranslateType === TranslateType.custom) {
return article.textCustomTranslate
.split('\r\n\r\n').filter(v => v)
.split('\n\n').filter(v => v)
} else if (article.useTranslateType === TranslateType.network) {
return article.textNetworkTranslate
.split('\r\n\r\n').filter(v => v)
.split('\n\n').filter(v => v)
} else {
return []
}

View File

@@ -6,29 +6,29 @@ import {Translator} from "@opentranslate/translator/src/translator.ts";
export function renewSectionTranslates(article: Article, translate: string) {
let failCount = 0
let articleTranslate = translate.split('\n')
// console.log('articleTranslate', articleTranslate)
// console.log('articleTranslate', articleTranslate)
let count = 0
for (let i = 0; i < article.sections.length; i++) {
let v = article.sections[i]
for (let j = 0; j < v.length; j++) {
let sentence = v[j]
try {
let trans = articleTranslate[count]
if (trans) {
sentence.translate = trans
} else {
failCount++
}
} catch (e) {
failCount++
// console.log('没有对应的翻译', sentence.text)
}
count++
let articleTranslate = translate.split('\n')
// console.log('articleTranslate', articleTranslate)
// console.log('articleTranslate', articleTranslate)
let count = 0
for (let i = 0; i < article.sections.length; i++) {
let v = article.sections[i]
for (let j = 0; j < v.length; j++) {
let sentence = v[j]
try {
let trans = articleTranslate[count]
if (trans.trim()) {
sentence.translate = trans
} else {
failCount++
}
count++
} catch (e) {
failCount++
// console.log('没有对应的翻译', sentence.text)
}
count++
}
count++
}
return failCount
}