From 0bf4cfe61b8204c3f215cd52023edf0572225da9 Mon Sep 17 00:00:00 2001 From: zyronon Date: Wed, 27 Sep 2023 01:41:22 +0800 Subject: [PATCH] save --- Note.md | 8 + components.d.ts | 4 + package.json | 2 + pnpm-lock.yaml | 104 ++++++ src/hooks/article.ts | 830 ++++++++++++++++++++++++----------------- test/index.js | 54 +++ test/package-lock.json | 199 ++++++++++ test/package.json | 18 + 8 files changed, 883 insertions(+), 336 deletions(-) create mode 100644 Note.md create mode 100644 test/index.js create mode 100644 test/package-lock.json create mode 100644 test/package.json diff --git a/Note.md b/Note.md new file mode 100644 index 00000000..e0358578 --- /dev/null +++ b/Note.md @@ -0,0 +1,8 @@ +例句:On Jan. 20, former Sen. Barack Obama became the 44th President of the U.S. Millions attended the Inauguration. +sentence-tokenizer + 可以正常断句,但无法识别例句 +sentence-splitter + 无法正常断句(在引号里面的问号和感叹号会被断句),但可以识别例句 + +https://github.com/Tessmore/sbd + 一切正常 \ No newline at end of file diff --git a/components.d.ts b/components.d.ts index ffb49ddf..9631e2f5 100644 --- a/components.d.ts +++ b/components.d.ts @@ -21,11 +21,15 @@ declare module 'vue' { DictModal: typeof import('./src/components/Toolbar/DictModal.vue')['default'] EditAbleText: typeof import('./src/components/EditAbleText.vue')['default'] ElInput: typeof import('element-plus/es')['ElInput'] + ElInputNumber: typeof import('element-plus/es')['ElInputNumber'] ElOption: typeof import('element-plus/es')['ElOption'] ElProgress: typeof import('element-plus/es')['ElProgress'] + ElRadio: typeof import('element-plus/es')['ElRadio'] ElRadioButton: typeof import('element-plus/es')['ElRadioButton'] ElRadioGroup: typeof import('element-plus/es')['ElRadioGroup'] ElSelect: typeof import('element-plus/es')['ElSelect'] + ElSlider: typeof import('element-plus/es')['ElSlider'] + ElSwitch: typeof import('element-plus/es')['ElSwitch'] FeedbackModal: typeof import('./src/components/Toolbar/FeedbackModal.vue')['default'] Fireworks: typeof import('./src/components/Fireworks.vue')['default'] Footer: typeof import('./src/components/Practice/Footer.vue')['default'] diff --git a/package.json b/package.json index 47f511bf..85b958cb 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,8 @@ "lodash-es": "^4.17.21", "mitt": "^3.0.1", "pinia": "^2.1.6", + "sbd": "^1.0.19", + "sentence-splitter": "^4.2.1", "swiper": "^10.1.0", "tesseract.js": "^4.1.1", "uuid": "^9.0.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 5e2090ee..261430b8 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -38,6 +38,12 @@ dependencies: pinia: specifier: ^2.1.6 version: 2.1.6(typescript@5.2.2)(vue@3.3.4) + sbd: + specifier: ^1.0.19 + version: 1.0.19 + sentence-splitter: + specifier: ^4.2.1 + version: 4.2.1 swiper: specifier: ^10.1.0 version: 10.1.0 @@ -832,6 +838,10 @@ packages: resolution: {integrity: sha512-Ccy0NlLkzr0Ex2FKvh2X+OyERHXJ88XJ1MXtsI9y9fGexlaXaVTPzBCRBwIxFkORuOb+uBqeu+RqnpgYTEZRUQ==} dev: false + /@textlint/ast-node-types@13.3.3: + resolution: {integrity: sha512-KCpJppfX3Km69twa6SmVEJ8mkyAZSrxw3XaaLQSlpc7PWnLUJSCHGPVECI1nSUDhiTd1r6zlRvWuyIAZJiov+A==} + dev: false + /@tsconfig/node10@1.0.9: resolution: {integrity: sha512-jNsYVVxU8v5g43Erja32laIDHXeoNvFEpX33OK4d6hljo3jDhCBDhx5dhCCTMWUojscpAagGiRkBKxpdl9fxqA==} requiresBuild: true @@ -1214,6 +1224,10 @@ packages: resolution: {integrity: sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==} dev: false + /boundary@2.0.0: + resolution: {integrity: sha512-rJKn5ooC9u8q13IMCrW0RSp31pxBCHE3y9V/tp3TdWSLf8Em3p6Di4NBpfzbJge9YjjFEsD0RtFEjtvHL5VyEA==} + dev: false + /brace-expansion@1.1.11: resolution: {integrity: sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==} dependencies: @@ -1525,6 +1539,11 @@ packages: resolution: {integrity: sha512-Q6fKUPqnAHAyhiUgFU7BUzLiv0kd8saH9al7tnu5Q/okj6dnupxyTgFIBjVzJATdfIAm9NAsvXNzjaKa+bxVyA==} dev: true + /deepmerge@4.3.1: + resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==} + engines: {node: '>=0.10.0'} + dev: false + /defaults@1.0.4: resolution: {integrity: sha512-eFuaLoy/Rxalv2kr+lqMlUnrDWV+3j4pljOIJgLIhI058IQfWJ7vXhyEIHu+HtC738klGALYxOKDO0bQP3tg8A==} dependencies: @@ -1558,6 +1577,33 @@ packages: dev: true optional: true + /dom-serializer@2.0.0: + resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==} + dependencies: + domelementtype: 2.3.0 + domhandler: 5.0.3 + entities: 4.5.0 + dev: false + + /domelementtype@2.3.0: + resolution: {integrity: sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==} + dev: false + + /domhandler@5.0.3: + resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==} + engines: {node: '>= 4'} + dependencies: + domelementtype: 2.3.0 + dev: false + + /domutils@3.1.0: + resolution: {integrity: sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==} + dependencies: + dom-serializer: 2.0.0 + domelementtype: 2.3.0 + domhandler: 5.0.3 + dev: false + /electron-to-chromium@1.4.488: resolution: {integrity: sha512-Dv4sTjiW7t/UWGL+H8ZkgIjtUAVZDgb/PwGWvMsCT7jipzUV/u5skbLXPFKb6iV0tiddVi/bcS2/kUrczeWgIQ==} dev: true @@ -1591,6 +1637,11 @@ packages: resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==} dev: true + /entities@4.5.0: + resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==} + engines: {node: '>=0.12'} + dev: false + /error-ex@1.3.2: resolution: {integrity: sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==} requiresBuild: true @@ -1643,6 +1694,11 @@ packages: engines: {node: '>=0.8.0'} dev: true + /escape-string-regexp@4.0.0: + resolution: {integrity: sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==} + engines: {node: '>=10'} + dev: false + /escape-string-regexp@5.0.0: resolution: {integrity: sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw==} engines: {node: '>=12'} @@ -1907,6 +1963,15 @@ packages: engines: {node: '>=8'} dev: true + /htmlparser2@8.0.2: + resolution: {integrity: sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==} + dependencies: + domelementtype: 2.3.0 + domhandler: 5.0.3 + domutils: 3.1.0 + entities: 4.5.0 + dev: false + /husky@8.0.3: resolution: {integrity: sha512-+dQSyqPh4x1hlO1swXBiNb2HzTDN1I2IGLQx1GrBuiqFJfoMrnZWwVmatvSiO+Iz8fBUnf+lekwNo4c2LlXItg==} engines: {node: '>=14'} @@ -2042,6 +2107,11 @@ packages: engines: {node: '>=0.12.0'} dev: true + /is-plain-object@5.0.0: + resolution: {integrity: sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==} + engines: {node: '>=0.10.0'} + dev: false + /is-unicode-supported@0.1.0: resolution: {integrity: sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw==} engines: {node: '>=10'} @@ -2438,6 +2508,10 @@ packages: engines: {node: '>=0.10.0'} dev: true + /parse-srcset@1.0.2: + resolution: {integrity: sha512-/2qh0lav6CmI15FzA3i/2Bzk2zCgQhGMkvhOhKNcBVQ1ldgpbfiNTVslmooUmWJcADi1f1kIeynbDRVzNlfR6Q==} + dev: false + /path-is-absolute@1.0.1: resolution: {integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==} engines: {node: '>=0.10.0'} @@ -2656,6 +2730,17 @@ packages: resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==} dev: true + /sanitize-html@2.11.0: + resolution: {integrity: sha512-BG68EDHRaGKqlsNjJ2xUB7gpInPA8gVx/mvjO743hZaeMCZ2DwzW7xvsqZ+KNU4QKwj86HJ3uu2liISf2qBBUA==} + dependencies: + deepmerge: 4.3.1 + escape-string-regexp: 4.0.0 + htmlparser2: 8.0.2 + is-plain-object: 5.0.0 + parse-srcset: 1.0.2 + postcss: 8.4.27 + dev: false + /sass@1.64.2: resolution: {integrity: sha512-TnDlfc+CRnUAgLO9D8cQLFu/GIjJIzJCGkE7o4ekIGQOH7T3GetiRR/PsTWJUHhkzcSPrARkPI+gNWn5alCzDg==} engines: {node: '>=14.0.0'} @@ -2666,6 +2751,12 @@ packages: source-map-js: 1.0.2 dev: true + /sbd@1.0.19: + resolution: {integrity: sha512-b5RyZMGSrFuIB4AHdbv12uYHS8YGEJ36gtuvG3RflbJGY+T0dXmAL0E4vZjQqT2RsX0v+ZwVqhV2zsGr5aFK9w==} + dependencies: + sanitize-html: 2.11.0 + dev: false + /scule@1.0.0: resolution: {integrity: sha512-4AsO/FrViE/iDNEPaAQlb77tf0csuq27EsVpy6ett584EcRTp6pTDLoGWVxCD77y5iU5FauOvhsI4o1APwPoSQ==} dev: true @@ -2683,6 +2774,13 @@ packages: lru-cache: 6.0.0 dev: true + /sentence-splitter@4.2.1: + resolution: {integrity: sha512-zn7awgCg40lyb+fe6N/fRJS3r+Ag3SmrmiYHZZSM9oQ2HTnwSMooUgQXSMLeQdi5HWMYOnhrovE2JZ3pyGU0dg==} + dependencies: + '@textlint/ast-node-types': 13.3.3 + structured-source: 4.0.0 + dev: false + /side-channel@1.0.4: resolution: {integrity: sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==} dependencies: @@ -2742,6 +2840,12 @@ packages: acorn: 8.10.0 dev: true + /structured-source@4.0.0: + resolution: {integrity: sha512-qGzRFNJDjFieQkl/sVOI2dUjHKRyL9dAJi2gCPGJLbJHBIkyOHxjuocpIEfbLioX+qSJpvbYdT49/YCdMznKxA==} + dependencies: + boundary: 2.0.0 + dev: false + /supports-color@5.5.0: resolution: {integrity: sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==} engines: {node: '>=4'} diff --git a/src/hooks/article.ts b/src/hooks/article.ts index 5344a13d..25f0f5d7 100644 --- a/src/hooks/article.ts +++ b/src/hooks/article.ts @@ -1,367 +1,525 @@ import {DefaultArticleWord, Sentence, Word} from "@/types.ts"; -import {cloneDeep} from "lodash-es"; +import {cloneDeep, indexOf} from "lodash-es"; +import {split} from 'sentence-splitter' +import tokenizer from 'sbd' interface KeyboardMap { - Period: string, - Comma: string, - Slash: string, - Exclamation: string, - QuoteLeft: string, - QuoteRight: string, + Period: string, + Comma: string, + Slash: string, + Exclamation: string, + QuoteLeft: string, + QuoteRight: string, } export const CnKeyboardMap: KeyboardMap = { - Period: '。', - Comma: ',', - Slash: '?', - Exclamation: '!', - QuoteLeft: '“', - QuoteRight: '”', + Period: '。', + Comma: ',', + Slash: '?', + Exclamation: '!', + QuoteLeft: '“', + QuoteRight: '”', } export const EnKeyboardMap: KeyboardMap = { - Period: '.', - Comma: ',', - Slash: '?', - Exclamation: '!', - QuoteLeft: '"', - QuoteRight: '"', + Period: '.', + Comma: ',', + Slash: '?', + Exclamation: '!', + QuoteLeft: '"', + QuoteRight: '"', } export function splitEnArticle(text: string, lang: string = 'en', keyboardMap: KeyboardMap = EnKeyboardMap): { - sections: Sentence[][], - newText: string + sections: Sentence[][], + newText: string } { - let sections: Sentence[][] = [] - let section: Sentence[] = [] - let sentence: Sentence = { - text: '', - translate: '', - words: [] - } - section.push(sentence) - sections.push(section) - let word = cloneDeep({...DefaultArticleWord, name: '', nextSpace: true}); + let sections: Sentence[][] = [] + let section: Sentence[] = [] + let sentence: Sentence = { + text: '', + translate: '', + words: [] + } + section.push(sentence) + sections.push(section) + let word = cloneDeep({...DefaultArticleWord, name: '', nextSpace: true}); - //去除头和尾部的空格 - text = text.trim() - //加\n用于添加最后一段 - text += '\n' - text = text.replaceAll(`‘`, '"') - text = text.replaceAll(`’`, '"') - text = text.replaceAll(`“`, '"') - text = text.replaceAll(`”`, '"') - //替换所有单引号为双引号 - text = text.replaceAll(`'`, '"') - //将缩写词的双引号替换回单引号 - text = text.replaceAll(`"t`, `'t`) - text = text.replaceAll(`"s`, `'s`) - text = text.replaceAll(`"S`, `'S`) - text = text.replaceAll(`"m`, `'m`) - text = text.replaceAll(`"d`, `'d`) - text = text.replaceAll(`"ve`, `'ve`) - text = text.replaceAll(`"clock`, `'clock`) - console.log('splitEnArticle', text) - // console.log('splitEnArticle length', text.length) + //去除头和尾部的空格 + text = text.trim() + //加\n用于添加最后一段 + text += '\n' + text = text.replaceAll(`‘`, '"') + text = text.replaceAll(`’`, '"') + text = text.replaceAll(`“`, '"') + text = text.replaceAll(`”`, '"') + //替换所有单引号为双引号 + text = text.replaceAll(`'`, '"') + //将缩写词的双引号替换回单引号 + text = text.replaceAll(`"t`, `'t`) + text = text.replaceAll(`"s`, `'s`) + text = text.replaceAll(`"S`, `'S`) + text = text.replaceAll(`"m`, `'m`) + text = text.replaceAll(`"d`, `'d`) + text = text.replaceAll(`"ve`, `'ve`) + text = text.replaceAll(`"clock`, `'clock`) + console.log('splitEnArticle', text) + // console.log('splitEnArticle length', text.length) - text.split('').map((v, i, arr) => { - // if (i > 2306) debugger - switch (v) { - case ' ': - if (word.name) { - sentence.words.push(word) - word = cloneDeep(DefaultArticleWord) - } - break - case keyboardMap.Period: - case keyboardMap.Comma: - case keyboardMap.Slash: - case keyboardMap.Exclamation: - word.nextSpace = false - sentence.words.push(word) - sentence.words.push(cloneDeep({...DefaultArticleWord, name: v, nextSpace: true, isSymbol: true})) - section.push({ - text: '', - translate: '', - words: [] - }) - sentence = section[section.length - 1] - word = cloneDeep(DefaultArticleWord) - break - case keyboardMap.QuoteLeft: - let nearSymbolPosition = null - let indexs = { - a: -1, - b: -1, - c: -1 - } - //TODO 可以优化成for+break - sections.toReversed().map((sectionItem, a) => { - sectionItem.toReversed().map((sentenceItem, b) => { - sentenceItem.words.toReversed().map((wordItem, c) => { - if (wordItem.symbolPosition !== '' && nearSymbolPosition === null) { - nearSymbolPosition = wordItem.symbolPosition - indexs = {a, b, c} - } - }) - }) - }) - - if (nearSymbolPosition === 'end' || nearSymbolPosition === null) { - sentence.words.push(cloneDeep({ - ...DefaultArticleWord, - name: v, - nextSpace: false, - isSymbol: true, - symbolPosition: 'start' - })) - word = cloneDeep(DefaultArticleWord) - } else { - let addCurrent = false - sentence.words.toReversed().map((wordItem, c) => { - if (wordItem.symbolPosition === 'start' && !addCurrent) { - addCurrent = true - } - }) - if (addCurrent) { - //`“这是私人谈话”`这种没有结束符号的情况,swtich走不到结束符号,也就不会起新的一行 - if (word.name.length) { - sentence.words.push(word) - } - sentence.words.push(cloneDeep({ - ...DefaultArticleWord, - name: v, - nextSpace: true, - isSymbol: true, - symbolPosition: 'end' - })) - word = cloneDeep(DefaultArticleWord) - } else { - let lastSentence = section[section.length - 2] - lastSentence.words[lastSentence.words.length - 1].nextSpace = false - lastSentence.words.push(cloneDeep({ - ...DefaultArticleWord, - name: v, - nextSpace: true, - isSymbol: true, - symbolPosition: 'end' - })) - } - } - - break - case '\n': - //如果是空行,就删除 - if (!sentence.words.length) { - section.pop() - sentence = section[section.length - 1] - } - //判断name有没有值,有值说明最后一句没有结束符,正常来说一句话以句号或逗号结尾 - if (word.name.length) { - sentence.words.push(word) - } - if (i !== arr.length - 1) { - sections.push([]) - section = sections[sections.length - 1] - section.push({ - text: '', - translate: '', - words: [] - }) - sentence = section[section.length - 1] - word = cloneDeep(DefaultArticleWord) - } - break - default: - // if (v === '2')debugger - word.name += v - break + text.split('').map((v, i, arr) => { + // if (i > 2306) debugger + switch (v) { + case ' ': + if (word.name) { + sentence.words.push(word) + word = cloneDeep(DefaultArticleWord) } - }) - sections = sections.filter(sectionItem => sectionItem.length) - sections.map((sectionItem, a) => { - sectionItem.map((sentenceItem, b) => { - sentenceItem.text = sentenceItem.words.reduce((previousValue: string, currentValue) => { - previousValue += currentValue.name + (currentValue.nextSpace ? ' ' : '') - return previousValue - }, '') + break + case keyboardMap.Period: + case keyboardMap.Comma: + case keyboardMap.Slash: + case keyboardMap.Exclamation: + word.nextSpace = false + sentence.words.push(word) + sentence.words.push(cloneDeep({...DefaultArticleWord, name: v, nextSpace: true, isSymbol: true})) + section.push({ + text: '', + translate: '', + words: [] }) - }) - return { - newText: text, - sections + sentence = section[section.length - 1] + word = cloneDeep(DefaultArticleWord) + break + case keyboardMap.QuoteLeft: + let nearSymbolPosition = null + let indexs = { + a: -1, + b: -1, + c: -1 + } + //TODO 可以优化成for+break + sections.toReversed().map((sectionItem, a) => { + sectionItem.toReversed().map((sentenceItem, b) => { + sentenceItem.words.toReversed().map((wordItem, c) => { + if (wordItem.symbolPosition !== '' && nearSymbolPosition === null) { + nearSymbolPosition = wordItem.symbolPosition + indexs = {a, b, c} + } + }) + }) + }) + + if (nearSymbolPosition === 'end' || nearSymbolPosition === null) { + sentence.words.push(cloneDeep({ + ...DefaultArticleWord, + name: v, + nextSpace: false, + isSymbol: true, + symbolPosition: 'start' + })) + word = cloneDeep(DefaultArticleWord) + } else { + let addCurrent = false + sentence.words.toReversed().map((wordItem, c) => { + if (wordItem.symbolPosition === 'start' && !addCurrent) { + addCurrent = true + } + }) + if (addCurrent) { + //`“这是私人谈话”`这种没有结束符号的情况,swtich走不到结束符号,也就不会起新的一行 + if (word.name.length) { + sentence.words.push(word) + } + sentence.words.push(cloneDeep({ + ...DefaultArticleWord, + name: v, + nextSpace: true, + isSymbol: true, + symbolPosition: 'end' + })) + word = cloneDeep(DefaultArticleWord) + } else { + let lastSentence = section[section.length - 2] + lastSentence.words[lastSentence.words.length - 1].nextSpace = false + lastSentence.words.push(cloneDeep({ + ...DefaultArticleWord, + name: v, + nextSpace: true, + isSymbol: true, + symbolPosition: 'end' + })) + } + } + + break + case '\n': + //如果是空行,就删除 + if (!sentence.words.length) { + section.pop() + sentence = section[section.length - 1] + } + //判断name有没有值,有值说明最后一句没有结束符,正常来说一句话以句号或逗号结尾 + if (word.name.length) { + sentence.words.push(word) + } + if (i !== arr.length - 1) { + sections.push([]) + section = sections[sections.length - 1] + section.push({ + text: '', + translate: '', + words: [] + }) + sentence = section[section.length - 1] + word = cloneDeep(DefaultArticleWord) + } + break + default: + // if (v === '2')debugger + word.name += v + break } + }) + sections = sections.filter(sectionItem => sectionItem.length) + sections.map((sectionItem, a) => { + sectionItem.map((sentenceItem, b) => { + sentenceItem.text = sentenceItem.words.reduce((previousValue: string, currentValue) => { + previousValue += currentValue.name + (currentValue.nextSpace ? ' ' : '') + return previousValue + }, '') + }) + }) + return { + newText: text, + sections + } } export function splitCNArticle(article: string, lang: string = 'cn', keyboardMap: KeyboardMap = CnKeyboardMap): Sentence[][] { - let sections: Sentence[][] = [] - let section: Sentence[] = [] - let sentence: Sentence = { - text: '', - translate: '', - words: [] - } - section.push(sentence) - sections.push(section) - let word = cloneDeep({...DefaultArticleWord, name: '', nextSpace: true}); - //去除头和尾部的空格 - article = article.trim() - //加\n用于添加最后一段 - article += '\n' - // console.log('articles', articles) + let sections: Sentence[][] = [] + let section: Sentence[] = [] + let sentence: Sentence = { + text: '', + translate: '', + words: [] + } + section.push(sentence) + sections.push(section) + let word = cloneDeep({...DefaultArticleWord, name: '', nextSpace: true}); + //去除头和尾部的空格 + article = article.trim() + //加\n用于添加最后一段 + article += '\n' + // console.log('articles', articles) - article.split('').map((v, i, arr) => { - switch (v) { - case ' ': - if (word.name) { - sentence.words.push(word) - word = cloneDeep(DefaultArticleWord) - } - break - case keyboardMap.Period: - case keyboardMap.Comma: - case keyboardMap.Slash: - case keyboardMap.Exclamation: - word.nextSpace = false - sentence.words.push(word) - sentence.words.push(cloneDeep({...DefaultArticleWord, name: v, nextSpace: true})) - section.push({ - text: '', - translate: '', - words: [] - }) - sentence = section[section.length - 1] - word = cloneDeep(DefaultArticleWord) - break - case keyboardMap.QuoteLeft: - sentence.words.push(cloneDeep({ - ...DefaultArticleWord, - name: v, - nextSpace: false, - isSymbol: true, - symbolPosition: 'start' - })) - word = cloneDeep(DefaultArticleWord) - break - case keyboardMap.QuoteRight: - let nearSymbolPosition = null - //TODO 可以优化成for+break - sections.toReversed().map((sectionItem, a) => { - sectionItem.toReversed().map((sentenceItem, b) => { - sentenceItem.words.toReversed().map((wordItem, c) => { - if (wordItem.symbolPosition !== '' && nearSymbolPosition === null) { - nearSymbolPosition = wordItem.symbolPosition - } - }) - }) - }) - - if (nearSymbolPosition === 'start' || nearSymbolPosition === null) { - let addCurrent = false - sentence.words.toReversed().map((wordItem, c) => { - if (wordItem.symbolPosition === 'start' && !addCurrent) { - addCurrent = true - } - }) - if (addCurrent) { - //`“这是私人谈话”`这种没有结束符号的情况,swtich走不到结束符号,也就不会起新的一行 - if (word.name.length) { - sentence.words.push(word) - } - sentence.words.push(cloneDeep({ - ...DefaultArticleWord, - name: v, - nextSpace: true, - isSymbol: true, - symbolPosition: 'end' - })) - word = cloneDeep(DefaultArticleWord) - } else { - let lastSentence = section[section.length - 2] - lastSentence.words[lastSentence.words.length - 1].nextSpace = false - lastSentence.words.push(cloneDeep({ - ...DefaultArticleWord, - name: v, - nextSpace: true, - isSymbol: true, - symbolPosition: 'end' - })) - } - } - break - case '\n': - //如果是空行,就删除 - if (!sentence.words.length) { - section.pop() - sentence = section[section.length - 1] - } - //判断name有没有值,有值说明最后一句没有结束符,正常来说一句话以句号或逗号结尾 - if (word.name.length) { - sentence.words.push(word) - } - if (i !== arr.length - 1) { - sections.push([]) - section = sections[sections.length - 1] - section.push({ - text: '', - translate: '', - words: [] - }) - sentence = section[section.length - 1] - word = cloneDeep(DefaultArticleWord) - } - break - default: - word.name += v - break + article.split('').map((v, i, arr) => { + switch (v) { + case ' ': + if (word.name) { + sentence.words.push(word) + word = cloneDeep(DefaultArticleWord) } - }) - // console.log(cloneDeep(sections)) - sections = sections.filter(sectionItem => sectionItem.length) - sections.map((sectionItem, a) => { - sectionItem.map((sentenceItem, b) => { - sentenceItem.text = sentenceItem.words.reduce((previousValue: string, currentValue) => { - previousValue += currentValue.name + (currentValue.nextSpace ? ' ' : '') - return previousValue - }, '') + break + case keyboardMap.Period: + case keyboardMap.Comma: + case keyboardMap.Slash: + case keyboardMap.Exclamation: + word.nextSpace = false + sentence.words.push(word) + sentence.words.push(cloneDeep({...DefaultArticleWord, name: v, nextSpace: true})) + section.push({ + text: '', + translate: '', + words: [] + }) + sentence = section[section.length - 1] + word = cloneDeep(DefaultArticleWord) + break + case keyboardMap.QuoteLeft: + sentence.words.push(cloneDeep({ + ...DefaultArticleWord, + name: v, + nextSpace: false, + isSymbol: true, + symbolPosition: 'start' + })) + word = cloneDeep(DefaultArticleWord) + break + case keyboardMap.QuoteRight: + let nearSymbolPosition = null + //TODO 可以优化成for+break + sections.toReversed().map((sectionItem, a) => { + sectionItem.toReversed().map((sentenceItem, b) => { + sentenceItem.words.toReversed().map((wordItem, c) => { + if (wordItem.symbolPosition !== '' && nearSymbolPosition === null) { + nearSymbolPosition = wordItem.symbolPosition + } + }) + }) }) - }) - return sections + if (nearSymbolPosition === 'start' || nearSymbolPosition === null) { + let addCurrent = false + sentence.words.toReversed().map((wordItem, c) => { + if (wordItem.symbolPosition === 'start' && !addCurrent) { + addCurrent = true + } + }) + if (addCurrent) { + //`“这是私人谈话”`这种没有结束符号的情况,swtich走不到结束符号,也就不会起新的一行 + if (word.name.length) { + sentence.words.push(word) + } + sentence.words.push(cloneDeep({ + ...DefaultArticleWord, + name: v, + nextSpace: true, + isSymbol: true, + symbolPosition: 'end' + })) + word = cloneDeep(DefaultArticleWord) + } else { + let lastSentence = section[section.length - 2] + lastSentence.words[lastSentence.words.length - 1].nextSpace = false + lastSentence.words.push(cloneDeep({ + ...DefaultArticleWord, + name: v, + nextSpace: true, + isSymbol: true, + symbolPosition: 'end' + })) + } + } + break + case '\n': + //如果是空行,就删除 + if (!sentence.words.length) { + section.pop() + sentence = section[section.length - 1] + } + //判断name有没有值,有值说明最后一句没有结束符,正常来说一句话以句号或逗号结尾 + if (word.name.length) { + sentence.words.push(word) + } + if (i !== arr.length - 1) { + sections.push([]) + section = sections[sections.length - 1] + section.push({ + text: '', + translate: '', + words: [] + }) + sentence = section[section.length - 1] + word = cloneDeep(DefaultArticleWord) + } + break + default: + word.name += v + break + } + }) + // console.log(cloneDeep(sections)) + sections = sections.filter(sectionItem => sectionItem.length) + sections.map((sectionItem, a) => { + sectionItem.map((sentenceItem, b) => { + sentenceItem.text = sentenceItem.words.reduce((previousValue: string, currentValue) => { + previousValue += currentValue.name + (currentValue.nextSpace ? ' ' : '') + return previousValue + }, '') + }) + }) + + return sections } export function getSplitTranslateText(article: string) { - let sections = splitCNArticle(article) - let str = '' - if (sections.length) { - sections.map((sectionItem) => { - sectionItem.map((sentenceItem) => { - str += sentenceItem.text + '\n' - }) - str += '\n' - }) - } - return str + let sections = splitCNArticle(article) + let str = '' + if (sections.length) { + sections.map((sectionItem) => { + sectionItem.map((sentenceItem) => { + str += sentenceItem.text + '\n' + }) + str += '\n' + }) + } + return str } export function splitEnArticle2(text: string) { - //去除头和尾部的空格 - text = text.trim() - // text = text.replaceAll(`‘`, '"') - // text = text.replaceAll(`’`, '"') - // text = text.replaceAll(`“`, '"') - // text = text.replaceAll(`”`, '"') - // 替换所有单引号为双引号 - // text = text.replaceAll(`'`, '"') - // 将缩写词的双引号替换回单引号 - // text = text.replaceAll(`"t`, `'t`) - // text = text.replaceAll(`"s`, `'s`) - // text = text.replaceAll(`"S`, `'S`) - // text = text.replaceAll(`"m`, `'m`) - // text = text.replaceAll(`"d`, `'d`) - // text = text.replaceAll(`"ve`, `'ve`) - // text = text.replaceAll(`"clock`, `'clock`) - console.log('splitEnArticle', text) - let sections = text.split('\n'); - let sentences = text - console.log('sections',sections) + let keyboardMap = EnKeyboardMap + text = "On Jan. 20, former Sen. Barack Obama became the 44th President of the U.S. Millions attended the Inauguration. It was Sunday? I never get up early on Sundays! I sometimes stay in bed until lunchtime. Last Sunday I got up very late. I looked out of the window. It was dark outside. 'What a day!' I thought. 'It's raining again.' Just then, the telephone rang. It was my aunt Lucy. 'I've just arrived by train,' she said. 'I'm coming to see you.'\n 'But I'm still having breakfast,' I said\n 'What are you doing?' she asked.\n 'I'm having breakfast,' I repeated.\n 'Dear me,' she said. 'Do you always get up so late? It's one o'clock!'" + //去除头和尾部的空格 + text = text.trim() + // text = text.replaceAll(`‘`, '"') + // text = text.replaceAll(`’`, '"') + // text = text.replaceAll(`“`, '"') + // text = text.replaceAll(`”`, '"') + // 替换所有单引号为双引号 + // text = text.replaceAll(`'`, '"') + // 将缩写词的双引号替换回单引号 + // text = text.replaceAll(`"t`, `'t`) + // text = text.replaceAll(`"s`, `'s`) + // text = text.replaceAll(`"S`, `'S`) + // text = text.replaceAll(`"m`, `'m`) + // text = text.replaceAll(`"d`, `'d`) + // text = text.replaceAll(`"ve`, `'ve`) + // text = text.replaceAll(`"re`, `'re`) + // text = text.replaceAll(`"clock`, `'clock`) + // console.log('splitEnArticle', text) + + let optional_options = {newline_boundaries: true}; + let sentences = tokenizer.sentences(text, optional_options); + console.log(sentences); + + let sections: Sentence[][] = [] + text.split('\n').map((rowSection, i) => { + rowSection = rowSection.trim() + // console.log(split(rowSection,{ + // SeparatorParser:{ + // separatorCharacters:['.'] + // } + // })) + return + // let section: Sentence[] = [] + // sections.push(section) + // + // // console.log('rowSection', rowSection) + // rowSection.split('.').map((rowSentence, j) => { + // + // rowSentence = rowSentence.trim() + // if (rowSentence) { + // //如果以.结尾,那么最后一项为空,忽略 + // // if (rowSentence && rowSentence[rowSentence.length - 1] !== "'") { + // // rowSentence += '.' + // // } + // if (rowSentence === '"') { + // let lastSentence = section[section.length - 1] + // lastSentence.text += '"' + // } else { + // console.log('rowSentence', rowSentence) + // rowSentence += '.' + // let sentence: Sentence = { + // text: rowSentence, + // translate: '', + // words: [] + // } + // section.push(sentence) + // } + // + // // rowSentence.split('').map((v, i, arr) => { + // // + // // }) + // // let word = cloneDeep({...DefaultArticleWord, name: '', nextSpace: true}); + // // rowSentence.split('').map((v, i, arr) => { + // // switch (v) { + // // case ' ': + // // if (word.name) { + // // sentence.words.push(word) + // // word = cloneDeep(DefaultArticleWord) + // // } + // // break + // // case keyboardMap.Period: + // // word.nextSpace = false + // // sentence.words.push(word) + // // sentence.words.push(cloneDeep({...DefaultArticleWord, name: v, nextSpace: true, isSymbol: true})) + // // word = cloneDeep(DefaultArticleWord) + // // break + // // case keyboardMap.Comma: + // // case keyboardMap.Slash: + // // case keyboardMap.Exclamation: + // // word.nextSpace = false + // // sentence.words.push(word) + // // sentence.words.push(cloneDeep({...DefaultArticleWord, name: v, nextSpace: true, isSymbol: true})) + // // section.push({ + // // text: '', + // // translate: '', + // // words: [] + // // }) + // // sentence = section[section.length - 1] + // // word = cloneDeep(DefaultArticleWord) + // // break + // // case keyboardMap.QuoteLeft: + // // let nearSymbolPosition = null + // // //TODO 可以优化成for+break + // // section.toReversed().map((sentenceItem, b) => { + // // sentenceItem.words.toReversed().map((wordItem, c) => { + // // if (wordItem.symbolPosition !== '' && nearSymbolPosition === null) { + // // nearSymbolPosition = wordItem.symbolPosition + // // } + // // }) + // // }) + // // + // // if (nearSymbolPosition === 'end' || nearSymbolPosition === null) { + // // sentence.words.push(cloneDeep({ + // // ...DefaultArticleWord, + // // name: v, + // // nextSpace: false, + // // isSymbol: true, + // // symbolPosition: 'start' + // // })) + // // word = cloneDeep(DefaultArticleWord) + // // } else { + // // let addCurrent = false + // // sentence.words.toReversed().map((wordItem, c) => { + // // if (wordItem.symbolPosition === 'start' && !addCurrent) { + // // addCurrent = true + // // } + // // }) + // // if (addCurrent) { + // // //`“这是私人谈话”`这种没有结束符号的情况,swtich走不到结束符号,也就不会起新的一行 + // // if (word.name.length) { + // // sentence.words.push(word) + // // } + // // sentence.words.push(cloneDeep({ + // // ...DefaultArticleWord, + // // name: v, + // // nextSpace: true, + // // isSymbol: true, + // // symbolPosition: 'end' + // // })) + // // word = cloneDeep(DefaultArticleWord) + // // } else { + // // debugger + // // let lastSentence = section[section.length - 2] + // // lastSentence.words[lastSentence.words.length - 1].nextSpace = false + // // lastSentence.words.push(cloneDeep({ + // // ...DefaultArticleWord, + // // name: v, + // // nextSpace: true, + // // isSymbol: true, + // // symbolPosition: 'end' + // // })) + // // } + // // } + // // break + // // default: + // // // if (v === '2')debugger + // // word.name += v + // // break + // // } + // // }) + // + // } + // }) + }) + + // sections = sections.filter(sectionItem => sectionItem.length) + // sections.map((sectionItem, a) => { + // sectionItem.map((sentenceItem, b) => { + // sentenceItem.text = sentenceItem.words.reduce((previousValue: string, currentValue) => { + // previousValue += currentValue.name + (currentValue.nextSpace ? ' ' : '') + // return previousValue + // }, '') + // }) + // }) + // console.log(sections) + // console.log('--') + // + // console.log(split(`'What a day!' I thought.`,{ + // SeparatorParser:{ + // separatorCharacters:['!'] + // } + // })) + // console.log('--') + // console.log(split(`On Jan. 20, former Sen. Barack Obama became the 44th President of the U.S. Millions attended the Inauguration.`,)) } diff --git a/test/index.js b/test/index.js new file mode 100644 index 00000000..26df73e9 --- /dev/null +++ b/test/index.js @@ -0,0 +1,54 @@ +let text = `On Jan. 20, former Sen. Barack Obama became the 44th President of the U.S. Millions attended the Inauguration. It was Sunday? I never get up early on Sundays! I sometimes stay in bed until lunchtime. Last Sunday I got up very late. I looked out of the window. It was dark outside. 'What a day!' I thought. 'It's raining again.' Just then, the telephone rang. It was my aunt Lucy. 'I've just arrived by train,' she said. 'I'm coming to see you.'\n 'But I'm still having breakfast,' I said\n 'What are you doing?' she asked.\n 'I'm having breakfast,' I repeated.\n 'Dear me,' she said. 'Do you always get up so late? It's one o'clock!'`; +// text = `How does the older investor differ in his approach to investment from the younger investor?\nThere is no shortage of tipsters around offering 'get-rich-quick' opportunities. But if you are a serious private investor, leave the Las Vegas mentality to those with money to fritter. The serious investor needs a proper 'portfolio' -- a well-planned selection of investments, with a definite structure and a clear aim. But exactly how does a newcomer to the stock market go about achieving that?\nWell, if you go to five reputable stock brokers and ask them what you should do with your money, you're likely to get five different answers, -- even if you give all the relevant information about your age age, family, finances and what you want from your investments. Moral? There is no one 'right' way to structure a portfolio. However, there are undoubtedly some wrong ways, and you can be sure that none of our five advisers would have suggested sinking all (or perhaps any) of your money into Periwigs*.\nSo what should you do? We'll assume that you have sorted out the basics -- like mortgages, pensions, insurance and access to sufficient cash reserves. You should then establish your own individual aims. These are partly a matter of personal circumstances, partly a matter of psychology.\nFor instance, if you are older you have less time to recover from any major losses, and you may well wish to boost your pension income. So preserving your capital and generating extra income are your main priorities. In this case, you'd probably construct a portfolio with some shares (but not high risk ones), along with gilts, cash deposits, and perhaps convertibles or the income shares of split capital investment trusts.\nIf you are younger, and in a solid financial position, you may decide to take an aggressive approach -- but only if you're blessed with a sanguine disposition and won't suffer sleepless nights over share prices. If portfolio, alongside your more pedestrian in vestments. Once you have decided on your investment aims, you can then decide where to put your money. The golden rule here is spread your risk -- if you put all of your money into Periwigs International, you're setting yourself up as a hostage to fortune.\n*'Periwigs' is the name of a fictitious company.\nINVESTOR'S CHRONICLE, March 23 1990`; +console.time() + + text = text.replaceAll(`'`, '"') + // 将缩写词的双引号替换回单引号 + text = text.replaceAll(`"t`, `'t`) + text = text.replaceAll(`"s`, `'s`) + text = text.replaceAll(`"S`, `'S`) + text = text.replaceAll(`"m`, `'m`) + text = text.replaceAll(`"d`, `'d`) + text = text.replaceAll(`"ve`, `'ve`) + text = text.replaceAll(`"re`, `'re`) + text = text.replaceAll(`"clock`, `'clock`) + +// var Tokenizer = require('sentence-tokenizer'); +// var tokenizer = new Tokenizer('Chuck'); + +// tokenizer.setEntry(v); +// console.log(tokenizer.getSentences()); +var tokenizer = require('sbd'); +var optional_options = { + newline_boundaries: true +}; +// // // text = "On Jan. 20, former Sen. Barack Obama became the 44th President of the U.S. Millions attended the Inauguration."; +// // text.split('\n').map(v=>{ + +// // }) +var sentences = tokenizer.sentences(text, optional_options); +console.log(sentences); + +// Load wink-nlp package. +const winkNLP = require('wink-nlp'); +// Load english language model. +const model = require('wink-eng-lite-web-model'); +// Instantiate winkNLP. +const nlp = winkNLP(model); +// Obtain "its" helper to extract item properties. +const its = nlp.its; +// Obtain "as" reducer helper to reduce a collection. +const as = nlp.as; + +// NLP Code. +// text = 'Hello World🌎! How are you?'; +const doc = nlp.readDoc(text); + +// console.log( doc.out() ); +// -> Hello World🌎! How are you? + +doc.sentences().each(v => { + console.log(v.out()); + // console.log(v.tokens().out()); +}) +console.timeEnd() diff --git a/test/package-lock.json b/test/package-lock.json new file mode 100644 index 00000000..9d64c919 --- /dev/null +++ b/test/package-lock.json @@ -0,0 +1,199 @@ +{ + "name": "test", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "test", + "version": "1.0.0", + "license": "ISC", + "dependencies": { + "sbd": "^1.0.19", + "sentence-tokenizer": "^1.0.1", + "wink-eng-lite-web-model": "^1.5.2", + "wink-nlp": "^1.14.3" + } + }, + "node_modules/debug": { + "version": "4.1.0", + "resolved": "https://registry.npmmirror.com/debug/-/debug-4.1.0.tgz", + "integrity": "sha512-heNPJUJIqC+xB6ayLAMHaIrmN9HKa7aQO8MGqKpvCA+uJYVcvR6l5kgdrhRuwPFHU7P5/A1w0BjByPHwpfTDKg==", + "deprecated": "Debug versions >=3.2.0 <3.2.7 || >=4 <4.3.1 have a low-severity ReDos regression when used in a Node.js environment. It is recommended you upgrade to 3.2.7 or 4.3.1. (https://github.com/visionmedia/debug/issues/797)", + "dependencies": { + "ms": "^2.1.1" + } + }, + "node_modules/deepmerge": { + "version": "4.3.1", + "resolved": "https://registry.npmmirror.com/deepmerge/-/deepmerge-4.3.1.tgz", + "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmmirror.com/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmmirror.com/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmmirror.com/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + } + }, + "node_modules/domutils": { + "version": "3.1.0", + "resolved": "https://registry.npmmirror.com/domutils/-/domutils-3.1.0.tgz", + "integrity": "sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==", + "dependencies": { + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + } + }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmmirror.com/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "engines": { + "node": ">=0.12" + } + }, + "node_modules/escape-string-regexp": { + "version": "4.0.0", + "resolved": "https://registry.npmmirror.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", + "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", + "engines": { + "node": ">=10" + } + }, + "node_modules/htmlparser2": { + "version": "8.0.2", + "resolved": "https://registry.npmmirror.com/htmlparser2/-/htmlparser2-8.0.2.tgz", + "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1", + "entities": "^4.4.0" + } + }, + "node_modules/is-plain-object": { + "version": "5.0.0", + "resolved": "https://registry.npmmirror.com/is-plain-object/-/is-plain-object-5.0.0.tgz", + "integrity": "sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmmirror.com/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + }, + "node_modules/nanoid": { + "version": "3.3.6", + "resolved": "https://registry.npmmirror.com/nanoid/-/nanoid-3.3.6.tgz", + "integrity": "sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA==", + "bin": { + "nanoid": "bin/nanoid.cjs" + }, + "engines": { + "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" + } + }, + "node_modules/parse-srcset": { + "version": "1.0.2", + "resolved": "https://registry.npmmirror.com/parse-srcset/-/parse-srcset-1.0.2.tgz", + "integrity": "sha512-/2qh0lav6CmI15FzA3i/2Bzk2zCgQhGMkvhOhKNcBVQ1ldgpbfiNTVslmooUmWJcADi1f1kIeynbDRVzNlfR6Q==" + }, + "node_modules/picocolors": { + "version": "1.0.0", + "resolved": "https://registry.npmmirror.com/picocolors/-/picocolors-1.0.0.tgz", + "integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==" + }, + "node_modules/postcss": { + "version": "8.4.30", + "resolved": "https://registry.npmmirror.com/postcss/-/postcss-8.4.30.tgz", + "integrity": "sha512-7ZEao1g4kd68l97aWG/etQKPKq07us0ieSZ2TnFDk11i0ZfDW2AwKHYU8qv4MZKqN2fdBfg+7q0ES06UA73C1g==", + "dependencies": { + "nanoid": "^3.3.6", + "picocolors": "^1.0.0", + "source-map-js": "^1.0.2" + }, + "engines": { + "node": "^10 || ^12 || >=14" + } + }, + "node_modules/sanitize-html": { + "version": "2.11.0", + "resolved": "https://registry.npmmirror.com/sanitize-html/-/sanitize-html-2.11.0.tgz", + "integrity": "sha512-BG68EDHRaGKqlsNjJ2xUB7gpInPA8gVx/mvjO743hZaeMCZ2DwzW7xvsqZ+KNU4QKwj86HJ3uu2liISf2qBBUA==", + "dependencies": { + "deepmerge": "^4.2.2", + "escape-string-regexp": "^4.0.0", + "htmlparser2": "^8.0.0", + "is-plain-object": "^5.0.0", + "parse-srcset": "^1.0.2", + "postcss": "^8.3.11" + } + }, + "node_modules/sbd": { + "version": "1.0.19", + "resolved": "https://registry.npmmirror.com/sbd/-/sbd-1.0.19.tgz", + "integrity": "sha512-b5RyZMGSrFuIB4AHdbv12uYHS8YGEJ36gtuvG3RflbJGY+T0dXmAL0E4vZjQqT2RsX0v+ZwVqhV2zsGr5aFK9w==", + "dependencies": { + "sanitize-html": "^2.3.2" + } + }, + "node_modules/sentence-tokenizer": { + "version": "1.0.1", + "resolved": "https://registry.npmmirror.com/sentence-tokenizer/-/sentence-tokenizer-1.0.1.tgz", + "integrity": "sha512-nmKF6fXmgZouD3AfWgYCmr35g7g7ObtbTlEFRVx2oj/ptrCOrosixrhXhWUdnPRdze7xhMf4IcliAa021BMXTA==", + "dependencies": { + "debug": "4.1.0" + } + }, + "node_modules/source-map-js": { + "version": "1.0.2", + "resolved": "https://registry.npmmirror.com/source-map-js/-/source-map-js-1.0.2.tgz", + "integrity": "sha512-R0XvVJ9WusLiqTCEiGCmICCMplcCkIwwR11mOSD9CR5u+IXYdiseeEuXCVAjS54zqwkLcPNnmU4OeJ6tUrWhDw==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/wink-eng-lite-web-model": { + "version": "1.5.2", + "resolved": "https://registry.npmmirror.com/wink-eng-lite-web-model/-/wink-eng-lite-web-model-1.5.2.tgz", + "integrity": "sha512-qYxAtJi0DuF0mVNr3wwdNj5u+jG9xH8BH45MbNLGzwQrUUpgKs87GfBLup0+ZfuXcpunw6AsgL7xtdl5vtUd2w==", + "engines": { + "node": ">=16.0.0" + }, + "peerDependencies": { + "wink-nlp": ">1.8.1" + } + }, + "node_modules/wink-nlp": { + "version": "1.14.3", + "resolved": "https://registry.npmmirror.com/wink-nlp/-/wink-nlp-1.14.3.tgz", + "integrity": "sha512-lvY5iCs3T8I34F8WKS70+2P0U9dWLn3vdPf/Z+m2VK14N7OmqnPzmHfh3moHdusajoQ37Em39z0IZB9K4x/96A==" + } + } +} diff --git a/test/package.json b/test/package.json new file mode 100644 index 00000000..6104e0b6 --- /dev/null +++ b/test/package.json @@ -0,0 +1,18 @@ +{ + "name": "test", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [], + "author": "", + "license": "ISC", + "dependencies": { + "sbd": "^1.0.19", + "sentence-tokenizer": "^1.0.1", + "wink-eng-lite-web-model": "^1.5.2", + "wink-nlp": "^1.14.3" + } +}