From e7a25e57fb1b36bfa25d564cc5fcbe2e458da94a Mon Sep 17 00:00:00 2001 From: "Daniel W. Hieber" Date: Fri, 5 Jan 2024 21:49:36 -0600 Subject: [PATCH] NEW: Support Morphemic Analysis (#39) closes #39 --- src/utilities/replaceHyphens.js | 8 ++ src/words/index.js | 7 +- src/words/morphemes.js | 16 +++ src/words/transcription.js | 2 +- test/words.test.js | 185 +++++++++++++++++++++----------- 5 files changed, 153 insertions(+), 65 deletions(-) create mode 100644 src/utilities/replaceHyphens.js create mode 100644 src/words/morphemes.js diff --git a/src/utilities/replaceHyphens.js b/src/utilities/replaceHyphens.js new file mode 100644 index 0000000..322855a --- /dev/null +++ b/src/utilities/replaceHyphens.js @@ -0,0 +1,8 @@ +/** + * Replaces regular hyphens (hyphen-minus, U+002D) with non-breaking hyphens (U+2011). + * @param {String} data + * @returns {String} + */ +export default function replaceHyphens(data) { + return data.replaceAll(`-`, `\u{2011}`) +} diff --git a/src/words/index.js b/src/words/index.js index 4c7471d..b073cea 100644 --- a/src/words/index.js +++ b/src/words/index.js @@ -1,4 +1,5 @@ import createLiteral from './literal.js' +import createMorphemes from './morphemes.js' import createTranscription from './transcription.js' export default function createWords(words, options) { @@ -10,15 +11,17 @@ export default function createWords(words, options) { for (const word of words) { const literal = createLiteral(word.literal, options) + const morphemes = createMorphemes(word.analysis, options) const transcription = createTranscription(word.transcription, options) - html += `\n
  • + html += `
  • ${ transcription } ${ literal } + ${ morphemes }
  • ` } - return `
      ${ html }\n
    ` + return `
      ${ html }
    ` } diff --git a/src/words/morphemes.js b/src/words/morphemes.js new file mode 100644 index 0000000..f5dca4a --- /dev/null +++ b/src/words/morphemes.js @@ -0,0 +1,16 @@ +import addEmphasis from '../utilities/addEmphasis.js' +import replaceHyphens from '../utilities/replaceHyphens.js' + +export default function createMorphemes(data, { targetLang }) { + + const lang = targetLang ? `lang='${ targetLang }'` : `` + let html = `` + + for (const ortho in data) { + const morphemes = addEmphasis(replaceHyphens(data[ortho])) + html += `${ morphemes }` + } + + return html + +} diff --git a/src/words/transcription.js b/src/words/transcription.js index b94e7fb..bce147f 100644 --- a/src/words/transcription.js +++ b/src/words/transcription.js @@ -7,7 +7,7 @@ export default function createTranscription(data, { targetLang }) { for (const ortho in data) { const txn = data[ortho] - html += `${ addEmphasis(txn) }\n` + html += `${ addEmphasis(txn) }` } return html diff --git a/test/words.test.js b/test/words.test.js index 70b76fb..4e13648 100644 --- a/test/words.test.js +++ b/test/words.test.js @@ -1,7 +1,8 @@ -import { expect } from 'chai' -import findElementByClass from './utilities/findElementByClass.js' -import { getTextContent } from '../node_modules/@web/parse5-utils/src/index.js' -import parse from './utilities/convertAndParse.js' +import { expect } from 'chai' +import findElementByClass from './utilities/findElementByClass.js' +import findElementsByClass from './utilities/findElementsByClass.js' +import { getTextContent } from '../node_modules/@web/parse5-utils/src/index.js' +import parse from './utilities/convertAndParse.js' import { findElement, @@ -41,90 +42,150 @@ describe(`words`, function() { }) - it(`word transcription`, async function() { + describe(`word transcription`, function() { - const scription = ` - \\w-mod waxdungu qasi - \\w-swad wašdungu ʔasi - \\m waxt-qungu qasi - \\gl day-one man - \\tln one day a man - ` + it(`renders in multiple orthographies`, async 
function() { - const { dom } = await parse(scription) - const wordsContainer = findElementByClass(dom, `words`) - const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`) + const scription = ` + \\w-mod waxdungu qasi + \\w-swad wašdungu ʔasi + \\m waxt-qungu qasi + \\gl day-one man + \\tln one day a man + ` - const firstText = getTextContent(firstWord) - expect(firstText).to.include(`waxdungu`) - expect(firstText).to.include(`wašdungu`) + const { dom } = await parse(scription) + const wordsContainer = findElementByClass(dom, `words`) + const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`) - const secondText = getTextContent(secondWord) - expect(secondText).to.include(`qasi`) - expect(secondText).to.include(`ʔasi`) + const firstText = getTextContent(firstWord) + expect(firstText).to.include(`waxdungu`) + expect(firstText).to.include(`wašdungu`) - }) + const secondText = getTextContent(secondWord) + expect(secondText).to.include(`qasi`) + expect(secondText).to.include(`ʔasi`) - it(`word transcription supports emphasis`, async function() { + }) - const scription = ` - \\w *waxdungu* qasi - \\wlt *one.day* man - ` + it(`supports emphasis`, async function() { - const { dom } = await parse(scription) - const b = findElement(dom, el => getTagName(el) === `b`) + const scription = ` + \\w *waxdungu* qasi + \\wlt *one.day* man + ` - expect(getTextContent(b)).to.equal(`waxdungu`) + const { dom } = await parse(scription) + const b = findElement(dom, el => getTagName(el) === `b`) + + expect(getTextContent(b)).to.equal(`waxdungu`) + + }) }) - it(`literal word translation (single language)`, async function() { + describe(`literal word translation`, function() { - const scription = ` - \\w waxdungu qasi - \\wlt one.day a.man - ` + it(`single language`, async function() { - const { dom } = await parse(scription) - const wordsContainer = findElementByClass(dom, `words`) - const [firstWord, secondWord] = 
wordsContainer.childNodes.filter(node => node.tagName === `li`) + const scription = ` + \\w waxdungu qasi + \\wlt one.day a.man + ` - expect(getTextContent(firstWord)).to.include(`one.day`) - expect(getTextContent(secondWord)).to.include(`a.man`) + const { dom } = await parse(scription) + const wordsContainer = findElementByClass(dom, `words`) + const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`) - }) + expect(getTextContent(firstWord)).to.include(`one.day`) + expect(getTextContent(secondWord)).to.include(`a.man`) - it(`literal word translation (single language)`, async function() { + }) - const scription = ` - \\w waxdungu qasi - \\wlt-en one.day a.man - \\wlt-sp un.día un.hombre - ` + it(`multiple languages`, async function() { + + const scription = ` + \\w waxdungu qasi + \\wlt-en one.day a.man + \\wlt-sp un.día un.hombre + ` + + const { dom } = await parse(scription) + const wordsContainer = findElementByClass(dom, `words`) + const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`) + + expect(getTextContent(firstWord)).to.include(`one.day`) + expect(getTextContent(firstWord)).to.include(`un.día`) + expect(getTextContent(secondWord)).to.include(`a.man`) + expect(getTextContent(secondWord)).to.include(`un.hombre`) + + }) + + it(`supports emphasis`, async function() { - const { dom } = await parse(scription) - const wordsContainer = findElementByClass(dom, `words`) - const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`) + const scription = ` + \\w waxdungu qasi + \\wlt *one.day* man + ` - expect(getTextContent(firstWord)).to.include(`one.day`) - expect(getTextContent(firstWord)).to.include(`un.día`) - expect(getTextContent(secondWord)).to.include(`a.man`) - expect(getTextContent(secondWord)).to.include(`un.hombre`) + const { dom } = await parse(scription) + const b = findElement(dom, el => getTagName(el) === `b`) + + 
expect(getTextContent(b)).to.equal(`one.day`) + + }) }) - it(`literal word translation supports emphasis`, async function() { + describe(`morphemic analysis`, function() { - const scription = ` - \\w waxdungu qasi - \\wlt *one.day* man - ` + it(`renders (with non-breaking hyphens)`, async function() { + + const scription = ` + ninakupenda + ni-na-ku-pend-a + 1SG.SUBJ-PRES-2SG.OBJ-love-IND + I love you + ` + + const { dom } = await parse(scription) + const morphemes = findElementByClass(dom, `w-m`) + + expect(getTextContent(morphemes)).to.equal(`ni‑na‑ku‑pend‑a`) // non-breaking hyphens + + }) + + it(`supports multiple orthographies`, async function() { + + const scription = ` + \\m-mod waxt-qungu qasi + \\m-swad wašt-ʔungu ʔasi + \\gl day-one man + ` + + const { dom, html } = await parse(scription) + const [mod, swad] = findElementsByClass(dom, `w-m`) + + expect(getTextContent(mod)).to.equal(`waxt‑qungu`) // non-breaking hyphens + expect(getTextContent(swad)).to.equal(`wašt‑ʔungu`) // non-breaking hyphens + + }) + + it(`supports emphasis`, async function() { + + const scription = ` + ninakupenda + ni-na-ku-*pend*-a + 1SG.SUBJ-PRES-2SG.OBJ-love-IND + I love you + ` + + const { dom } = await parse(scription) + const b = findElement(dom, el => getTagName(el) === `b`) - const { dom } = await parse(scription) - const b = findElement(dom, el => getTagName(el) === `b`) + expect(getTextContent(b)).to.equal(`pend`) // non-breaking hyphens - expect(getTextContent(b)).to.equal(`one.day`) + }) })