NEW: Support Morphemic Analysis (#39)

closes #39
digitallinguistics · Jan 6, 2024 · e7a25e5 · e7a25e5
1 parent 3029855
commit e7a25e5
Show file tree

Hide file tree

Showing 5 changed files with 153 additions and 65 deletions.
diff --git a/src/utilities/replaceHyphens.js b/src/utilities/replaceHyphens.js
@@ -0,0 +1,8 @@
+/**
+ * Replaces ASCII hyphen-minus characters (U+002D) with non-breaking hyphens (U+2011).
+ * @param {String} data
+ * @returns {String}
+ */
+export default function replaceHyphens(data) {
+  return data.replaceAll(`-`, `\u{2011}`)
+}
diff --git a/src/words/index.js b/src/words/index.js
@@ -1,4 +1,5 @@
 import createLiteral       from './literal.js'
+import createMorphemes     from './morphemes.js'
 import createTranscription from './transcription.js'
 
 export default function createWords(words, options) {
@@ -10,15 +11,17 @@ export default function createWords(words, options) {
   for (const word of words) {
 
     const literal       = createLiteral(word.literal, options)
+    const morphemes     = createMorphemes(word.analysis, options)
     const transcription = createTranscription(word.transcription, options)
 
-    html += `\n<li class=word>
+    html += `<li class=word>
       ${ transcription }
       ${ literal }
+      ${ morphemes }
     </li>`
 
   }
 
-  return `<ol class=words>${ html }\n</ol>`
+  return `<ol class=words>${ html }</ol>`
 
 }
diff --git a/src/words/morphemes.js b/src/words/morphemes.js
@@ -0,0 +1,16 @@
+import addEmphasis    from '../utilities/addEmphasis.js'
+import replaceHyphens from '../utilities/replaceHyphens.js'
+
+export default function createMorphemes(data, { targetLang }) {
+
+  const lang = targetLang ? `lang='${ targetLang }'` : ``
+  let html = ``
+
+  for (const ortho in data) {
+    const morphemes = addEmphasis(replaceHyphens(data[ortho]))
+    html += `<span class=w-m data-ortho='${ ortho }' ${ lang }>${ morphemes }</span>`
+  }
+
+  return html
+
+}
diff --git a/src/words/transcription.js b/src/words/transcription.js
@@ -7,7 +7,7 @@ export default function createTranscription(data, { targetLang }) {
 
   for (const ortho in data) {
     const txn = data[ortho]
-    html += `<span class=w-txn data-ortho='${ ortho }' ${ lang }>${ addEmphasis(txn) }</span>\n`
+    html += `<span class=w-txn data-ortho='${ ortho }' ${ lang }>${ addEmphasis(txn) }</span>`
   }
 
   return html

diff --git a/test/words.test.js b/test/words.test.js
@@ -1,7 +1,8 @@
-import { expect }         from 'chai'
-import findElementByClass from './utilities/findElementByClass.js'
-import { getTextContent } from '../node_modules/@web/parse5-utils/src/index.js'
-import parse              from './utilities/convertAndParse.js'
+import { expect }          from 'chai'
+import findElementByClass  from './utilities/findElementByClass.js'
+import findElementsByClass from './utilities/findElementsByClass.js'
+import { getTextContent }  from '../node_modules/@web/parse5-utils/src/index.js'
+import parse               from './utilities/convertAndParse.js'
 
 import {
   findElement,
@@ -41,90 +42,150 @@ describe(`words`, function() {
 
   })
 
-  it(`word transcription`, async function() {
+  describe(`word transcription`, function() {
 
-    const scription = `
-    \\w-mod  waxdungu   qasi
-    \\w-swad wašdungu   ʔasi
-    \\m      waxt-qungu qasi
-    \\gl     day-one    man
-    \\tln    one day a man
-    `
+    it(`renders in multiple orthographies`, async function() {
 
-    const { dom }                 = await parse(scription)
-    const wordsContainer          = findElementByClass(dom, `words`)
-    const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`)
+      const scription = `
+      \\w-mod  waxdungu   qasi
+      \\w-swad wašdungu   ʔasi
+      \\m      waxt-qungu qasi
+      \\gl     day-one    man
+      \\tln    one day a man
+      `
 
-    const firstText  = getTextContent(firstWord)
-    expect(firstText).to.include(`waxdungu`)
-    expect(firstText).to.include(`wašdungu`)
+      const { dom }                 = await parse(scription)
+      const wordsContainer          = findElementByClass(dom, `words`)
+      const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`)
 
-    const secondText = getTextContent(secondWord)
-    expect(secondText).to.include(`qasi`)
-    expect(secondText).to.include(`ʔasi`)
+      const firstText = getTextContent(firstWord)
+      expect(firstText).to.include(`waxdungu`)
+      expect(firstText).to.include(`wašdungu`)
 
-  })
+      const secondText = getTextContent(secondWord)
+      expect(secondText).to.include(`qasi`)
+      expect(secondText).to.include(`ʔasi`)
 
-  it(`word transcription supports emphasis`, async function() {
+    })
 
-    const scription = `
-    \\w   *waxdungu* qasi
-    \\wlt *one.day*  man
-    `
+    it(`supports emphasis`, async function() {
 
-    const { dom } = await parse(scription)
-    const b       = findElement(dom, el => getTagName(el) === `b`)
+      const scription = `
+      \\w   *waxdungu* qasi
+      \\wlt *one.day*  man
+      `
 
-    expect(getTextContent(b)).to.equal(`waxdungu`)
+      const { dom } = await parse(scription)
+      const b       = findElement(dom, el => getTagName(el) === `b`)
+
+      expect(getTextContent(b)).to.equal(`waxdungu`)
+
+    })
 
   })
 
-  it(`literal word translation (single language)`, async function() {
+  describe(`literal word translation`, function() {
 
-    const scription = `
-    \\w   waxdungu qasi
-    \\wlt one.day  a.man
-    `
+    it(`single language`, async function() {
 
-    const { dom }                 = await parse(scription)
-    const wordsContainer          = findElementByClass(dom, `words`)
-    const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`)
+      const scription = `
+      \\w   waxdungu qasi
+      \\wlt one.day  a.man
+      `
 
-    expect(getTextContent(firstWord)).to.include(`one.day`)
-    expect(getTextContent(secondWord)).to.include(`a.man`)
+      const { dom }                 = await parse(scription)
+      const wordsContainer          = findElementByClass(dom, `words`)
+      const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`)
 
-  })
+      expect(getTextContent(firstWord)).to.include(`one.day`)
+      expect(getTextContent(secondWord)).to.include(`a.man`)
 
-  it(`literal word translation (single language)`, async function() {
+    })
 
-    const scription = `
-    \\w      waxdungu qasi
-    \\wlt-en one.day  a.man
-    \\wlt-sp un.día   un.hombre
-    `
+    it(`multiple languages`, async function() {
+
+      const scription = `
+      \\w      waxdungu qasi
+      \\wlt-en one.day  a.man
+      \\wlt-sp un.día   un.hombre
+      `
+
+      const { dom }                 = await parse(scription)
+      const wordsContainer          = findElementByClass(dom, `words`)
+      const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`)
+
+      expect(getTextContent(firstWord)).to.include(`one.day`)
+      expect(getTextContent(firstWord)).to.include(`un.día`)
+      expect(getTextContent(secondWord)).to.include(`a.man`)
+      expect(getTextContent(secondWord)).to.include(`un.hombre`)
+
+    })
+
+    it(`supports emphasis`, async function() {
 
-    const { dom }                 = await parse(scription)
-    const wordsContainer          = findElementByClass(dom, `words`)
-    const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`)
+      const scription = `
+      \\w   waxdungu  qasi
+      \\wlt *one.day* man
+      `
 
-    expect(getTextContent(firstWord)).to.include(`one.day`)
-    expect(getTextContent(firstWord)).to.include(`un.día`)
-    expect(getTextContent(secondWord)).to.include(`a.man`)
-    expect(getTextContent(secondWord)).to.include(`un.hombre`)
+      const { dom } = await parse(scription)
+      const b       = findElement(dom, el => getTagName(el) === `b`)
+
+      expect(getTextContent(b)).to.equal(`one.day`)
+
+    })
 
   })
 
-  it(`literal word translation supports emphasis`, async function() {
+  describe(`morphemic analysis`, function() {
 
-    const scription = `
-    \\w   waxdungu  qasi
-    \\wlt *one.day* man
-    `
+    it(`renders (with non-breaking hyphens)`, async function() {
+
+      const scription = `
+      ninakupenda
+      ni-na-ku-pend-a
+      1SG.SUBJ-PRES-2SG.OBJ-love-IND
+      I love you
+      `
+
+      const { dom }   = await parse(scription)
+      const morphemes = findElementByClass(dom, `w-m`)
+
+      expect(getTextContent(morphemes)).to.equal(`ni‑na‑ku‑pend‑a`) // non-breaking hyphens
+
+    })
+
+    it(`supports multiple orthographies`, async function() {
+
+      const scription = `
+      \\m-mod  waxt-qungu qasi
+      \\m-swad wašt-ʔungu ʔasi
+      \\gl     day-one  man
+      `
+
+      const { dom, html } = await parse(scription)
+      const [mod, swad]   = findElementsByClass(dom, `w-m`)
+
+      expect(getTextContent(mod)).to.equal(`waxt‑qungu`) // non-breaking hyphens
+      expect(getTextContent(swad)).to.equal(`wašt‑ʔungu`) // non-breaking hyphens
+
+    })
+
+    it(`supports emphasis`, async function() {
+
+      const scription = `
+      ninakupenda
+      ni-na-ku-*pend*-a
+      1SG.SUBJ-PRES-2SG.OBJ-love-IND
+      I love you
+      `
+
+      const { dom } = await parse(scription)
+      const b       = findElement(dom, el => getTagName(el) === `b`)
 
-    const { dom } = await parse(scription)
-    const b       = findElement(dom, el => getTagName(el) === `b`)
+      expect(getTextContent(b)).to.equal(`pend`) // only the emphasized morpheme is inside <b>
 
-    expect(getTextContent(b)).to.equal(`one.day`)
+    })
 
   })