Skip to content

Commit

Permalink
NEW: Support Morphemic Analysis (#39)
Browse files Browse the repository at this point in the history
closes #39
  • Loading branch information
dwhieb committed Jan 6, 2024
1 parent 3029855 commit e7a25e5
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 65 deletions.
8 changes: 8 additions & 0 deletions src/utilities/replaceHyphens.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// The character typed on a keyboard as `-` (U+002D HYPHEN-MINUS).
const HYPHEN_MINUS = `-`

// U+2011 NON-BREAKING HYPHEN.
const NON_BREAKING_HYPHEN = `\u{2011}`

/**
 * Replaces every ASCII hyphen-minus (U+002D) in a string with a non-breaking hyphen (U+2011).
 * Other hyphen-like characters (for example U+2010 HYPHEN) are left untouched.
 * @param {String} data The text whose hyphens should be replaced.
 * @returns {String} A copy of `data` with each `-` replaced by U+2011.
 */
export default function replaceHyphens(data) {
  return data.replaceAll(HYPHEN_MINUS, NON_BREAKING_HYPHEN)
}
7 changes: 5 additions & 2 deletions src/words/index.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import createLiteral from './literal.js'
import createMorphemes from './morphemes.js'
import createTranscription from './transcription.js'

export default function createWords(words, options) {
Expand All @@ -10,15 +11,17 @@ export default function createWords(words, options) {
for (const word of words) {

const literal = createLiteral(word.literal, options)
const morphemes = createMorphemes(word.analysis, options)
const transcription = createTranscription(word.transcription, options)

html += `\n<li class=word>
html += `<li class=word>
${ transcription }
${ literal }
${ morphemes }
</li>`

}

return `<ol class=words>${ html }\n</ol>`
return `<ol class=words>${ html }</ol>`

}
16 changes: 16 additions & 0 deletions src/words/morphemes.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import addEmphasis from '../utilities/addEmphasis.js'
import replaceHyphens from '../utilities/replaceHyphens.js'

/**
 * Builds the HTML for a word's morphemic analysis: one `<span class=w-m>` per key of `data`.
 * @param {Object} data Object whose keys are written to `data-ortho` and whose values are the morpheme strings to render. May be nullish, in which case nothing is rendered.
 * @param {Object} options
 * @param {String} [options.targetLang] When truthy, written as the `lang` attribute of every span.
 * @returns {String} The concatenated spans, or an empty string when there is nothing to render.
 */
export default function createMorphemes(data, { targetLang }) {

  const langAttr = targetLang ? `lang='${ targetLang }'` : ``
  const spans = []

  // `for...in` is used deliberately: it iterates nothing (instead of throwing) when `data` is nullish.
  for (const orthography in data) {
    // Hyphens are made non-breaking first, then emphasis markup is applied to the result.
    const hyphenated = replaceHyphens(data[orthography])
    const content = addEmphasis(hyphenated)
    spans.push(`<span class=w-m data-ortho='${ orthography }' ${ langAttr }>${ content }</span>`)
  }

  return spans.join(``)

}
2 changes: 1 addition & 1 deletion src/words/transcription.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export default function createTranscription(data, { targetLang }) {

for (const ortho in data) {
const txn = data[ortho]
html += `<span class=w-txn data-ortho='${ ortho }' ${ lang }>${ addEmphasis(txn) }</span>\n`
html += `<span class=w-txn data-ortho='${ ortho }' ${ lang }>${ addEmphasis(txn) }</span>`
}

return html
Expand Down
185 changes: 123 additions & 62 deletions test/words.test.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { expect } from 'chai'
import findElementByClass from './utilities/findElementByClass.js'
import { getTextContent } from '../node_modules/@web/parse5-utils/src/index.js'
import parse from './utilities/convertAndParse.js'
import { expect } from 'chai'
import findElementByClass from './utilities/findElementByClass.js'
import findElementsByClass from './utilities/findElementsByClass.js'
import { getTextContent } from '../node_modules/@web/parse5-utils/src/index.js'
import parse from './utilities/convertAndParse.js'

import {
findElement,
Expand Down Expand Up @@ -41,90 +42,150 @@ describe(`words`, function() {

})

it(`word transcription`, async function() {
describe(`word transcription`, function() {

const scription = `
\\w-mod waxdungu qasi
\\w-swad wašdungu ʔasi
\\m waxt-qungu qasi
\\gl day-one man
\\tln one day a man
`
it(`renders in multiple orthographies`, async function() {

const { dom } = await parse(scription)
const wordsContainer = findElementByClass(dom, `words`)
const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`)
const scription = `
\\w-mod waxdungu qasi
\\w-swad wašdungu ʔasi
\\m waxt-qungu qasi
\\gl day-one man
\\tln one day a man
`

const firstText = getTextContent(firstWord)
expect(firstText).to.include(`waxdungu`)
expect(firstText).to.include(`wašdungu`)
const { dom } = await parse(scription)
const wordsContainer = findElementByClass(dom, `words`)
const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`)

const secondText = getTextContent(secondWord)
expect(secondText).to.include(`qasi`)
expect(secondText).to.include(`ʔasi`)
const firstText = getTextContent(firstWord)
expect(firstText).to.include(`waxdungu`)
expect(firstText).to.include(`wašdungu`)

})
const secondText = getTextContent(secondWord)
expect(secondText).to.include(`qasi`)
expect(secondText).to.include(`ʔasi`)

it(`word transcription supports emphasis`, async function() {
})

const scription = `
\\w *waxdungu* qasi
\\wlt *one.day* man
`
it(`supports emphasis`, async function() {

const { dom } = await parse(scription)
const b = findElement(dom, el => getTagName(el) === `b`)
const scription = `
\\w *waxdungu* qasi
\\wlt *one.day* man
`

expect(getTextContent(b)).to.equal(`waxdungu`)
const { dom } = await parse(scription)
const b = findElement(dom, el => getTagName(el) === `b`)

expect(getTextContent(b)).to.equal(`waxdungu`)

})

})

it(`literal word translation (single language)`, async function() {
describe(`literal word translation`, function() {

const scription = `
\\w waxdungu qasi
\\wlt one.day a.man
`
it(`single language`, async function() {

const { dom } = await parse(scription)
const wordsContainer = findElementByClass(dom, `words`)
const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`)
const scription = `
\\w waxdungu qasi
\\wlt one.day a.man
`

expect(getTextContent(firstWord)).to.include(`one.day`)
expect(getTextContent(secondWord)).to.include(`a.man`)
const { dom } = await parse(scription)
const wordsContainer = findElementByClass(dom, `words`)
const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`)

})
expect(getTextContent(firstWord)).to.include(`one.day`)
expect(getTextContent(secondWord)).to.include(`a.man`)

it(`literal word translation (single language)`, async function() {
})

const scription = `
\\w waxdungu qasi
\\wlt-en one.day a.man
\\wlt-sp un.día un.hombre
`
it(`multiple languages`, async function() {

const scription = `
\\w waxdungu qasi
\\wlt-en one.day a.man
\\wlt-sp un.día un.hombre
`

const { dom } = await parse(scription)
const wordsContainer = findElementByClass(dom, `words`)
const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`)

expect(getTextContent(firstWord)).to.include(`one.day`)
expect(getTextContent(firstWord)).to.include(`un.día`)
expect(getTextContent(secondWord)).to.include(`a.man`)
expect(getTextContent(secondWord)).to.include(`un.hombre`)

})

it(`supports emphasis`, async function() {

const { dom } = await parse(scription)
const wordsContainer = findElementByClass(dom, `words`)
const [firstWord, secondWord] = wordsContainer.childNodes.filter(node => node.tagName === `li`)
const scription = `
\\w waxdungu qasi
\\wlt *one.day* man
`

expect(getTextContent(firstWord)).to.include(`one.day`)
expect(getTextContent(firstWord)).to.include(`un.día`)
expect(getTextContent(secondWord)).to.include(`a.man`)
expect(getTextContent(secondWord)).to.include(`un.hombre`)
const { dom } = await parse(scription)
const b = findElement(dom, el => getTagName(el) === `b`)

expect(getTextContent(b)).to.equal(`one.day`)

})

})

it(`literal word translation supports emphasis`, async function() {
describe(`morphemic analysis`, function() {

const scription = `
\\w waxdungu qasi
\\wlt *one.day* man
`
it(`renders (with non-breaking hyphens)`, async function() {

const scription = `
ninakupenda
ni-na-ku-pend-a
1SG.SUBJ-PRES-2SG.OBJ-love-IND
I love you
`

const { dom } = await parse(scription)
const morphemes = findElementByClass(dom, `w-m`)

expect(getTextContent(morphemes)).to.equal(`ni‑na‑ku‑pend‑a`) // non-breaking hyphens

})

// Checks that each `\m-<orthography>` line yields its own `w-m` element,
// with regular hyphens rendered as non-breaking hyphens (U+2011).
it(`supports multiple orthographies`, async function() {

const scription = `
\\m-mod waxt-qungu qasi
\\m-swad wašt-ʔungu ʔasi
\\gl day-one man
`

const { dom } = await parse(scription)
// NOTE(review): assumes findElementsByClass returns matches in document order — confirm.
const [mod, swad] = findElementsByClass(dom, `w-m`)

expect(getTextContent(mod)).to.equal(`waxt‑qungu`) // non-breaking hyphens
expect(getTextContent(swad)).to.equal(`wašt‑ʔungu`) // non-breaking hyphens

})

it(`supports emphasis`, async function() {

const scription = `
ninakupenda
ni-na-ku-*pend*-a
1SG.SUBJ-PRES-2SG.OBJ-love-IND
I love you
`

const { dom } = await parse(scription)
const b = findElement(dom, el => getTagName(el) === `b`)

const { dom } = await parse(scription)
const b = findElement(dom, el => getTagName(el) === `b`)
expect(getTextContent(b)).to.equal(`pend`) // non-breaking hyphens

expect(getTextContent(b)).to.equal(`one.day`)
})

})

Expand Down

0 comments on commit e7a25e5

Please sign in to comment.