-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathindex.js
231 lines (226 loc) · 7.83 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
/**
* wellbeing_analysis
* v4.0.0
*
* Analyse positive / negative PERMA wellbeing expression
* in English or Spanish strings.
*
* DISCLAIMER:
* wellbeing_analysis is provided for educational and entertainment purposes
* only. It does not provide, and is not a substitute for, medical advice
* or diagnosis.
*
* Help me make this better:
* https://github.com/phugh/wellbeing_analysis
*
* Based on this paper:
* Schwartz, H. A., Eichstaedt, J. C., Kern, M. L., Dziurzynski, L.,
* Ramones, S. M., Agrawal, M., Shah, A., Kosinski, M., Stillwell, D.,
* Seligman, M. E., & Ungar, L. H. (2013).
* Personality, gender, and age in the language of social media:
* The Open-Vocabulary Approach. PLOS ONE, 8(9), e73791.
*
* Using the permaV3_dd and dd_spermaV3 lexicon data from
* http://www.wwbp.org/lexica.html. Used under the Creative Commons
* Attribution-NonCommercial-ShareAlike 3.0 Unported licence
*
* (C) 2017-18 P. Hughes
* Licence : Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported
* http://creativecommons.org/licenses/by-nc-sa/3.0/
*
* Usage example:
* const wba = require("wellbeing_analysis");
* // These are the default options
* const opts = {
* 'encoding': 'binary',
* 'lang': 'english',
* 'locale': 'US',
* 'logs': 3,
* 'max': Number.POSITIVE_INFINITY,
* 'min': Number.NEGATIVE_INFINITY,
* 'nGrams': [2, 3],
* 'noInt': false,
* 'output': 'lex',
* 'places': undefined,
* 'sortBy': 'freq',
* 'wcGrams': false,
* };
* const str = 'A big long string of text...';
* const wellbeing = wba(str, opts);
* console.log(wellbeing);
*
* See README.md for help.
*
* @param {string} str input string
* @param {Object} [opts] options object
* @return {Object} PERMA object
*/
(function() {
'use strict';
// Lexicon data
const english = require('./data/english.json');
const spanish = require('./data/spanish.json');
// External modules
const async = require('async');
const trans = require('british_american_translate');
const simplengrams = require('simplengrams');
const tokenizer = require('happynodetokenizer');
const lexHelpers = require('lex-helpers');
const arr2string = lexHelpers.arr2string;
const doLex = lexHelpers.doLex;
const doMatches = lexHelpers.doMatches;
const getMatches = lexHelpers.getMatches;
const itemCount = lexHelpers.itemCount;
/**
 * Analyse the positive / negative PERMA wellbeing expression of a string.
 *
 * Defaults are applied to a shallow copy of `opts`, so the caller's object
 * is never modified.
 *
 * @function wellbeingAnalysis
 * @param {string} str input string (other truthy values are stringified)
 * @param {Object} [opts] options object
 * @param {string} [opts.encoding='binary'] handed to doMatches / doLex
 * @param {string} [opts.lang='english'] 'english' or 'spanish' / 'espanol'
 * @param {string} [opts.locale='US'] 'GB' runs the text through uk2us first
 * @param {number} [opts.logs=3] verbosity; warnings are printed when > 1
 * @param {boolean} [opts.suppressLog] when truthy, forces logs to 0
 * @param {number} [opts.max=Infinity] upper bound handed to getMatches
 * @param {number} [opts.min=-Infinity] lower bound handed to getMatches
 * @param {Array<number>} [opts.nGrams=[2, 3]] n-gram sizes; 0 or [0] disables
 * @param {boolean} [opts.noInt=false] truthy leaves every intercept at 0
 * @param {string} [opts.output='lex'] 'lex', 'matches' or 'full'
 * @param {number} [opts.places] handed to doMatches / doLex unchanged
 * @param {string} [opts.sortBy='freq'] handed to doMatches
 * @param {boolean} [opts.wcGrams=false] true counts n-grams in the wordcount
 * @return {Object} PERMA object, or null when there is nothing to analyse
 */
const wellbeingAnalysis = (str, opts = {}) => {
  // Shallow copy: never mutate the caller's object (also tolerates null).
  const options = Object.assign({}, opts);
  const pick = (value, fallback) => (typeof value === 'undefined' ? fallback : value);
  // Number() never yields a non-number type, so a failed conversion has to be
  // detected through NaN rather than through typeof.
  const toBound = (value, fallback) => {
    if (typeof value === 'undefined') return fallback;
    const converted = Number(value);
    return Number.isNaN(converted) ? fallback : converted;
  };

  // default options
  const encoding = pick(options.encoding, 'binary');
  const lang = String(pick(options.lang, 'english'));
  const locale = String(pick(options.locale, 'US'));
  const logs = options.suppressLog ? 0 : pick(options.logs, 3);
  const max = toBound(options.max, Number.POSITIVE_INFINITY);
  const min = toBound(options.min, Number.NEGATIVE_INFINITY);
  const noInt = pick(options.noInt, false);
  const output = String(pick(options.output, 'lex'));
  const places = options.places;
  const sortBy = pick(options.sortBy, 'freq');
  const wcGrams = pick(options.wcGrams, false);

  let nGrams = pick(options.nGrams, [2, 3]);
  if (!Array.isArray(nGrams)) {
    // a bare zero-like value (0, '0', false) is shorthand for "no n-grams"
    if (nGrams !== null && Number(nGrams) === 0) {
      nGrams = [0];
    } else {
      if (logs > 1) {
        console.warn('wellbeingAnalysis: nGrams option must be an array! Defaulting to [2, 3].');
      }
      nGrams = [2, 3];
    }
  }

  // no string return null
  if (!str) {
    if (logs > 1) console.warn('wellbeingAnalysis: no string found. Returning null.');
    return null;
  }

  // stringify, trim whitespace and convert to lowercase
  let text = String(str).trim().toLowerCase();
  // convert British spellings with uk2us when the English text is flagged GB
  if (/english/i.test(lang) && /gb/i.test(locale)) text = trans.uk2us(text);

  // convert our string to tokens
  let tokens = tokenizer(text, {logs: logs});
  // nothing to analyse: no tokens at all, or an empty token list
  if (!tokens || tokens.length === 0) {
    if (logs > 1) console.warn('wellbeingAnalysis: no tokens found. Returned null.');
    return null;
  }

  // get wordcount before we add ngrams
  let wordcount = tokens.length;

  // get n-grams; each size is prepended to the running token list
  if (!nGrams.includes(0)) {
    for (const n of nGrams) {
      if (wordcount < n) {
        // sizes larger than the text are skipped, the rest are still used
        if (logs > 1) {
          console.warn(`wellbeingAnalysis: wordcount (${wordcount}) less than n-gram value (${n}). Ignoring.`);
        }
        continue;
      }
      tokens = [
        ...arr2string(simplengrams(text, n, {logs: logs})),
        ...tokens,
      ];
    }
  }

  // recalculate wordcount if wcGrams is true
  if (wcGrams === true) wordcount = tokens.length;

  // English is the default lexicon and has zero intercepts
  let lexicon = english;
  let ints = {
    POS_P: 0,
    POS_E: 0,
    POS_R: 0,
    POS_M: 0,
    POS_A: 0,
    NEG_P: 0,
    NEG_E: 0,
    NEG_R: 0,
    NEG_M: 0,
    NEG_A: 0,
  };

  // Use the Spanish lexicon when the `lang` option asks for it. Earlier code
  // read the language from `output` by mistake; that spelling is still
  // honoured so existing callers keep working.
  const wantsSpanish = /spanish|espa[nñ]ol/i.test(lang) ||
    /spanish|espanol/i.test(output);
  if (wantsSpanish) {
    lexicon = spanish;
    if (!noInt) {
      ints = {
        POS_P: 2.675173871,
        POS_E: 2.055179283,
        POS_R: 1.977389757,
        POS_M: 1.738298902,
        POS_A: 3.414517804,
        NEG_P: 2.50468297,
        NEG_E: 1.673629622,
        NEG_R: 1.782788984,
        NEG_M: 1.52890284,
        NEG_A: 2.482131179,
      };
    }
  }

  // get matches from array
  const matches = getMatches(itemCount(tokens), lexicon, min, max);

  // returns
  if (/matches/i.test(output)) {
    // return just the matches
    return doMatches(matches, encoding, wordcount, sortBy, places);
  }
  if (/full/i.test(output)) {
    // return matches and values in one object; both helpers are synchronous,
    // so they are called directly
    return {
      matches: doMatches(matches, encoding, wordcount, sortBy, places),
      values: doLex(matches, ints, encoding, wordcount, places),
    };
  }
  if (!/lex/i.test(output) && logs > 1) {
    console.warn(`wellbeingAnalysis: output option ("${output}") is invalid, defaulting to "lex".`);
  }
  // return just the values
  return doLex(matches, ints, encoding, wordcount, places);
};
// Publish the analyser: as the CommonJS module export (also reachable as a
// named `wellbeingAnalysis` property) when `exports` exists, otherwise as a
// property of `global`.
const hasExportsObject = typeof exports !== 'undefined';
if (!hasExportsObject) {
  global.wellbeingAnalysis = wellbeingAnalysis;
} else {
  const hasModuleExports = typeof module !== 'undefined' && module.exports;
  if (hasModuleExports) {
    // replace the module export with the function itself and re-point the
    // local `exports` alias at it
    module.exports = wellbeingAnalysis;
    exports = wellbeingAnalysis;
  }
  exports.wellbeingAnalysis = wellbeingAnalysis;
}
})();