-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgulpfile.js
121 lines (107 loc) · 2.86 KB
/
gulpfile.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
var
gulp = require('gulp'),
gutil = require('gulp-util'),
rename = require('gulp-rename'),
map = require('vinyl-map'),
convertEncoding = require('gulp-convert-encoding'),
string = require('string'),
remoteSrc = require('gulp-remote-src'),
runSequence = require('run-sequence');
// Intermediate text encoding: the 'convert-data' task transcodes the win1251
// source files to this encoding and decodes each file buffer with it before parsing.
var encoding = "ucs2";
// Task 'update-bgen': downloads the Bulgarian-English dictionary parts
// (d01.txt through d32.txt) from the bgoffice SourceForge tree, requested
// with the query string format=raw, and stores them in ./bgoffice/bg-en/.
gulp.task('update-bgen', function () {
  var partCount = 32;
  var fileNames = [];
  var part = 0;

  // Part numbers are 1-based and left-padded with "0" to two characters.
  while (part < partCount) {
    part += 1;
    fileNames.push("d" + string(part).padLeft(2, "0") + ".txt");
  }

  var requestOptions = {
    base: 'http://sourceforge.net/p/bgoffice/code/HEAD/tree/trunk/dictionaries/data/bg-en/',
    qs: { format: 'raw' }
  };
  var target = gulp.dest('./bgoffice/bg-en/');

  return remoteSrc(fileNames, requestOptions).pipe(target);
});
// Task 'update-enbg': downloads the English-Bulgarian dictionary parts
// (d01.txt through d26.txt) from the bgoffice SourceForge tree, requested
// with the query string format=raw, and stores them in ./bgoffice/en-bg/.
gulp.task('update-enbg', function () {
  var partCount = 26;
  var fileNames = [];
  var part = 0;

  // Part numbers are 1-based and left-padded with "0" to two characters.
  while (part < partCount) {
    part += 1;
    fileNames.push("d" + string(part).padLeft(2, "0") + ".txt");
  }

  var requestOptions = {
    base: 'http://sourceforge.net/p/bgoffice/code/HEAD/tree/trunk/dictionaries/data/en-bg/',
    qs: { format: 'raw' }
  };
  var target = gulp.dest('./bgoffice/en-bg/');

  return remoteSrc(fileNames, requestOptions).pipe(target);
});
// Task 'update-data': hands 'update-bgen' and then 'update-enbg' to
// run-sequence, passing the task's completion callback as the final argument.
gulp.task('update-data', function (done) {
  runSequence('update-bgen', 'update-enbg', done);
});
// Ordered [search, replacement] pairs that turn the placeholder characters
// used in the dictionary's transcription lines into IPA symbols.
// Pairs are applied top to bottom, every occurrence replaced.
var TRANSCRIPTION_REPLACEMENTS = [
  ['§', 'ʌ'],
  ['a:', 'ɑ:'],
  ['Ў', 'æ'],
  ['ў', 'ə'],
  ['¦', 'ʃ'],
  ['Ґ', 'θ'],
  ['¤', 'ŋ'],
  // NOTE(review): the search character for 'ð' is invisible in the source.
  // The other placeholders decode from win1251 bytes 0xA1-0xA9, so 0xA0
  // (no-break space) is assumed here and written as an escape - confirm.
  ['\u00a0', 'ð'],
  ['Ј', 'ɔ'],
  ['©', 'ʒ'],
  ['u', 'ʊ'],
  ['Ё', 'e']
];

/**
 * Rewrites one transcription line by applying every pair of
 * TRANSCRIPTION_REPLACEMENTS in order.
 *
 * @param {string} line - raw transcription line (starts with "[").
 * @returns {string} the line with all placeholders replaced.
 */
function toPhoneticTranscript(line) {
  var result = line;
  for (var i = 0; i < TRANSCRIPTION_REPLACEMENTS.length; i++) {
    var pair = TRANSCRIPTION_REPLACEMENTS[i];
    // split/join replaces all literal occurrences without regex escaping.
    result = result.split(pair[0]).join(pair[1]);
  }
  return result;
}

/**
 * Parses the text of one dictionary file into a list of entries.
 *
 * Entries are separated by blank lines. Within an entry:
 *   - the first line becomes "w" (the word);
 *   - lines starting with "[" that directly follow the word become "t"
 *     (the transcription, with placeholders replaced);
 *   - every other line is appended to "m" (the meaning), each followed
 *     by "\r\n".
 *
 * @param {string} text - decoded file contents.
 * @returns {Array<Object>} entries with "w" and optional "t" / "m" keys.
 */
function parseDictionaryText(text) {
  var items = [];
  var item = {};
  var isWord = true;   // the next non-blank line starts a new entry
  var isFirst = false; // still directly after the word (transcription allowed)

  // Accept both LF and CRLF so a stray "\r" never hides a blank separator line.
  var lines = text.split(/\r?\n/);

  for (var i = 0; i < lines.length; i++) {
    var line = lines[i];

    if (!line) {
      // A blank line closes the entry in progress; repeated blanks are ignored.
      if (!isWord) {
        items.push(item);
        item = {};
        isWord = true;
        isFirst = false;
      }
      continue;
    }

    if (isWord) {
      item["w"] = line;
      isWord = false;
      isFirst = true;
      continue;
    }

    if (isFirst && line.charAt(0) === "[") {
      item["t"] = toPhoneticTranscript(line);
      continue;
    }

    // First meaning line ends the window in which a transcription may appear.
    isFirst = false;
    if (!item["m"]) {
      item["m"] = "";
    }
    item["m"] += line + "\r\n";
  }

  // Keep the final entry when the file does not end with a blank line.
  if (!isWord) {
    items.push(item);
  }

  return items;
}

// Task 'convert-data': reads ./bgoffice/**/d*.txt, converts each file from
// win1251 to the intermediate encoding, parses it into entries and writes the
// JSON-serialized entry list to ./www/data/ with a .json extension.
gulp.task('convert-data', function () {
  var textToJSON2 = map(function (code) {
    // `code` is the file buffer produced by the encoding conversion step.
    return JSON.stringify(parseDictionaryText(code.toString(encoding)));
  });

  return gulp.src(['./bgoffice/**/d*.txt'])
    .pipe(convertEncoding({from: 'win1251', to: encoding}))
    .pipe(textToJSON2)
    .pipe(rename({extname: ".json"}))
    .pipe(gulp.dest('./www/data/'));
});
// Task 'default': hands 'convert-data' to run-sequence, passing the task's
// completion callback as the final argument.
gulp.task('default', function (done) {
  runSequence('convert-data', done);
});