-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathindex.js
86 lines (77 loc) · 2.72 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
/**
* Copyright (c) 2015, Jozef Stefan Institute, Quintelligence d.o.o. and contributors
* All rights reserved.
*
* This source code is licensed under the FreeBSD license found in the
* LICENSE file in the root directory of this source tree.
*/
const fs = require('fs');
// constructor
module.exports = class Dmoz {
constructor(params) {
let self = this;
// load binary module
const pathDmozBinary = __dirname + '/out/dmoz.node';
if (fs.existsSync(pathDmozBinary)) {
self._classifier = require(pathDmozBinary);
} else {
throw new Error('dmoz binary not found!');
}
// check if classifier file exists
if (fs.existsSync(params.classifier)) {
// it does, just load it
self._classifier.load(params);
} else {
// it does not, first create it from complete dmoz
self._classifier.init(params);
}
// load filter
let filters = fs.readFileSync(params.filter, "utf8").split("\n");
self.partials = [ ];
for (let filter of filters) {
// we only care about paltiar filters
if (filter.indexOf("*") != -1) {
// split on prefix and suffix
let partial = filter.split("*");
self.partials.push(partial);
}
}
console.log(self.partials.length);
}
_cleanCategory(category) {
let self = this;
// go over and see if we match a partial
for (let partial of self.partials) {
// if we match both prefix and suffix, then we found the match
if (category.startsWith(partial[0]) && category.endsWith(partial[1])) {
return partial[1];
}
}
// if no match, just remove the "Top/"
return category.slice(4);
}
classifyTop(text) {
let self = this;
// classify document
let results = self._classifier.classify(text, 1);
// we just care about top category
let topCat = results.categories.length > 0 ? results.categories[0] : "Top/";
// clean the name and return it
return self._cleanCategory(topCat);
}
classify(text, maxCats) {
let self = this;
// classify document
let results = self._classifier.classify(text, 3 * maxCats);
// clean categories
results = results.categories.map(cat => self._cleanCategory(cat));
// get first unique maxCats
let unique = new Set();
for (let category of results) {
unique.add(category);
if (unique.size == maxCats) { break; }
}
// return top unique categories
return Array.from(unique);
}
}