-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathformatting-evaluator-module.ts
178 lines (162 loc) · 6.64 KB
/
formatting-evaluator-module.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import { Value } from "@sinclair/typebox/value";
import Decimal from "decimal.js";
import { JSDOM } from "jsdom";
import MarkdownIt from "markdown-it";
import { commentEnum, CommentType } from "../configuration/comment-types";
import configuration from "../configuration/config-reader";
import {
FormattingEvaluatorConfiguration,
formattingEvaluatorConfigurationType,
} from "../configuration/formatting-evaluator-config";
import logger from "../helpers/logger";
import { IssueActivity } from "../issue-activity";
import { GithubCommentScore, Module, Result } from "./processor";
/** Reward tables of a single configured multiplier entry. */
type MultiplierRewards = FormattingEvaluatorConfiguration["multipliers"][0]["rewards"];

/**
 * Resolved multiplier settings stored per comment-type key.
 */
interface Multiplier {
  /** Factor applied to the whole formatting score of a comment. */
  multiplier: number;
  /** Per-HTML-tag scores, looked up by lower-cased tag name. */
  html: MultiplierRewards["html"];
  /** Regex source strings mapped to the multiplier applied to their match count. */
  regex: MultiplierRewards["regex"];
}
/**
 * Adds a formatting-based reward to every comment of a {@link Result}.
 *
 * Each comment body is rendered from Markdown to HTML; for every HTML element the
 * configured regexes are matched against the element's own text nodes, and the
 * resulting counts are combined with the per-tag score and the multiplier
 * configured for the comment's type.
 */
export class FormattingEvaluatorModule implements Module {
  private readonly _configuration: FormattingEvaluatorConfiguration | null =
    configuration.incentives.formattingEvaluator;
  private readonly _md = new MarkdownIt();
  /** Multiplier settings keyed by the OR-ed enum value of the roles they apply to. */
  private readonly _multipliers: { [k: number]: Multiplier } = {};
  /** Exponent applied to every symbol count; falls back to 0.85 when not configured. */
  private readonly _wordCountExponent: number;

  /**
   * Converts a comment type key into its numeric value.
   *
   * @param key - Underscore-separated list of `commentEnum` member names.
   * @returns The bitwise OR of the `commentEnum` values of every part of `key`.
   */
  _getEnumValue(key: CommentType) {
    let res = 0;
    for (const part of key.split("_")) {
      res |= Number(commentEnum[part as keyof typeof commentEnum]);
    }
    return res;
  }

  /**
   * Builds the multiplier lookup table from the configuration, if any, and
   * resolves the word-count exponent.
   */
  constructor() {
    const configuredMultipliers = this._configuration?.multipliers;
    if (configuredMultipliers) {
      for (const entry of configuredMultipliers) {
        // All roles of one entry are OR-ed into a single numeric key.
        const roleKey = entry.role.reduce((mask, role) => this._getEnumValue(role) | mask, 0);
        // Later entries with the same key replace earlier ones.
        this._multipliers[roleKey] = {
          html: entry.rewards.html,
          multiplier: entry.multiplier,
          regex: entry.rewards.regex,
        };
      }
    }
    this._wordCountExponent = this._configuration?.wordCountExponent ?? 0.85;
  }

  /**
   * Computes the formatting score of every comment in `result` and adds it to the
   * comment's existing reward (mutating the comments in place).
   *
   * @param data - Not read by this implementation.
   * @param result - Per-key entries whose `comments` are scored.
   * @returns The same `result` object, with `score.formatting` and `score.reward` updated.
   */
  async transform(data: Readonly<IssueActivity>, result: Result) {
    for (const key of Object.keys(result)) {
      const currentElement = result[key];
      const comments = currentElement.comments || [];
      for (const comment of comments) {
        const { formatting } = this._getFormattingScore(comment);
        // Comment types without a configured multiplier contribute nothing.
        const multiplierFactor: Pick<Multiplier, "multiplier"> = this._multipliers[comment.type] ?? { multiplier: 0 };
        const formattingTotal = this._calculateFormattingTotal(formatting, multiplierFactor).toDecimalPlaces(2);
        comment.score = {
          ...comment.score,
          formatting: {
            content: formatting,
            multiplier: multiplierFactor.multiplier,
          },
          reward: (comment.score?.reward ? formattingTotal.add(comment.score.reward) : formattingTotal).toNumber(),
        };
      }
    }
    return result;
  }

  /**
   * Sums, over every tag and every symbol of that tag,
   * `count^exponent * symbolMultiplier * tagScore * commentTypeMultiplier`.
   *
   * @param formatting - Per-tag symbol counts and scores, as produced by `_getFormattingScore`.
   * @param multiplierFactor - Only its `multiplier` field is read.
   * @returns The unrounded total; zero when `formatting` is falsy.
   */
  private _calculateFormattingTotal(
    formatting: ReturnType<typeof this._getFormattingScore>["formatting"],
    multiplierFactor: Pick<Multiplier, "multiplier">
  ): Decimal {
    if (!formatting) return new Decimal(0);
    const exponent = this._wordCountExponent;
    return Object.values(formatting).reduce((acc, curr) => {
      // The tag score is the same for every symbol of this tag.
      const formattingElementScore = new Decimal(curr.score);
      let sum = new Decimal(0);
      for (const symbol of Object.keys(curr.symbols)) {
        const count = new Decimal(curr.symbols[symbol].count);
        const symbolMultiplier = new Decimal(curr.symbols[symbol].multiplier);
        sum = sum.add(
          count
            .pow(exponent) // (count^exponent)
            .mul(symbolMultiplier) // symbol multiplier
            .mul(formattingElementScore) // formatting element score
            .mul(multiplierFactor.multiplier) // comment type multiplier
        );
      }
      return acc.add(sum);
    }, new Decimal(0));
  }

  /**
   * Whether the module may run: `true` only when the configuration passes the
   * schema check. Logs a warning otherwise.
   */
  get enabled(): boolean {
    if (!Value.Check(formattingEvaluatorConfigurationType, this._configuration)) {
      console.warn("Invalid / missing configuration detected for FormattingEvaluatorModule, disabling.");
      return false;
    }
    return true;
  }

  /**
   * Renders the comment's Markdown to HTML and classifies the resulting elements.
   *
   * @param comment - Comment whose `content` and `type` are read.
   * @returns An object whose `formatting` maps tag names to symbol counts and scores.
   * @throws Error when the rendered document has no body.
   */
  _getFormattingScore(comment: GithubCommentScore) {
    // Change the \r to \n to fix markup interpretation
    // NOTE(review): a "\r\n" pair becomes "\n\n" here — confirm this is intended.
    const html = this._md.render(comment.content.replaceAll("\r", "\n"));
    logger.debug("Will analyze formatting for the current content", { comment: comment.content, html });
    const temp = new JSDOM(html);
    if (!temp.window.document.body) {
      // Interpolate the content, not the object, which would print "[object Object]".
      throw new Error(`Could not create DOM for comment [${comment.content}]`);
    }
    const res = this.classifyTagsWithWordCount(temp.window.document.body, comment.type);
    return { formatting: res };
  }

  /**
   * Counts the matches of every configured regex in `text`.
   *
   * @param regexes - Regex source strings mapped to their multiplier.
   * @param text - Text to search; it is trimmed before matching.
   * @returns For each regex, its multiplier and match count. A regex with no match
   *   is reported with a count of 1, not 0.
   */
  _countSymbols(regexes: FormattingEvaluatorConfiguration["multipliers"][0]["rewards"]["regex"], text: string) {
    const counts: { [p: string]: { count: number; multiplier: number } } = {};
    const trimmed = text.trim();
    for (const [regex, multiplier] of Object.entries(regexes)) {
      const match = trimmed.match(new RegExp(regex, "g"));
      counts[regex] = {
        count: match?.length || 1,
        multiplier,
      };
    }
    return counts;
  }

  /**
   * Walks every descendant element of `htmlElement` and accumulates, per tag name,
   * the symbol counts found in the element's own text nodes together with the
   * score configured for that tag.
   *
   * When no multiplier is configured for `commentType`, every tag gets no symbols
   * and a score of 0 instead of throwing.
   *
   * @param htmlElement - Root whose descendants are inspected.
   * @param commentType - Key used to look up the multiplier settings.
   * @returns A map from lower-cased tag name to its merged symbol counts and score.
   */
  classifyTagsWithWordCount(htmlElement: HTMLElement, commentType: GithubCommentScore["type"]) {
    const TEXT_NODE = 3;
    const hasOwn = (obj: object, key: string) => Object.prototype.hasOwnProperty.call(obj, key);
    const tagWordCount: Record<
      string,
      { symbols: { [p: string]: { count: number; multiplier: number } }; score: number }
    > = {};
    // May be undefined when the comment type has no configured multiplier.
    const typeMultiplier: Multiplier | undefined = this._multipliers[commentType];
    const elements = htmlElement.getElementsByTagName("*");

    for (const element of elements) {
      const tagName = element.tagName.toLowerCase();
      // We cannot use textContent otherwise we would duplicate counts, so instead we extract text nodes
      const textNodes = Array.from(element.childNodes).filter((node) => node.nodeType === TEXT_NODE);
      const innerText = textNodes
        .map((node) => node.nodeValue?.trim())
        .join(" ")
        .trim();

      let symbols: { [p: string]: { count: number; multiplier: number } } = {};
      if (typeMultiplier) {
        symbols = this._countSymbols(typeMultiplier.regex, innerText);
      }

      let score = 0;
      const tagScore = typeMultiplier?.html[tagName];
      if (tagScore !== undefined) {
        score = tagScore;
      } else {
        logger.error(`Could not find multiplier for comment [${commentType}], <${tagName}>`);
      }
      logger.debug("Tag content results", { tagName, symbols, text: element.textContent });

      // If we already had that tag included in the result, merge them and update total count
      if (hasOwn(tagWordCount, tagName)) {
        const known = tagWordCount[tagName].symbols;
        for (const [k, v] of Object.entries(symbols)) {
          if (hasOwn(known, k)) {
            known[k] = {
              ...known[k],
              count: known[k].count + v.count,
            };
          }
        }
      } else {
        tagWordCount[tagName] = {
          symbols: symbols,
          score,
        };
      }
    }
    return tagWordCount;
  }
}