-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathopenAIClient.ts
72 lines (64 loc) · 2.27 KB
/
openAIClient.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import { OpenAI } from "langchain/llms/openai";
import { loadSummarizationChain } from "langchain/chains";
import { PuppeteerWebBaseLoader } from "langchain/document_loaders/web/puppeteer";
/**
 * Thin wrapper around a LangChain `OpenAI` model that offers plain prompt
 * completion and best-effort summarization of a web page.
 */
export class OpenAIClient {
  /** Pages whose first extracted chunk is shorter than this are not summarized. */
  private static readonly MIN_CONTENT_LENGTH = 30;

  private readonly model: OpenAI;

  /**
   * @param openAIApiKey API key forwarded to the LangChain `OpenAI` model.
   */
  constructor(openAIApiKey: string) {
    this.model = new OpenAI({ openAIApiKey, temperature: 0, modelName: "gpt-3.5-turbo" });
  }

  /**
   * Sends a prompt to the model and returns its reply.
   *
   * @param prompt Text passed to the model as-is.
   * @returns The value produced by the model call.
   */
  async complete(prompt: string): Promise<string> {
    return await this.model.call(prompt);
  }

  /**
   * Summarizes the text of the web page at `url` with a `map_reduce`
   * summarization chain.
   *
   * This method is best-effort: any failure while loading the page or running
   * the chain is logged and reported as an empty string rather than thrown.
   *
   * @param url Address of the page to summarize.
   * @returns The chain's `text` output, or `""` when the page yields too
   *   little text or any step fails.
   */
  async summarize(url: string): Promise<string> {
    try {
      // Loading sits inside the try so that navigation/browser failures are
      // handled the same way as summarization failures.
      const docs = await this.getWebpageTextDocs(url);
      const firstDoc = docs[0];
      if (firstDoc === undefined || firstDoc.pageContent.length < OpenAIClient.MIN_CONTENT_LENGTH) {
        return "";
      }

      const summarizationChain = loadSummarizationChain(this.model, {
        type: "map_reduce",
      });
      const res = await summarizationChain.call({
        input_documents: docs,
      });
      console.info("🚀 ~ summarize result", res.text);
      return res.text;
    } catch (e) {
      console.error(e);
      return "";
    }
  }

  /**
   * Opens `url` with `PuppeteerWebBaseLoader`, extracts the visible text and
   * returns the result of the loader's `loadAndSplit()`.
   *
   * @param url Address of the page to load.
   * @returns The documents produced by `loadAndSplit()`.
   */
  private async getWebpageTextDocs(url: string) {
    const loader = new PuppeteerWebBaseLoader(url, {
      launchOptions: {
        headless: "new",
        args: ["--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"],
      },
      gotoOptions: {
        waitUntil: "domcontentloaded",
      },
      async evaluate(page) {
        await page.setViewport({
          width: 1920,
          height: 1080,
        });
        // The callback below is executed inside the page, so it cannot close
        // over Node-side values; its constants are therefore inlined literals.
        return await page.evaluate(async () => {
          // Navigation only waits for "domcontentloaded"; pause one second to
          // give the page a chance to finish rendering.
          await new Promise((resolve) => setTimeout(resolve, 1000));

          // Drop script, noscript and style elements from the body before
          // reading its text.
          document.body
            .querySelectorAll("script, noscript, style")
            .forEach((element) => element.remove());

          // Prefer the <main> element when present, otherwise use the whole
          // body; cap the text at 20000 characters.
          const mainElement = document.querySelector("main");
          const text = mainElement ? mainElement.innerText : document.body.innerText;
          return text.slice(0, 20000);
        });
      },
    });
    return await loader.loadAndSplit();
  }
}