From 171d669d2ed69249c1336311493ed8cf906575e2 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Fri, 13 Dec 2024 21:20:25 +0530 Subject: [PATCH 01/12] feat: pass context script for webdriver --- .../classes/RemoteBrowser.ts | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 04fb59b32..2dab103a6 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -225,6 +225,31 @@ export class RemoteBrowser { contextOptions.userAgent = browserUserAgent; this.context = await this.browser.newContext(contextOptions); + await this.context.addInitScript( + `const defaultGetter = Object.getOwnPropertyDescriptor( + Navigator.prototype, + "webdriver" + ).get; + defaultGetter.apply(navigator); + defaultGetter.toString(); + Object.defineProperty(Navigator.prototype, "webdriver", { + set: undefined, + enumerable: true, + configurable: true, + get: new Proxy(defaultGetter, { + apply: (target, thisArg, args) => { + Reflect.apply(target, thisArg, args); + return false; + }, + }), + }); + const patchedGetter = Object.getOwnPropertyDescriptor( + Navigator.prototype, + "webdriver" + ).get; + patchedGetter.apply(navigator); + patchedGetter.toString();` + ); this.currentPage = await this.context.newPage(); await this.setupPageEventListeners(this.currentPage); From cd05ddfb3c780aa26ec1e6108c4f173dafe26291 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Fri, 13 Dec 2024 21:21:00 +0530 Subject: [PATCH 02/12] chore: lint --- .../src/browser-management/classes/RemoteBrowser.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 2dab103a6..b19b5cbc9 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -104,7 +104,7 @@ export class RemoteBrowser { } catch { return url; } - } + } /** * Determines if a URL change is significant enough to emit @@ -130,11 +130,11 @@ export class RemoteBrowser { }); // Handle page load events with retry mechanism - page.on('load', async () => { + page.on('load', async () => { const injectScript = async (): Promise => { try { await page.waitForLoadState('networkidle', { timeout: 5000 }); - + await page.evaluate(getInjectableScript()); return true; } catch (error: any) { @@ -201,7 +201,7 @@ export class RemoteBrowser { const contextOptions: any = { viewport: { height: 400, width: 900 }, // recordVideo: { dir: 'videos/' } - // Force reduced motion to prevent animation issues + // Force reduced motion to prevent animation issues reducedMotion: 'reduce', // Force JavaScript to be enabled javaScriptEnabled: true, @@ -249,7 +249,7 @@ export class RemoteBrowser { ).get; patchedGetter.apply(navigator); patchedGetter.toString();` - ); + ); this.currentPage = await this.context.newPage(); await this.setupPageEventListeners(this.currentPage); @@ -481,7 +481,7 @@ export class RemoteBrowser { this.currentPage = newPage; if (this.currentPage) { await this.setupPageEventListeners(this.currentPage); - + this.client = await this.currentPage.context().newCDPSession(this.currentPage); await this.subscribeToScreencast(); } else { From c49e70a1eeab9ef569f1e41738bea8882915ec22 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Fri, 13 Dec 2024 22:04:35 +0530 Subject: [PATCH 03/12] chrome and chromium user agent --- server/src/browser-management/classes/RemoteBrowser.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index b19b5cbc9..e5d3217e8 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -220,8 +220,7 @@ export class RemoteBrowser { password: proxyOptions.password ? proxyOptions.password : undefined, }; } - const browserUserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.5481.38 Safari/537.36"; - + const browserUserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.6778.140 Chromium/131.0.6778.140 Safari/537.36"; contextOptions.userAgent = browserUserAgent; this.context = await this.browser.newContext(contextOptions); From b173ce3e98d9bcc2838de7ee257f3b8ee5e95a92 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Fri, 13 Dec 2024 22:05:51 +0530 Subject: [PATCH 04/12] chore: remove commented code --- .../classes/RemoteBrowser.ts | 32 ------------------- 1 file changed, 32 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index e5d3217e8..0299faf65 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -155,35 +155,6 @@ export class RemoteBrowser { * @returns {Promise} */ public initialize = async (userId: string): Promise => { - // const launchOptions = { - // headless: true, - // proxy: options.launchOptions?.proxy, - // chromiumSandbox: false, - // args: [ - // '--no-sandbox', - // '--disable-setuid-sandbox', - // '--headless=new', - // '--disable-gpu', - // '--disable-dev-shm-usage', - // '--disable-software-rasterizer', - // '--in-process-gpu', - // '--disable-infobars', - // '--single-process', - // '--no-zygote', - // '--disable-notifications', - // '--disable-extensions', - // '--disable-background-timer-throttling', - // ...(options.launchOptions?.args || []) - // ], - // env: { - // ...process.env, - // CHROMIUM_FLAGS: '--disable-gpu --no-sandbox --headless=new' - // } - // }; - // console.log('Launch options before:', options.launchOptions); - // this.browser = (await options.browser.launch(launchOptions)); - - // console.log('Launch options after:', options.launchOptions) this.browser = (await chromium.launch({ headless: true, })); @@ -253,9 +224,6 @@ export class RemoteBrowser { await this.setupPageEventListeners(this.currentPage); - // await this.currentPage.setExtraHTTPHeaders({ - // 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3' - // }); const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']); await blocker.enableBlockingInPage(this.currentPage); this.client = await this.currentPage.context().newCDPSession(this.currentPage); From 06184010ae1a789c86bc80d00cd082752fdd1444 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 14 Dec 2024 06:58:29 +0530 Subject: [PATCH 05/12] feat: args --- server/src/browser-management/classes/RemoteBrowser.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 0299faf65..9dc516908 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -157,6 +157,13 @@ export class RemoteBrowser { public initialize = async (userId: string): Promise => { this.browser = (await chromium.launch({ headless: true, + args: [ + "--disable-blink-features=AutomationControlled", + "--disable-web-security", + "--disable-features=IsolateOrigins,site-per-process", + "--disable-site-isolation-trials", + "--disable-extensions" + ], })); const proxyConfig = await getDecryptedProxyConfig(userId); let proxyOptions: { server: string, username?: string, password?: string } = { server: '' }; From 7f48464eea993f0d4468942cdeb77c87398191f8 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 14 Dec 2024 18:35:38 +0530 Subject: [PATCH 06/12] feat: add page navigation timeout --- maxun-core/src/interpret.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index d1cc8318d..848ddd768 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -365,7 +365,7 @@ export default class Interpreter extends EventEmitter { try { const newPage = await context.newPage(); await newPage.goto(link); - await newPage.waitForLoadState('networkidle'); + await newPage.waitForLoadState('domcontentloaded'); await this.runLoop(newPage, this.initializedWorkflow!); } catch (e) { // `runLoop` uses soft mode, so it recovers from it's own exceptions @@ -576,7 +576,7 @@ export default class Interpreter extends EventEmitter { } await Promise.all([ nextButton.dispatchEvent('click'), - page.waitForNavigation({ waitUntil: 'networkidle' }) + page.waitForNavigation({ waitUntil: 'domcontentloaded' }) ]); await page.waitForTimeout(1000); @@ -767,6 +767,8 @@ export default class Interpreter extends EventEmitter { public async run(page: Page, params?: ParamType): Promise { this.log('Starting the workflow.', Level.LOG); const context = page.context(); + + page.setDefaultNavigationTimeout(100000); // Check proxy settings from context options const contextOptions = (context as any)._options; From bdf908e37cdcb2200cb5c653a5149db279ce51aa Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 14 Dec 2024 18:36:59 +0530 Subject: [PATCH 07/12] feat: add domcontentloaded wait load state --- server/src/workflow-management/classes/Generator.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 57be015ed..2cde90e4c 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -189,7 +189,7 @@ export class WorkflowGenerator { * * This function also makes sure to add a waitForLoadState and a generated flag * action after every new action or pair added. The [waitForLoadState](https://playwright.dev/docs/api/class-frame#frame-wait-for-load-state) - * action waits for the networkidle event to be fired, + * action waits for the domcontentloaded event to be fired, * and the generated flag action is used for making pausing the interpretation possible. * * @param pair The pair to add to the workflow. @@ -217,7 +217,7 @@ export class WorkflowGenerator { if (pair.what[0].action !== 'waitForLoadState' && pair.what[0].action !== 'press') { pair.what.push({ action: 'waitForLoadState', - args: ['networkidle'], + args: ['domcontentloaded'], }); } this.workflowRecord.workflow[matchedIndex].what = this.workflowRecord.workflow[matchedIndex].what.concat(pair.what); @@ -232,7 +232,7 @@ export class WorkflowGenerator { if (pair.what[0].action !== 'waitForLoadState' && pair.what[0].action !== 'press') { pair.what.push({ action: 'waitForLoadState', - args: ['networkidle'], + args: ['domcontentloaded'], }); } if (this.generatedData.lastIndex === 0) { From f38230d1b4d45d266886679c3228d07d2f52d18d Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 14 Dec 2024 20:30:24 +0530 Subject: [PATCH 08/12] feat: revert to networkidle for wait load state --- server/src/workflow-management/classes/Generator.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 2cde90e4c..57be015ed 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -189,7 +189,7 @@ export class WorkflowGenerator { * * This function also makes sure to add a waitForLoadState and a generated flag * action after every new action or pair added. The [waitForLoadState](https://playwright.dev/docs/api/class-frame#frame-wait-for-load-state) - * action waits for the domcontentloaded event to be fired, + * action waits for the networkidle event to be fired, * and the generated flag action is used for making pausing the interpretation possible. * * @param pair The pair to add to the workflow. @@ -217,7 +217,7 @@ export class WorkflowGenerator { if (pair.what[0].action !== 'waitForLoadState' && pair.what[0].action !== 'press') { pair.what.push({ action: 'waitForLoadState', - args: ['domcontentloaded'], + args: ['networkidle'], }); } this.workflowRecord.workflow[matchedIndex].what = this.workflowRecord.workflow[matchedIndex].what.concat(pair.what); @@ -232,7 +232,7 @@ export class WorkflowGenerator { if (pair.what[0].action !== 'waitForLoadState' && pair.what[0].action !== 'press') { pair.what.push({ action: 'waitForLoadState', - args: ['domcontentloaded'], + args: ['networkidle'], }); } if (this.generatedData.lastIndex === 0) { From 7ce7a1598c3c394d8107677859991257460755ee Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 14 Dec 2024 20:32:07 +0530 Subject: [PATCH 09/12] feat: check for selector visibility in getState --- maxun-core/src/interpret.ts | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 848ddd768..e11ae255a 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -192,8 +192,8 @@ export default class Interpreter extends EventEmitter { // const actionable = async (selector: string): Promise => { // try { // const proms = [ - // page.isEnabled(selector, { timeout: 5000 }), - // page.isVisible(selector, { timeout: 5000 }), + // page.isEnabled(selector, { timeout: 10000 }), + // page.isVisible(selector, { timeout: 10000 }), // ]; // return await Promise.all(proms).then((bools) => bools.every((x) => x)); @@ -214,6 +214,17 @@ export default class Interpreter extends EventEmitter { // return []; // }), // ).then((x) => x.flat()); + + const presentSelectors: SelectorArray = await Promise.all( + selectors.map(async (selector) => { + try { + await page.waitForSelector(selector, { state: 'attached' }); + return [selector]; + } catch (e) { + return []; + } + }), + ).then((x) => x.flat()); const action = workflowCopy[workflowCopy.length - 1]; @@ -233,7 +244,7 @@ export default class Interpreter extends EventEmitter { ...p, [cookie.name]: cookie.value, }), {}), - selectors, + selectors: presentSelectors, }; } @@ -365,7 +376,7 @@ export default class Interpreter extends EventEmitter { try { const newPage = await context.newPage(); await newPage.goto(link); - await newPage.waitForLoadState('domcontentloaded'); + await newPage.waitForLoadState('networkidle'); await this.runLoop(newPage, this.initializedWorkflow!); } catch (e) { // `runLoop` uses soft mode, so it recovers from it's own exceptions @@ -576,7 +587,7 @@ export default class Interpreter extends EventEmitter { } await Promise.all([ nextButton.dispatchEvent('click'), - page.waitForNavigation({ waitUntil: 'domcontentloaded' }) + page.waitForNavigation({ waitUntil: 'networkidle' }) ]); await page.waitForTimeout(1000); From e22c019a0c6feed2b2c3e2ecb2aa4b749dd0f226 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 14 Dec 2024 22:30:50 +0530 Subject: [PATCH 10/12] feat: rotate user agents --- .../browser-management/classes/RemoteBrowser.ts | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 9dc516908..05927b24c 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -148,6 +148,19 @@ export class RemoteBrowser { }); } + private getUserAgent() { + const userAgents = [ + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.140 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:117.0) Gecko/20100101 Firefox/117.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.1938.81 Safari/537.36 Edg/116.0.1938.81', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.96 Safari/537.36 OPR/101.0.4843.25', + 'Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.62 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:118.0) Gecko/20100101 Firefox/118.0', + ]; + + return userAgents[Math.floor(Math.random() * userAgents.length)]; + } + /** * An asynchronous constructor for asynchronously initialized properties. * Must be called right after creating an instance of RemoteBrowser class. @@ -198,9 +211,8 @@ export class RemoteBrowser { password: proxyOptions.password ? proxyOptions.password : undefined, }; } - const browserUserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.6778.140 Chromium/131.0.6778.140 Safari/537.36"; - contextOptions.userAgent = browserUserAgent; + contextOptions.userAgent = this.getUserAgent(); this.context = await this.browser.newContext(contextOptions); await this.context.addInitScript( `const defaultGetter = Object.getOwnPropertyDescriptor( From 320f24ec002256067c418f45efbb48258599c893 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sun, 15 Dec 2024 01:06:23 +0530 Subject: [PATCH 11/12] feat: shm & sandbox args --- server/src/browser-management/classes/RemoteBrowser.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 05927b24c..4b059cdaf 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -175,7 +175,9 @@ export class RemoteBrowser { "--disable-web-security", "--disable-features=IsolateOrigins,site-per-process", "--disable-site-isolation-trials", - "--disable-extensions" + "--disable-extensions", + "--no-sandbox", + "--disable-dev-shm-usage", ], })); const proxyConfig = await getDecryptedProxyConfig(userId); @@ -201,7 +203,7 @@ export class RemoteBrowser { // Disable hardware acceleration forcedColors: 'none', isMobile: false, - hasTouch: false + hasTouch: false, }; if (proxyOptions.server) { @@ -212,7 +214,6 @@ export class RemoteBrowser { }; } - contextOptions.userAgent = this.getUserAgent(); this.context = await this.browser.newContext(contextOptions); await this.context.addInitScript( `const defaultGetter = Object.getOwnPropertyDescriptor( From ffe87b0c7db7b0e6446c3f0fb2a5d67e313f29b8 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sun, 15 Dec 2024 01:06:45 +0530 Subject: [PATCH 12/12] feat: user getUserAgent() --- server/src/browser-management/classes/RemoteBrowser.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 4b059cdaf..31aceadaa 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -204,6 +204,7 @@ export class RemoteBrowser { forcedColors: 'none', isMobile: false, hasTouch: false, + userAgent: this.getUserAgent(), }; if (proxyOptions.server) {