Skip to content

Commit 5ea30d3

Browse files
ikreymertw4l
authored and committed
further cleanup:
- interrupt pending requests when page is finished, so pageinfo record is written after
- add pageFinished flag to recorder, remove unused 'skipping' flag
- re-enable attempt refetch, should be using dedup
1 parent 2fa5df8 commit 5ea30d3

File tree

1 file changed

+21
-22
lines changed

1 file changed

+21
-22
lines changed

src/util/recorder.ts

+21-22
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,8 @@ export class Recorder {
131131
// TODO: Fix this the next time the file is edited.
132132
// eslint-disable-next-line @typescript-eslint/no-explicit-any
133133
logDetails: Record<string, any> = {};
134-
skipping = false;
134+
135+
pageFinished = false;
135136

136137
gzip = true;
137138

@@ -170,6 +171,7 @@ export class Recorder {
170171
frameIdToExecId: Map<string, number>;
171172
}) {
172173
this.frameIdToExecId = frameIdToExecId;
174+
this.pageFinished = false;
173175

174176
// Fetch
175177
cdp.on("Fetch.requestPaused", (params) => {
@@ -435,23 +437,21 @@ export class Recorder {
435437
) {
436438
this.removeReqResp(requestId);
437439
return this.serializeToWARC(reqresp);
440+
} else if (url && reqresp.requestHeaders && type === "Media") {
441+
this.removeReqResp(requestId);
442+
logger.warn(
443+
"Attempt direct fetch of failed request",
444+
{ url, ...this.logDetails },
445+
"recorder",
446+
);
447+
const fetcher = new AsyncFetcher({
448+
reqresp,
449+
recorder: this,
450+
networkId: requestId,
451+
});
452+
void this.fetcherQ.add(() => fetcher.load());
453+
return;
438454
}
439-
// disable for now, driven by fetch from behaviors likely
440-
// else if (url && reqresp.requestHeaders && type === "Media") {
441-
// this.removeReqResp(requestId);
442-
// logger.warn(
443-
// "Attempt direct fetch of failed request",
444-
// { url, ...this.logDetails },
445-
// "recorder",
446-
// );
447-
// const fetcher = new AsyncFetcher({
448-
// reqresp,
449-
// recorder: this,
450-
// networkId: requestId,
451-
// });
452-
// void this.fetcherQ.add(() => fetcher.load());
453-
// return;
454-
// }
455455
break;
456456

457457
default:
@@ -819,7 +819,7 @@ export class Recorder {
819819
this.pendingRequests = new Map();
820820
this.skipIds = new Set();
821821
this.skipRangeUrls = new Map<string, number>();
822-
this.skipping = false;
822+
this.pageFinished = false;
823823
this.pageInfo = {
824824
pageid,
825825
urls: {},
@@ -889,6 +889,7 @@ export class Recorder {
889889
let pending = [];
890890
while (
891891
numPending &&
892+
!this.pageFinished &&
892893
!this.crawler.interrupted &&
893894
!this.crawler.postCrawling
894895
) {
@@ -938,6 +939,8 @@ export class Recorder {
938939
async onClosePage() {
939940
// Any page-specific handling before page is closed.
940941
this.frameIdToExecId = null;
942+
943+
this.pageFinished = true;
941944
}
942945

943946
async onDone(timeout: number) {
@@ -1164,10 +1167,6 @@ export class Recorder {
11641167
logNetwork("Skipping ignored id", { requestId });
11651168
return null;
11661169
}
1167-
if (this.skipping) {
1168-
//logger.debug("Skipping request, page already finished", this.logDetails, "recorder");
1169-
return null;
1170-
}
11711170
const reqresp = new RequestResponseInfo(requestId);
11721171
this.pendingRequests.set(requestId, reqresp);
11731172
return reqresp;

0 commit comments

Comments
 (0)