@@ -697,19 +697,18 @@ export class Recorder {
697
697
requestId,
698
698
} ;
699
699
700
- // fetching using response stream, await here and then either call fulFill, or if not started, return false
701
- if ( contentLen < 0 ) {
702
- const fetcher = new ResponseStreamAsyncFetcher ( opts ) ;
703
- const res = await fetcher . load ( ) ;
704
- switch ( res ) {
705
- case "dupe" :
706
- this . removeReqResp ( networkId ) ;
707
- return false ;
708
-
709
- case "fetched" :
710
- streamingConsume = true ;
711
- break ;
712
- }
700
+ // fetching using response stream as first attempt,
701
+ // await here and then either call fulFill, or if dupe, return false
702
+ const fetcher = new ResponseStreamAsyncFetcher ( opts ) ;
703
+ const res = await fetcher . load ( ) ;
704
+ switch ( res ) {
705
+ case "dupe" :
706
+ this . removeReqResp ( networkId ) ;
707
+ return false ;
708
+
709
+ case "fetched" :
710
+ streamingConsume = true ;
711
+ break ;
713
712
}
714
713
715
714
// if not consumed via takeStream, attempt async loading
@@ -750,7 +749,12 @@ export class Recorder {
750
749
751
750
// if in browser context, and not also intercepted in page context
752
751
// serialize here, as we won't be getting a loadingFinished message for it
753
- if ( isBrowserContext && ! reqresp . inPageContext && reqresp . payload ) {
752
+ if (
753
+ isBrowserContext &&
754
+ ! reqresp . inPageContext &&
755
+ reqresp . payload &&
756
+ reqresp . payload . length > 0
757
+ ) {
754
758
this . removeReqResp ( networkId ) ;
755
759
await this . serializeToWARC ( reqresp ) ;
756
760
}
@@ -788,7 +792,7 @@ export class Recorder {
788
792
? "document not loaded in browser, possibly other URLs missing"
789
793
: "URL not loaded in browser" ;
790
794
791
- logger . debug ( msg , { url, resourceType } , "recorder" ) ;
795
+ logger . debug ( msg , { url, resourceType, e } , "recorder" ) ;
792
796
}
793
797
794
798
return true ;
@@ -797,7 +801,11 @@ export class Recorder {
797
801
addAsyncFetch ( opts : NetworkLoadAsyncFetchOptions , contentLen : number ) {
798
802
let fetcher : AsyncFetcher ;
799
803
800
- if ( opts . reqresp . method !== "GET" || contentLen > MAX_NETWORK_LOAD_SIZE ) {
804
+ if (
805
+ opts . reqresp . method !== "GET" ||
806
+ contentLen > MAX_NETWORK_LOAD_SIZE ||
807
+ ! opts . reqresp . inPageContext
808
+ ) {
801
809
fetcher = new AsyncFetcher ( opts ) ;
802
810
} else {
803
811
fetcher = new NetworkLoadStreamAsyncFetcher ( opts ) ;
@@ -866,7 +874,7 @@ export class Recorder {
866
874
867
875
async awaitPageResources ( ) {
868
876
for ( const [ requestId , reqresp ] of this . pendingRequests . entries ( ) ) {
869
- if ( reqresp . payload ) {
877
+ if ( reqresp . payload && reqresp . payload . length > 0 ) {
870
878
this . removeReqResp ( requestId ) ;
871
879
await this . serializeToWARC ( reqresp ) ;
872
880
// if no url, and not fetch intercept or async loading,
@@ -1455,8 +1463,10 @@ class AsyncFetcher {
1455
1463
} ,
1456
1464
"recorder" ,
1457
1465
) ;
1458
- //await crawlState.removeDupe(ASYNC_FETCH_DUPE_KEY, url);
1459
- //return fetched;
1466
+ if ( status === 206 ) {
1467
+ await crawlState . removeDupe ( ASYNC_FETCH_DUPE_KEY , url , status ) ;
1468
+ return "notfetched" ;
1469
+ }
1460
1470
}
1461
1471
1462
1472
const externalBuffer : TempFileBuffer =
@@ -1521,8 +1531,10 @@ class AsyncFetcher {
1521
1531
const { method, url } = reqresp ;
1522
1532
logger . debug ( "Async started: fetch" , { url } , "recorder" ) ;
1523
1533
1524
- const headers = reqresp . getRequestHeadersDict ( ) ;
1525
-
1534
+ const headers = new Headers ( reqresp . getRequestHeadersDict ( ) ) ;
1535
+ if ( headers . has ( "range" ) ) {
1536
+ headers . set ( "range" , "bytes=0-" ) ;
1537
+ }
1526
1538
const dispatcher = getGlobalDispatcher ( ) . compose ( ( dispatch ) => {
1527
1539
return ( opts , handler ) => {
1528
1540
if ( opts . headers ) {
0 commit comments