Skip to content

Commit 5c00bca

Browse files
authored
tests: use old.webrecorder.net for testing (#710)
Replace webrecorder.net with old.webrecorder.net in the tests, as a temporary fix for tests that rely on the old website.
1 parent 181d9b8 commit 5c00bca

16 files changed

+49
-67
lines changed

tests/add-exclusion.test.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ test("dynamically add exclusion while crawl is running", async () => {
1616

1717
try {
1818
exec(
19-
"docker run -p 36382:6379 -e CRAWL_ID=test -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection add-exclusion --url https://webrecorder.net/ --scopeType prefix --limit 20 --logging debug --debugAccessRedis",
19+
"docker run -p 36382:6379 -e CRAWL_ID=test -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection add-exclusion --url https://old.webrecorder.net/ --scopeType prefix --limit 20 --logging debug --debugAccessRedis",
2020
{ shell: "/bin/bash" },
2121
callback,
2222
);

tests/brave-query-redir.test.js

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { execSync } from "child_process";
44
test("check that gclid query URL is automatically redirected to remove it", async () => {
55
try {
66
execSync(
7-
"docker run --rm -v $PWD/test-crawls:/crawls -i webrecorder/browsertrix-crawler crawl --url 'https://webrecorder.net/about?gclid=abc' --collection test-brave-redir --behaviors \"\" --limit 1 --generateCDX");
7+
"docker run --rm -v $PWD/test-crawls:/crawls -i webrecorder/browsertrix-crawler crawl --url 'https://old.webrecorder.net/about?gclid=abc' --collection test-brave-redir --behaviors \"\" --limit 1 --generateCDX");
88

99
} catch (error) {
1010
console.log(error.stderr);
@@ -23,9 +23,9 @@ test("check that gclid query URL is automatically redirected to remove it", asyn
2323
for (const line of lines) {
2424
const json = line.split(" ").slice(2).join(" ");
2525
const data = JSON.parse(json);
26-
if (data.url === "https://webrecorder.net/about?gclid=abc" && data.status === "307") {
26+
if (data.url === "https://old.webrecorder.net/about?gclid=abc" && data.status === "307") {
2727
redirectFound = true;
28-
} else if (data.url === "https://webrecorder.net/about" && data.status === "200") {
28+
} else if (data.url === "https://old.webrecorder.net/about" && data.status === "200") {
2929
responseFound = true;
3030
}
3131
if (responseFound && redirectFound) {

tests/custom-behavior.test.js

+4-4
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import child_process from "child_process";
22

33
test("test custom behaviors", async () => {
44
const res = child_process.execSync(
5-
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page",
5+
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page",
66
);
77

88
const log = res.toString();
@@ -27,10 +27,10 @@ test("test custom behaviors", async () => {
2727
) > 0,
2828
).toBe(true);
2929

30-
// another custom behavior ran for webrecorder.net
30+
// another custom behavior ran for old.webrecorder.net
3131
expect(
3232
log.indexOf(
33-
'{"state":{},"msg":"test-stat-2","page":"https://webrecorder.net/","workerid":0}}',
33+
'{"state":{},"msg":"test-stat-2","page":"https://old.webrecorder.net/","workerid":0}}',
3434
) > 0,
3535
).toBe(true);
3636
});
@@ -40,7 +40,7 @@ test("test invalid behavior exit", async () => {
4040

4141
try {
4242
child_process.execSync(
43-
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page",
43+
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page",
4444
);
4545
} catch (e) {
4646
status = e.status;

tests/custom-behaviors/custom-2.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ class TestBehavior2 {
1111
}
1212

1313
static isMatch() {
14-
return window.location.origin === "https://webrecorder.net";
14+
return window.location.origin === "https://old.webrecorder.net";
1515
}
1616

1717
async *run(ctx) {

tests/dryrun.test.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import fs from "fs";
33

44
test("ensure dryRun crawl only writes pages and logs", async () => {
55
child_process.execSync(
6-
'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --generateWACZ --text --collection dry-run-wr-net --combineWARC --rolloverSize 10000 --limit 2 --title "test title" --description "test description" --warcPrefix custom-prefix --dryRun --exclude community',
6+
'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --generateWACZ --text --collection dry-run-wr-net --combineWARC --rolloverSize 10000 --limit 2 --title "test title" --description "test description" --warcPrefix custom-prefix --dryRun --exclude community',
77
);
88

99
const files = fs.readdirSync("test-crawls/collections/dry-run-wr-net").sort();

tests/extra_hops_depth.test.js

+7-7
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ test(
1212
async () => {
1313
try {
1414
await exec(
15-
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection extra-hops-beyond --extraHops 2 --url https://webrecorder.net/ --limit 5 --timeout 10 --exclude community --exclude tools",
15+
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection extra-hops-beyond --extraHops 2 --url https://old.webrecorder.net/ --limit 5 --timeout 10 --exclude community --exclude tools",
1616
);
1717
} catch (error) {
1818
console.log(error);
@@ -31,14 +31,14 @@ test(
3131
const crawledExtraPagesArray = crawledExtraPages.trim().split("\n");
3232

3333
const expectedPages = [
34-
"https://webrecorder.net/",
34+
"https://old.webrecorder.net/",
3535
];
3636

3737
const expectedExtraPages = [
38-
"https://webrecorder.net/blog",
39-
"https://webrecorder.net/about",
40-
"https://webrecorder.net/contact",
41-
"https://webrecorder.net/faq",
38+
"https://old.webrecorder.net/blog",
39+
"https://old.webrecorder.net/about",
40+
"https://old.webrecorder.net/contact",
41+
"https://old.webrecorder.net/faq",
4242
];
4343

4444
// first line is the header, not page, so adding -1
@@ -74,7 +74,7 @@ test(
7474
test("extra hops applies beyond depth limit", () => {
7575
try {
7676
execSync(
77-
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection extra-hops-depth-0 --extraHops 1 --url https://webrecorder.net/ --limit 2 --depth 0 --timeout 10 --exclude community --exclude tools",
77+
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection extra-hops-depth-0 --extraHops 1 --url https://old.webrecorder.net/ --limit 2 --depth 0 --timeout 10 --exclude community --exclude tools",
7878
);
7979
} catch (error) {
8080
console.log(error);

tests/file_stats.test.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import fs from "fs";
33

44
test("ensure that stats file is modified", async () => {
55
const child = child_process.exec(
6-
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --generateWACZ --text --limit 3 --exclude community --collection file-stats --statsFilename progress.json",
6+
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --generateWACZ --text --limit 3 --exclude community --collection file-stats --statsFilename progress.json",
77
);
88

99
// detect crawler exit

tests/limit_reached.test.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ const exec = util.promisify(execCallback);
66

77
test("ensure page limit reached", async () => {
88
execSync(
9-
'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --scopeType prefix --behaviors "" --url https://webrecorder.net/ --limit 12 --workers 2 --collection limit-test --statsFilename stats.json --exclude community',
9+
'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --scopeType prefix --behaviors "" --url https://old.webrecorder.net/ --limit 12 --workers 2 --collection limit-test --statsFilename stats.json --exclude community',
1010
);
1111
});
1212

@@ -22,7 +22,7 @@ test("ensure crawl fails if failOnFailedLimit is reached", async () => {
2222
let passed = true;
2323
try {
2424
await exec(
25-
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/will404 --url https://specs.webrecorder.net --failOnInvalidStatus --failOnFailedLimit 1 --limit 10 --collection faillimitreached",
25+
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/will404 --url https://specs.webrecorder.net --failOnInvalidStatus --failOnFailedLimit 1 --limit 10 --collection faillimitreached",
2626
);
2727
} catch (error) {
2828
expect(error.code).toEqual(17);

tests/mult_url_crawl_with_favicon.test.js

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ const testIf = (condition, ...args) => condition ? test(...args) : test.skip(...
66

77
test("ensure multi url crawl run with docker run passes", async () => {
88
child_process.execSync(
9-
'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --url https://webrecorder.net/ --generateWACZ --text --collection advanced --combineWARC --rolloverSize 10000 --workers 2 --title "test title" --description "test description" --pages 2 --limit 2 --exclude community',
9+
'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --url https://old.webrecorder.net/ --generateWACZ --text --collection advanced --combineWARC --rolloverSize 10000 --workers 2 --title "test title" --description "test description" --pages 2 --limit 2 --exclude community',
1010
);
1111
});
1212

@@ -39,9 +39,9 @@ test("check that the favicon made it into the pages jsonl file", () => {
3939
);
4040
const data = [data1, data2];
4141
for (const d of data) {
42-
if (d.url === "https://webrecorder.net/") {
42+
if (d.url === "https://old.webrecorder.net/") {
4343
expect(d.favIconUrl).toEqual(
44-
"https://webrecorder.net/assets/favicon.ico",
44+
"https://old.webrecorder.net/assets/favicon.ico",
4545
);
4646
}
4747
if (d.url === "https://iana.org/") {

tests/multi-instance-crawl.test.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ afterAll(async () => {
3333
});
3434

3535
function runCrawl(name) {
36-
const crawler = exec(`docker run --rm -v $PWD/test-crawls:/crawls --network=crawl --hostname=${name} webrecorder/browsertrix-crawler crawl --url https://www.webrecorder.net/ --limit 4 --exclude community --collection shared-${name} --crawlId testcrawl --redisStoreUrl redis://redis:6379`);
36+
const crawler = exec(`docker run --rm -v $PWD/test-crawls:/crawls --network=crawl --hostname=${name} webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --limit 4 --exclude community --collection shared-${name} --crawlId testcrawl --redisStoreUrl redis://redis:6379`);
3737

3838
return new Promise((resolve) => {
3939
crawler.on("exit", (code) => {

tests/pageinfo-records.test.js

+17-35
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { WARCParser } from "warcio";
55

66
test("run warc and ensure pageinfo records contain the correct resources", async () => {
77
child_process.execSync(
8-
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --url https://webrecorder.net/about --url https://invalid.invalid/ --scopeType page --collection page-info-test --combineWARC",
8+
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --url https://old.webrecorder.net/about --url https://invalid.invalid/ --scopeType page --collection page-info-test --combineWARC",
99
);
1010

1111
const filename = path.join(
@@ -26,7 +26,7 @@ test("run warc and ensure pageinfo records contain the correct resources", async
2626
for await (const record of parser) {
2727
if (
2828
!foundIndex &&
29-
record.warcTargetURI === "urn:pageinfo:https://webrecorder.net/"
29+
record.warcTargetURI === "urn:pageinfo:https://old.webrecorder.net/"
3030
) {
3131
foundIndex = true;
3232
const text = await record.contentText();
@@ -35,7 +35,7 @@ test("run warc and ensure pageinfo records contain the correct resources", async
3535

3636
if (
3737
!foundAbout &&
38-
record.warcTargetURI === "urn:pageinfo:https://webrecorder.net/about"
38+
record.warcTargetURI === "urn:pageinfo:https://old.webrecorder.net/about"
3939
) {
4040
foundAbout = true;
4141
const text = await record.contentText();
@@ -64,47 +64,47 @@ function validateResourcesIndex(json) {
6464
expect(json).toHaveProperty("urls");
6565
expect(json.counts).toEqual({ jsErrors: 0 });
6666
expect(json.urls).toEqual({
67-
"https://webrecorder.net/": {
67+
"https://old.webrecorder.net/": {
6868
status: 200,
6969
mime: "text/html",
7070
type: "document",
7171
},
72-
"https://webrecorder.net/assets/tools/logo-pywb.png": {
72+
"https://old.webrecorder.net/assets/tools/logo-pywb.png": {
7373
mime: "image/png",
7474
status: 200,
7575
type: "image",
7676
},
77-
"https://webrecorder.net/assets/brand/archivewebpage-icon-color.svg": {
77+
"https://old.webrecorder.net/assets/brand/archivewebpage-icon-color.svg": {
7878
mime: "image/svg+xml",
7979
status: 200,
8080
type: "image",
8181
},
82-
"https://webrecorder.net/assets/brand/browsertrix-icon-color.svg": {
82+
"https://old.webrecorder.net/assets/brand/browsertrix-icon-color.svg": {
8383
mime: "image/svg+xml",
8484
status: 200,
8585
type: "image",
8686
},
87-
"https://webrecorder.net/assets/brand/browsertrixcrawler-icon-color.svg": {
87+
"https://old.webrecorder.net/assets/brand/browsertrixcrawler-icon-color.svg": {
8888
mime: "image/svg+xml",
8989
status: 200,
9090
type: "image",
9191
},
92-
"https://webrecorder.net/assets/brand/replaywebpage-icon-color.svg": {
92+
"https://old.webrecorder.net/assets/brand/replaywebpage-icon-color.svg": {
9393
mime: "image/svg+xml",
9494
status: 200,
9595
type: "image",
9696
},
97-
"https://webrecorder.net/assets/fontawesome/all.css": {
97+
"https://old.webrecorder.net/assets/fontawesome/all.css": {
9898
status: 200,
9999
mime: "text/css",
100100
type: "stylesheet",
101101
},
102-
"https://webrecorder.net/assets/wr-logo.svg": {
102+
"https://old.webrecorder.net/assets/wr-logo.svg": {
103103
status: 200,
104104
mime: "image/svg+xml",
105105
type: "image",
106106
},
107-
"https://webrecorder.net/assets/main.css": {
107+
"https://old.webrecorder.net/assets/main.css": {
108108
status: 200,
109109
mime: "text/css",
110110
type: "stylesheet",
@@ -113,22 +113,15 @@ function validateResourcesIndex(json) {
113113
{ status: 200, mime: "text/css", type: "stylesheet" },
114114
"https://fonts.googleapis.com/css?family=Source+Code+Pro|Source+Sans+Pro&display=swap":
115115
{ status: 200, mime: "text/css", type: "stylesheet" },
116-
"https://stats.browsertrix.com/js/script.tagged-events.js": {
117-
status: 200,
118-
mime: "application/javascript",
119-
type: "script",
120-
},
121116
"https://fonts.gstatic.com/s/sourcesanspro/v22/6xK3dSBYKcSV-LCoeQqfX1RYOo3qOK7l.woff2":
122117
{ status: 200, mime: "font/woff2", type: "font" },
123118
"https://fonts.gstatic.com/s/sourcesanspro/v22/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlxdu.woff2":
124119
{ status: 200, mime: "font/woff2", type: "font" },
125-
"https://webrecorder.net/assets/favicon.ico": {
120+
"https://old.webrecorder.net/assets/favicon.ico": {
126121
status: 200,
127122
mime: "image/vnd.microsoft.icon",
128123
type: "other",
129124
},
130-
"https://stats.browsertrix.com/api/event?__wb_method=POST&n=pageview&u=https%3A%2F%2Fwebrecorder.net%2F&d=webrecorder.net&r=null":
131-
{ status: 202, mime: "text/plain", type: "xhr" },
132125
});
133126
}
134127

@@ -139,17 +132,17 @@ function validateResourcesAbout(json) {
139132
expect(json).toHaveProperty("urls");
140133
expect(json.counts).toEqual({ jsErrors: 0 });
141134
expect(json.urls).toEqual({
142-
"https://webrecorder.net/about": {
135+
"https://old.webrecorder.net/about": {
143136
status: 200,
144137
mime: "text/html",
145138
type: "document",
146139
},
147-
"https://webrecorder.net/assets/main.css": {
140+
"https://old.webrecorder.net/assets/main.css": {
148141
status: 200,
149142
mime: "text/css",
150143
type: "stylesheet",
151144
},
152-
"https://webrecorder.net/assets/fontawesome/all.css": {
145+
"https://old.webrecorder.net/assets/fontawesome/all.css": {
153146
status: 200,
154147
mime: "text/css",
155148
type: "stylesheet",
@@ -158,12 +151,7 @@ function validateResourcesAbout(json) {
158151
{ status: 200, mime: "text/css", type: "stylesheet" },
159152
"https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@700;900&display=swap":
160153
{ status: 200, mime: "text/css", type: "stylesheet" },
161-
"https://stats.browsertrix.com/js/script.tagged-events.js": {
162-
status: 200,
163-
mime: "application/javascript",
164-
type: "script",
165-
},
166-
"https://webrecorder.net/assets/wr-logo.svg": {
154+
"https://old.webrecorder.net/assets/wr-logo.svg": {
167155
status: 200,
168156
mime: "image/svg+xml",
169157
type: "image",
@@ -172,12 +160,6 @@ function validateResourcesAbout(json) {
172160
{ status: 200, mime: "font/woff2", type: "font" },
173161
"https://fonts.gstatic.com/s/sourcesanspro/v22/6xKydSBYKcSV-LCoeQqfX1RYOo3ig4vwlxdu.woff2":
174162
{ status: 200, mime: "font/woff2", type: "font" },
175-
"https://stats.browsertrix.com/api/event?__wb_method=POST&n=pageview&u=https%3A%2F%2Fwebrecorder.net%2Fabout&d=webrecorder.net&r=null":
176-
{
177-
status: 0,
178-
type: "xhr",
179-
error: "net::ERR_BLOCKED_BY_CLIENT",
180-
},
181163
});
182164
}
183165

tests/proxy.test.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ const WRONG_PORT = "33130";
1010
const SSH_PROXY_IMAGE = "linuxserver/openssh-server"
1111

1212
const PDF = "https://specs.webrecorder.net/wacz/1.1.1/wacz-2021.pdf";
13-
const HTML = "https://webrecorder.net/";
13+
const HTML = "https://old.webrecorder.net/";
1414

1515
const extraArgs = "--limit 1 --failOnFailedSeed --timeout 10 --logging debug";
1616

tests/qa_compare.test.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ test("run initial crawl with text and screenshots to prepare for QA", async () =
88
fs.rmSync("./test-crawls/qa-wr-net", { recursive: true, force: true });
99

1010
child_process.execSync(
11-
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --url https://webrecorder.net/about --url https://browsertrix.com/ --url https://webrecorder.net/contact --scopeType page --collection qa-wr-net --text to-warc --screenshot view --generateWACZ",
11+
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --url https://old.webrecorder.net/about --url https://browsertrix.com/ --url https://old.webrecorder.net/contact --scopeType page --collection qa-wr-net --text to-warc --screenshot view --generateWACZ",
1212
);
1313

1414
expect(

tests/rollover-writer.test.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import fs from "fs";
33

44
test("set rollover to 500K and ensure individual WARCs rollover, including screenshots", async () => {
55
child_process.execSync(
6-
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --limit 5 --exclude community --collection rollover-500K --rolloverSize 500000 --screenshot view"
6+
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --limit 5 --exclude community --collection rollover-500K --rolloverSize 500000 --screenshot view"
77
);
88

99
const warcLists = fs.readdirSync("test-crawls/collections/rollover-500K/archive");

0 commit comments

Comments
 (0)