Skip to content

Commit

Permalink
feat: cache index data (#62)
Browse files Browse the repository at this point in the history
Caches satnav data in cloudflare's cache.
  • Loading branch information
Alan Shaw authored Jul 13, 2023
1 parent fb85a04 commit 759d117
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 18 deletions.
14 changes: 7 additions & 7 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"streaming-iterables": "^7.1.0"
},
"devDependencies": {
"@cloudflare/workers-types": "^4.20230518.0",
"@cloudflare/workers-types": "^4.20230628.0",
"ava": "^5.2.0",
"carbites": "^1.0.6",
"esbuild": "^0.17.11",
Expand Down
15 changes: 14 additions & 1 deletion src/bindings.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,22 @@ export interface Environment {
MAX_SHARDS: string
}

/**
* Simple bucket does not allow range requests or support metadata on returned
* objects.
*/
export interface SimpleBucket {
get (key: string): Promise<SimpleBucketObject | null>
}

export interface SimpleBucketObject {
readonly key: string
readonly body: ReadableStream
}

export interface IndexSource {
/** Bucket this index can be found in */
bucket: R2Bucket
bucket: SimpleBucket
/** Bucket key for the source */
key: string
/**
Expand Down
2 changes: 2 additions & 0 deletions src/constants.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
export const MAX_CAR_BYTES_IN_MEMORY = 1024 * 1024 * 5
export const CAR_CODE = 0x0202
48 changes: 48 additions & 0 deletions src/lib/bucket.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/* eslint-env worker */

const MAX_AGE = 86400 // 1 day

/**
* @typedef {import('../bindings').SimpleBucket} SimpleBucket
* @implements {SimpleBucket}
*/
export class CachingBucket {
#source
#cache
#ctx

/**
* @param {SimpleBucket} source
* @param {Cache} cache
* @param {Pick<import('@cloudflare/workers-types').ExecutionContext, 'waitUntil'>} ctx
*/
constructor (source, cache, ctx) {
this.#source = source
this.#cache = cache
this.#ctx = ctx
}

/** @type {import('../bindings').SimpleBucket['get']} */
async get (key) {
const cacheKey = new URL(key, 'http://localhost')
const res = await this.#cache.match(cacheKey)
if (res && res.body) return { key, body: res.body }
const obj = await this.#source.get(key)
if (!obj) return null
const [body0, body1] = obj.body.tee()
this.#ctx.waitUntil(this.#cache.put(cacheKey, new Response(body1, {
headers: { 'Cache-Control': `max-age=${MAX_AGE}` }
})))
return { key, body: body0 }
}
}

/**
* Cast an R2 bucket as a simple bucket.
*
* @param {import('@cloudflare/workers-types').R2Bucket} r2
* @returns {SimpleBucket}
*/
// @ts-expect-error R2Bucket.get is overloaded with a non-optional options which
// means it does not overlap with our SimpleBucket interface :(
export const asSimpleBucket = r2 => r2
24 changes: 18 additions & 6 deletions src/middleware.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@ import { parseCid, HttpError, toIterable } from '@web3-storage/gateway-lib/util'
import { BatchingR2Blockstore } from './lib/blockstore.js'
import { version } from '../package.json'
import { BlocklyIndex, MultiCarIndex, StreamingCarIndex } from './lib/car-index.js'

const MAX_CAR_BYTES_IN_MEMORY = 1024 * 1024 * 5
const CAR_CODE = 0x0202
import { CachingBucket, asSimpleBucket } from './lib/bucket.js'
import { MAX_CAR_BYTES_IN_MEMORY, CAR_CODE } from './constants.js'

/**
* @typedef {import('./bindings').Environment} Environment
Expand Down Expand Up @@ -46,6 +45,8 @@ export function withIndexSources (handler) {
// context. If we have a given root CID split across hundreds of CARs,
// freeway will hit the sub-requests limit and not serve the content.
const maxShards = env.MAX_SHARDS ? parseInt(env.MAX_SHARDS) : 825
// open a cache explicitly for storing index data
const cache = await caches.open('index-source')

/** @type {import('./bindings').IndexSource[]} */
let indexSources = ctx.searchParams
Expand All @@ -61,7 +62,11 @@ export function withIndexSources (handler) {
return cids
}, /** @type {import('cardex/api').CARLink[]} */([]))
})
.map(cid => ({ origin: cid, bucket: env.SATNAV, key: `${cid}/${cid}.car.idx` }))
.map(cid => ({
origin: cid,
bucket: new CachingBucket(asSimpleBucket(env.SATNAV), cache, ctx),
key: `${cid}/${cid}.car.idx`
}))

// if origins were not specified or invalid
if (!indexSources.length) {
Expand All @@ -73,14 +78,21 @@ export function withIndexSources (handler) {

// if the first encountered item is a index rollup, use it
if (!cursor && results.objects[0].key.endsWith('rollup.idx')) {
indexSources.push({ bucket: env.DUDEWHERE, key: results.objects[0].key })
indexSources.push({
bucket: new CachingBucket(asSimpleBucket(env.DUDEWHERE), cache, ctx),
key: results.objects[0].key
})
break
}

indexSources.push(...results.objects.map(o => {
const cid = /** @type {import('cardex/api').CARLink} */(parseCid(o.key.split('/')[1]))
const key = `${cid}/${cid}.car.idx`
return { origin: cid, bucket: env.SATNAV, key }
return {
origin: cid,
bucket: new CachingBucket(asSimpleBucket(env.SATNAV), cache, ctx),
key
}
}))

if (indexSources.length > maxShards) {
Expand Down
45 changes: 42 additions & 3 deletions test/index.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { Miniflare } from 'miniflare'
import { equals } from 'uint8arrays'
import { CarReader } from '@ipld/car'
import { Builder } from './helpers.js'
import { MAX_CAR_BYTES_IN_MEMORY } from '../src/constants.js'

describe('freeway', () => {
/** @type {Miniflare} */
Expand All @@ -28,7 +29,8 @@ describe('freeway', () => {
modules: true,
r2Buckets: bucketNames,
// r2Persist: true
kvNamespaces: ['BLOCKLY']
kvNamespaces: ['BLOCKLY'],
usageModel: 'unbound'
})

const buckets = await Promise.all(bucketNames.map(b => miniflare.getR2Bucket(b)))
Expand Down Expand Up @@ -105,7 +107,7 @@ describe('freeway', () => {
const input = [{ path: 'sargo.tar.xz', content: randomBytes(609261780) }]
const { dataCid, carCids } = await builder.add(input)

// remove the the CAR CIDs from DUDEWHERE so that only the rollup index can
// remove the CAR CIDs from DUDEWHERE so that only the rollup index can
// be used to satisfy the request.
const bucket = await miniflare.getR2Bucket('DUDEWHERE')
for (const cid of carCids) {
Expand Down Expand Up @@ -133,7 +135,7 @@ describe('freeway', () => {
// generate blockly blocks
await builder.blocks(dataCid)

// remove the the CAR CIDs from DUDEWHERE so that only blockly can
// remove the CAR CIDs from DUDEWHERE so that only blockly can
// be used to satisfy the request.
const bucket = await miniflare.getR2Bucket('DUDEWHERE')
for (const cid of carCids) {
Expand All @@ -146,4 +148,41 @@ describe('freeway', () => {
const output = new Uint8Array(await res1.arrayBuffer())
assert(equals(input[0].content, output))
})

it('should cache index files', { only: true }, async () => {
const input = [{ path: 'sargo.tar.xz', content: randomBytes(MAX_CAR_BYTES_IN_MEMORY + 1) }]
const { dataCid, carCids } = await builder.add(input)

const url = `http://localhost:8787/ipfs/${dataCid}/${input[0].path}`
const res0 = await miniflare.dispatchFetch(url)
assert.equal(res0.status, 200)

// wait for response to be put in cache
await res0.waitUntil()

const caches = await miniflare.getCaches()
const indexCache = await caches.open('index-source')

// remove the indexes from SATNAV
const bucket = await miniflare.getR2Bucket('SATNAV')
for (const cid of carCids) {
const key = `${cid}/${cid}.car.idx`
assert.ok(await indexCache.match(`http://localhost/${key}`))
assert.ok(await bucket.head(key))
await bucket.delete(key) // would be great if this returned a boolean 🙄
assert.ok(!(await bucket.head(key)))
}

// delete response from cache, so a second request has to construct the
// response again by reading from cached index
const delRes = await caches.default.delete(url)
assert.ok(delRes)

// should still be able serve this CID now - SATNAV index was found in cache
const res1 = await miniflare.dispatchFetch(`http://localhost:8787/ipfs/${dataCid}/${input[0].path}`)
assert.equal(res1.status, 200)

const output = new Uint8Array(await res1.arrayBuffer())
assert(equals(input[0].content, output))
})
})

0 comments on commit 759d117

Please sign in to comment.