Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(assets): Use entity-tags to revalidate cached remote images #12426

Merged
merged 12 commits into from
Dec 18, 2024
Merged
7 changes: 7 additions & 0 deletions .changeset/red-poems-pay.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
'astro': minor
---

Improves asset caching of remote images

Astro will now store [entity tags](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag) and the [Last-Modified](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified) date for cached remote images and use them to revalidate the cache when it goes stale.
101 changes: 76 additions & 25 deletions packages/astro/src/assets/build/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@ import { getConfiguredImageService } from '../internal.js';
import type { LocalImageService } from '../services/service.js';
import type { AssetsGlobalStaticImagesList, ImageMetadata, ImageTransform } from '../types.js';
import { isESMImportedImage } from '../utils/imageKind.js';
import { type RemoteCacheEntry, loadRemoteImage } from './remote.js';
import { type RemoteCacheEntry, loadRemoteImage, revalidateRemoteImage } from './remote.js';

interface GenerationDataUncached {
cached: false;
cached: 'miss';
weight: {
before: number;
after: number;
};
}

interface GenerationDataCached {
cached: true;
cached: 'revalidated' | 'hit';
}

type GenerationData = GenerationDataUncached | GenerationDataCached;
Expand All @@ -43,7 +43,12 @@ type AssetEnv = {
assetsFolder: AstroConfig['build']['assets'];
};

type ImageData = { data: Uint8Array; expires: number };
type ImageData = {
data: Uint8Array;
expires: number;
etag?: string;
lastModified?: string;
};

export async function prepareAssetsGenerationEnv(
pipeline: BuildPipeline,
Expand Down Expand Up @@ -135,9 +140,12 @@ export async function generateImagesForPath(
const timeEnd = performance.now();
const timeChange = getTimeStat(timeStart, timeEnd);
const timeIncrease = `(+${timeChange})`;
const statsText = generationData.cached
? `(reused cache entry)`
: `(before: ${generationData.weight.before}kB, after: ${generationData.weight.after}kB)`;
const statsText =
generationData.cached !== 'miss'
? generationData.cached === 'hit'
? `(reused cache entry)`
: `(revalidated cache entry)`
: `(before: ${generationData.weight.before}kB, after: ${generationData.weight.after}kB)`;
const count = `(${env.count.current}/${env.count.total})`;
env.logger.info(
null,
Expand All @@ -156,7 +164,7 @@ export async function generateImagesForPath(
const finalFolderURL = new URL('./', finalFileURL);
await fs.promises.mkdir(finalFolderURL, { recursive: true });

// For remote images, instead of saving the image directly, we save a JSON file with the image data and expiration date from the server
// For remote images, instead of saving the image directly, we save a JSON file with the image data, expiration date, etag and last-modified date from the server
const cacheFile = basename(filepath) + (isLocalImage ? '' : '.json');
const cachedFileURL = new URL(cacheFile, env.assetsCacheDir);

Expand All @@ -166,7 +174,7 @@ export async function generateImagesForPath(
await fs.promises.copyFile(cachedFileURL, finalFileURL, fs.constants.COPYFILE_FICLONE);

return {
cached: true,
cached: 'hit',
};
} else {
const JSONData = JSON.parse(readFileSync(cachedFileURL, 'utf-8')) as RemoteCacheEntry;
Expand All @@ -184,11 +192,43 @@ export async function generateImagesForPath(
await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64'));

return {
cached: true,
cached: 'hit',
};
} else {
await fs.promises.unlink(cachedFileURL);
}

// Try to revalidate the cache
if (JSONData.etag || JSONData.lastModified) {
try {
const revalidatedData = await revalidateRemoteImage(options.src as string, {
etag: JSONData.etag,
lastModified: JSONData.lastModified,
});

if (revalidatedData.data.length) {
// Image cache was stale, update original image to avoid redownload
originalImage = revalidatedData;
} else {
revalidatedData.data = Buffer.from(JSONData.data, 'base64');

// Freshen cache on disk
await writeRemoteCacheFile(cachedFileURL, revalidatedData, env);

await fs.promises.writeFile(finalFileURL, revalidatedData.data);
return { cached: 'revalidated' };
}
} catch (e) {
// Reuse stale cache if revalidation fails
env.logger.warn(
null,
`An error was encountered while revalidating a cached remote asset. Proceeding with stale cache. ${e}`,
);

await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64'));
return { cached: 'hit' };
}
}

await fs.promises.unlink(cachedFileURL);
}
} catch (e: any) {
if (e.code !== 'ENOENT') {
Expand All @@ -209,6 +249,8 @@ export async function generateImagesForPath(
let resultData: Partial<ImageData> = {
data: undefined,
expires: originalImage.expires,
etag: originalImage.etag,
lastModified: originalImage.lastModified,
};

const imageService = (await getConfiguredImageService()) as LocalImageService;
Expand Down Expand Up @@ -239,13 +281,7 @@ export async function generateImagesForPath(
if (isLocalImage) {
await fs.promises.writeFile(cachedFileURL, resultData.data);
} else {
await fs.promises.writeFile(
cachedFileURL,
JSON.stringify({
data: Buffer.from(resultData.data).toString('base64'),
expires: resultData.expires,
}),
);
await writeRemoteCacheFile(cachedFileURL, resultData as ImageData, env);
}
}
} catch (e) {
Expand All @@ -259,7 +295,7 @@ export async function generateImagesForPath(
}

return {
cached: false,
cached: 'miss',
weight: {
// Divide by 1024 to get size in kilobytes
before: Math.trunc(originalImage.data.byteLength / 1024),
Expand All @@ -269,6 +305,25 @@ export async function generateImagesForPath(
}
}

async function writeRemoteCacheFile(cachedFileURL: URL, resultData: ImageData, env: AssetEnv) {
try {
return await fs.promises.writeFile(
cachedFileURL,
JSON.stringify({
data: Buffer.from(resultData.data).toString('base64'),
Copy link
Contributor

@ascorbic ascorbic Nov 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realise that this is what we were already doing, but it does seem a bit wasteful to be saving images as JSON-encoded base64 strings, and this could be a good time to fix it. I think it would be better to store the image in a binary, and then use a separate JSON file for metadata, probably with the same filename alongside it but with something like an appended .json

expires: resultData.expires,
etag: resultData.etag,
lastModified: resultData.lastModified,
}),
);
} catch (e) {
env.logger.warn(
null,
`An error was encountered while writing the cache file for a remote asset. Proceeding without caching this asset. Error: ${e}`,
);
}
}

export function getStaticImageList(): AssetsGlobalStaticImagesList {
if (!globalThis?.astroAsset?.staticImages) {
return new Map();
Expand All @@ -279,11 +334,7 @@ export function getStaticImageList(): AssetsGlobalStaticImagesList {

async function loadImage(path: string, env: AssetEnv): Promise<ImageData> {
if (isRemotePath(path)) {
const remoteImage = await loadRemoteImage(path);
return {
data: remoteImage.data,
expires: remoteImage.expires,
};
Comment on lines -282 to -286
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea what happened here, but thank you for cleaning it, ha

return await loadRemoteImage(path);
}

return {
Expand Down
62 changes: 61 additions & 1 deletion packages/astro/src/assets/build/remote.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import CachePolicy from 'http-cache-semantics';

export type RemoteCacheEntry = { data: string; expires: number };
export type RemoteCacheEntry = {
data: string;
expires: number;
etag?: string;
lastModified?: string;
};

export async function loadRemoteImage(src: string) {
const req = new Request(src);
Expand All @@ -19,6 +24,61 @@ export async function loadRemoteImage(src: string) {
return {
data: Buffer.from(await res.arrayBuffer()),
expires: Date.now() + expires,
etag: res.headers.get('Etag') ?? undefined,
lastModified: res.headers.get('Last-Modified') ?? undefined,
};
}

/**
* Revalidate a cached remote asset using its entity-tag or modified date.
* Uses the [If-None-Match](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match) and [If-Modified-Since](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since)
* headers to check with the remote server if the cached version of a remote asset is still up to date.
* The remote server may respond that the cached asset is still up-to-date if the entity-tag or modification time matches (304 Not Modified), or respond with an updated asset (200 OK)
* @param src - url to remote asset
* @param revalidationData - an object containing the stored Entity-Tag of the cached asset and/or the Last Modified time
* @returns An ImageData object containing the asset data, a new expiry time, and the asset's etag. The data buffer will be empty if the asset was not modified.
*/
export async function revalidateRemoteImage(
src: string,
revalidationData: { etag?: string; lastModified?: string },
) {
const headers = {
...(revalidationData.etag && { 'If-None-Match': revalidationData.etag }),
...(revalidationData.lastModified && { 'If-Modified-Since': revalidationData.lastModified }),
};
const req = new Request(src, { headers });
const res = await fetch(req);

// Asset not modified: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/304
if (!res.ok && res.status !== 304) {
throw new Error(
`Failed to revalidate cached remote image ${src}. The request did not return a 200 OK / 304 NOT MODIFIED response. (received ${res.status} ${res.statusText})`,
);
}

const data = Buffer.from(await res.arrayBuffer());

if (res.ok && !data.length) {
// Server did not include body but indicated cache was stale
return await loadRemoteImage(src);
}

// calculate an expiration date based on the response's TTL
const policy = new CachePolicy(
webToCachePolicyRequest(req),
webToCachePolicyResponse(
res.ok ? res : new Response(null, { status: 200, headers: res.headers }),
), // 304 responses themselves are not cachable, so just pretend to get the refreshed TTL
);
const expires = policy.storable() ? policy.timeToLive() : 0;

return {
data,
expires: Date.now() + expires,
// While servers should respond with the same headers as a 200 response, if they don't we should reuse the stored value
etag: res.headers.get('Etag') ?? (res.ok ? undefined : revalidationData.etag),
lastModified:
res.headers.get('Last-Modified') ?? (res.ok ? undefined : revalidationData.lastModified),
};
}

Expand Down
Loading