Skip to content

Commit

Permalink
feat: Backup proof failures to google cloud storage (#11255)
Browse files Browse the repository at this point in the history
Backs up failed proof inputs to a configurable Google Cloud Storage
bucket. Uploads are done from the `BrokerCircuitProverFacade` using an
optional `ProofStore` for failed proofs.

Adds a new `GoogleCloudProofStore` that stores proof inputs (outputs not
implemented) in a Google Cloud Storage bucket. Uses Application Default
Credentials for connecting, which works locally once gcloud credentials
have been configured. Still pending: configuring the k8s cluster so pods
there have access to GCS. The store is enabled via a config var
`PROVER_FAILED_PROOF_STORE=gs://aztec-develop/dev/palla/`, which defines
the bucket and path.

Failed jobs are logged with their uri:
```
[19:32:12.167] INFO: prover-client:broker-circuit-prover-facade Stored proof inputs for failed job id=3:PUBLIC_VM:7967c3a6f2c5728a94be3643db4ac088a057392f24f897cbbff4b0ea4fc8bc83 type=PUBLIC_VM at gs://aztec-develop/dev/palla/proofs/inputs/PUBLIC_VM/3:PUBLIC_VM:7967c3a6f2c5728a94be3643db4ac088a057392f24f897cbbff4b0ea4fc8bc83 {"id":"3:PUBLIC_VM:7967c3a6f2c5728a94be3643db4ac088a057392f24f897cbbff4b0ea4fc8bc83","type":0,"uri":"gs://aztec-develop/dev/palla/proofs/inputs/PUBLIC_VM/3:PUBLIC_VM:7967c3a6f2c5728a94be3643db4ac088a057392f24f897cbbff4b0ea4fc8bc83"}
```

This PR also adds `get-proof-inputs`, a command to download the proof
inputs via URI and write them locally using the same filenames that bb
expects (only implemented for AVM for now):

```
$ yarn get-proof-inputs  gs://aztec-develop/dev/palla/proofs/inputs/PUBLIC_VM/3:PUBLIC_VM:4e30aa697f479043cabfab9fab561b7195e2996cbb276cefdd8c6f1f29b60e69
[20:07:15.153] INFO: prover-client:proof-store Creating google cloud proof store at aztec-develop {"bucket":"aztec-develop","path":"dev/palla/proofs/inputs/PUBLIC_VM/3:PUBLIC_VM:4e30aa697f479043cabfab9fab561b7195e2996cbb276cefdd8c6f1f29b60e69"}
[20:07:15.153] INFO: prover-client:get-proof-inputs Processing uri gs://aztec-develop/dev/palla/proofs/inputs/PUBLIC_VM/3:PUBLIC_VM:4e30aa697f479043cabfab9fab561b7195e2996cbb276cefdd8c6f1f29b60e69
[20:07:17.512] INFO: prover-client:get-proof-inputs Found inputs for PUBLIC_VM
[20:07:17.514] INFO: prover-client:get-proof-inputs Wrote AVM public inputs to avm_public_inputs.bin
[20:07:17.516] INFO: prover-client:get-proof-inputs Wrote AVM hints to avm_hints.bin
```

Fixes #11062
  • Loading branch information
spalladino authored Jan 16, 2025
1 parent fbcc8ef commit b4775fd
Show file tree
Hide file tree
Showing 24 changed files with 588 additions and 85 deletions.
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/prover-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ spec:
value: "{{ .Values.proverNode.txGathering.intervalMs }}"
- name: PROVER_NODE_TX_GATHERING_MAX_PARALLEL_REQUESTS
value: "{{ .Values.proverNode.txGathering.maxParallelRequests }}"
- name: PROVER_FAILED_PROOF_STORE
value: "{{ .Values.proverNode.failedProofStore }}"
- name: OTEL_RESOURCE_ATTRIBUTES
value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }}
- name: L1_CHAIN_ID
Expand Down
1 change: 1 addition & 0 deletions spartan/aztec-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ proverNode:
timeoutMs: 60000
intervalMs: 1000
maxParallelRequests: 100
failedProofStore: ""

pxe:
logLevel: "debug; info: aztec:simulator, json-rpc"
Expand Down
8 changes: 8 additions & 0 deletions yarn-project/circuit-types/src/interfaces/prover-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ export type ProverConfig = ActualProverConfig & {
nodeUrl?: string;
/** Identifier of the prover */
proverId: Fr;
/** Number of proving agents to start within the prover. */
proverAgentCount: number;
/** Store for failed proof inputs. */
failedProofStore?: string;
};

export const ProverConfigSchema = z.object({
Expand Down Expand Up @@ -60,6 +63,11 @@ export const proverConfigMappings: ConfigMappingsType<ProverConfig> = {
description: 'The number of prover agents to start',
...numberConfigHelper(1),
},
failedProofStore: {
env: 'PROVER_FAILED_PROOF_STORE',
description:
'Store for failed proof inputs. Google cloud storage is only supported at the moment. Set this value as gs://bucket-name/path/to/store.',
},
};

function parseProverId(str: string) {
Expand Down
44 changes: 40 additions & 4 deletions yarn-project/circuit-types/src/interfaces/proving-job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import { type ZodFor } from '@aztec/foundation/schemas';

import { z } from 'zod';

import { type CircuitName } from '../stats/index.js';
import { type ServerCircuitName } from '../stats/index.js';

export type ProofAndVerificationKey<N extends number> = {
proof: RecursiveProof<N>;
Expand Down Expand Up @@ -95,7 +95,7 @@ export enum ProvingRequestType {
TUBE_PROOF,
}

export function mapProvingRequestTypeToCircuitName(type: ProvingRequestType): CircuitName {
export function mapProvingRequestTypeToCircuitName(type: ProvingRequestType): ServerCircuitName {
switch (type) {
case ProvingRequestType.PUBLIC_VM:
return 'avm-circuit';
Expand All @@ -121,11 +121,12 @@ export function mapProvingRequestTypeToCircuitName(type: ProvingRequestType): Ci
return 'root-parity';
case ProvingRequestType.TUBE_PROOF:
return 'tube-circuit';
default:
default: {
const _exhaustive: never = type;
throw new Error(`Cannot find circuit name for proving request type: ${type}`);
}
}
}

export type AvmProvingRequest = z.infer<typeof AvmProvingRequestSchema>;

export const AvmProvingRequestSchema = z.object({
Expand All @@ -150,7 +151,42 @@ export const ProvingJobInputs = z.discriminatedUnion('type', [
z.object({ type: z.literal(ProvingRequestType.ROOT_ROLLUP), inputs: RootRollupInputs.schema }),
z.object({ type: z.literal(ProvingRequestType.TUBE_PROOF), inputs: TubeInputs.schema }),
]);

/**
 * Resolves the circuit inputs class that corresponds to a proving request type.
 * @param type - The proving request type to look up.
 * @returns The inputs class associated with the given type.
 * @throws If no inputs class is associated with the given type.
 */
export function getProvingJobInputClassFor(type: ProvingRequestType) {
  // Lookup table keyed by proving request type, one entry per supported type.
  const inputClasses = {
    [ProvingRequestType.PUBLIC_VM]: AvmCircuitInputs,
    [ProvingRequestType.PRIVATE_BASE_ROLLUP]: PrivateBaseRollupInputs,
    [ProvingRequestType.PUBLIC_BASE_ROLLUP]: PublicBaseRollupInputs,
    [ProvingRequestType.MERGE_ROLLUP]: MergeRollupInputs,
    [ProvingRequestType.EMPTY_BLOCK_ROOT_ROLLUP]: EmptyBlockRootRollupInputs,
    [ProvingRequestType.BLOCK_ROOT_ROLLUP]: BlockRootRollupInputs,
    [ProvingRequestType.SINGLE_TX_BLOCK_ROOT_ROLLUP]: SingleTxBlockRootRollupInputs,
    [ProvingRequestType.BLOCK_MERGE_ROLLUP]: BlockMergeRollupInputs,
    [ProvingRequestType.ROOT_ROLLUP]: RootRollupInputs,
    [ProvingRequestType.BASE_PARITY]: BaseParityInputs,
    [ProvingRequestType.ROOT_PARITY]: RootParityInputs,
    [ProvingRequestType.TUBE_PROOF]: TubeInputs,
  };

  // Only own entries count, so inherited object properties never match an unknown type.
  if (!Object.prototype.hasOwnProperty.call(inputClasses, type)) {
    throw new Error(`Cannot find circuit inputs class for proving type ${type}`);
  }
  return inputClasses[type];
}

export type ProvingJobInputs = z.infer<typeof ProvingJobInputs>;

export type ProvingJobInputsMap = {
[ProvingRequestType.PUBLIC_VM]: AvmCircuitInputs;
[ProvingRequestType.PRIVATE_BASE_ROLLUP]: PrivateBaseRollupInputs;
Expand Down
18 changes: 11 additions & 7 deletions yarn-project/circuit-types/src/stats/stats.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,15 @@ export type NodeSyncedChainHistoryStats = {
dbSize: number;
};

export type CircuitName =
/** Circuit names grouped as client circuits: the private kernel variants and the app circuit. */
export type ClientCircuitName =
| 'private-kernel-init'
| 'private-kernel-inner'
| 'private-kernel-reset'
| 'private-kernel-tail'
| 'private-kernel-tail-to-public'
| 'app-circuit';

export type ServerCircuitName =
| 'base-parity'
| 'root-parity'
| 'private-base-rollup'
Expand All @@ -84,15 +92,11 @@ export type CircuitName =
| 'empty-block-root-rollup'
| 'block-merge-rollup'
| 'root-rollup'
| 'private-kernel-init'
| 'private-kernel-inner'
| 'private-kernel-reset'
| 'private-kernel-tail'
| 'private-kernel-tail-to-public'
| 'app-circuit'
| 'avm-circuit'
| 'tube-circuit';

/** Name of any circuit: either a client circuit name or a server circuit name. */
export type CircuitName = ClientCircuitName | ServerCircuitName;

/** Stats for circuit simulation. */
export type CircuitSimulationStats = {
/** name of the event. */
Expand Down
1 change: 1 addition & 0 deletions yarn-project/foundation/src/config/env_var.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ export type EnvVar =
| 'PROVER_BROKER_JOB_MAX_RETRIES'
| 'PROVER_COORDINATION_NODE_URL'
| 'PROVER_DISABLED'
| 'PROVER_FAILED_PROOF_STORE'
| 'PROVER_ID'
| 'PROVER_JOB_POLL_INTERVAL_MS'
| 'PROVER_JOB_TIMEOUT_MS'
Expand Down
4 changes: 3 additions & 1 deletion yarn-project/prover-client/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
"formatting:fix": "run -T eslint --fix ./src && run -T prettier -w ./src",
"bb": "node --no-warnings ./dest/bb/index.js",
"test": "NODE_NO_WARNINGS=1 node --experimental-vm-modules ../node_modules/.bin/jest --testTimeout=3500000 --forceExit",
"test:debug": "LOG_LEVEL=debug NODE_NO_WARNINGS=1 node --experimental-vm-modules ../node_modules/.bin/jest --testTimeout=1500000 --forceExit --testNamePattern prover/bb_prover/parity"
"test:debug": "LOG_LEVEL=debug NODE_NO_WARNINGS=1 node --experimental-vm-modules ../node_modules/.bin/jest --testTimeout=1500000 --forceExit --testNamePattern prover/bb_prover/parity",
"get-proof-inputs": "node --no-warnings ./dest/bin/get-proof-inputs.js"
},
"jest": {
"moduleNameMapper": {
Expand Down Expand Up @@ -76,6 +77,7 @@
"@aztec/simulator": "workspace:^",
"@aztec/telemetry-client": "workspace:^",
"@aztec/world-state": "workspace:^",
"@google-cloud/storage": "^7.15.0",
"@noir-lang/types": "portal:../../noir/packages/types",
"commander": "^12.1.0",
"lodash.chunk": "^4.2.0",
Expand Down
60 changes: 60 additions & 0 deletions yarn-project/prover-client/src/bin/get-proof-inputs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/* eslint-disable no-console */
import { AVM_HINTS_FILENAME, AVM_PUBLIC_INPUTS_FILENAME } from '@aztec/bb-prover';
import { type ProofUri, ProvingJobInputs, ProvingRequestType } from '@aztec/circuit-types';
import { jsonParseWithSchema, jsonStringify } from '@aztec/foundation/json-rpc';
import { createLogger } from '@aztec/foundation/log';

import { mkdirSync, writeFileSync } from 'fs';

import { createProofStoreForUri } from '../proving_broker/index.js';

// Logger shared by this script, created under the 'prover-client:get-proof-inputs' name.
const logger = createLogger('prover-client:get-proof-inputs');

/** Prints the command-line usage string to stderr. */
function printUsage() {
  const usage = 'Usage: get-proof-inputs <proof-uri> [out-dir=.]';
  console.error(usage);
}

/**
 * Script entry point.
 *
 * Reads the proof URI and an optional output directory (defaults to `.`) from the
 * command line, fetches the proof inputs stored at that URI, writes them into the
 * output directory, and finally prints the schema-parsed inputs to stdout.
 *
 * Passing `--help` as the first argument only prints the usage string.
 *
 * @throws If no proof URI is given.
 */
async function main() {
  const [, , uri, outDirArg] = process.argv;

  if (uri === '--help') {
    printUsage();
    return;
  }

  if (!uri) {
    printUsage();
    throw new Error('Missing proof URI');
  }

  // An empty or missing second argument falls back to the current directory.
  const outDir = outDirArg || '.';
  mkdirSync(outDir, { recursive: true });

  const proofStore = createProofStoreForUri(uri);
  logger.info(`Processing uri ${uri}`);

  const input = await proofStore.getProofInput(uri as ProofUri);
  const typeName = ProvingRequestType[input.type];
  logger.info(`Found inputs for ${typeName}`);
  writeProofInputs(input, outDir);

  // Round-trip the inputs through JSON and the schema before printing them.
  const reparsed = jsonParseWithSchema(jsonStringify(input), ProvingJobInputs);
  console.log(reparsed.inputs);
}

/**
 * Writes the given proof inputs as files inside `outDir`.
 * This mimics the behavior of bb-prover/src/bb/execute.ts
 *
 * Only `PUBLIC_VM` inputs are handled: the public inputs and the hints are each
 * serialized to a buffer and written under their respective AVM filenames.
 *
 * @param input - The proving job inputs to write.
 * @param outDir - Directory where the files are written.
 * @throws If the proving request type is anything other than `PUBLIC_VM`.
 */
function writeProofInputs(input: ProvingJobInputs, outDir: string) {
  if (input.type !== ProvingRequestType.PUBLIC_VM) {
    throw new Error(`Unimplemented proving request type: ${ProvingRequestType[input.type]}`);
  }

  const avmInputs = input.inputs;

  const publicInputsPath = `${outDir}/${AVM_PUBLIC_INPUTS_FILENAME}`;
  writeFileSync(publicInputsPath, avmInputs.output.toBuffer());
  logger.info(`Wrote AVM public inputs to ${AVM_PUBLIC_INPUTS_FILENAME}`);

  const hintsPath = `${outDir}/${AVM_HINTS_FILENAME}`;
  writeFileSync(hintsPath, avmInputs.avmHints.toBuffer());
  logger.info(`Wrote AVM hints to ${AVM_HINTS_FILENAME}`);
}

// Run the script; any failure is printed to stderr and the process exits with status 1.
const reportAndExit = (failure: unknown): never => {
  console.error(failure);
  process.exit(1);
};

main().catch(reportAndExit);
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { AbortError } from '@aztec/foundation/error';
import { sleep } from '@aztec/foundation/sleep';
import { getTelemetryClient } from '@aztec/telemetry-client';

import { InlineProofStore, type ProofStore } from '../proving_broker/proof_store.js';
import { InlineProofStore, type ProofStore } from '../proving_broker/proof_store/index.js';
import { MemoryProvingQueue } from './memory-proving-queue.js';

describe('MemoryProvingQueue', () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ import { type PromiseWithResolvers, RunningPromise, promiseWithResolvers } from
import { PriorityMemoryQueue } from '@aztec/foundation/queue';
import { type TelemetryClient, type Tracer, trackSpan } from '@aztec/telemetry-client';

import { InlineProofStore, type ProofStore } from '../proving_broker/proof_store.js';
import { InlineProofStore, type ProofStore } from '../proving_broker/proof_store/index.js';
import { ProvingQueueMetrics } from './queue_metrics.js';

type ProvingJobWithResolvers<T extends ProvingRequestType = ProvingRequestType> = ProvingJob &
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import {
trackSpan,
} from '@aztec/telemetry-client';

import { InlineProofStore } from '../proving_broker/proof_store.js';
import { InlineProofStore } from '../proving_broker/proof_store/index.js';

const PRINT_THRESHOLD_NS = 6e10; // 60 seconds

Expand Down
12 changes: 9 additions & 3 deletions yarn-project/prover-client/src/prover-client/prover-client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import { type TelemetryClient, getTelemetryClient } from '@aztec/telemetry-clien
import { type ProverClientConfig } from '../config.js';
import { ProvingOrchestrator } from '../orchestrator/orchestrator.js';
import { BrokerCircuitProverFacade } from '../proving_broker/broker_prover_facade.js';
import { InlineProofStore } from '../proving_broker/proof_store.js';
import { InlineProofStore, type ProofStore, createProofStore } from '../proving_broker/proof_store/index.js';
import { ProvingAgent } from '../proving_broker/proving_agent.js';
import { ServerEpochProver } from './server-epoch-prover.js';

Expand All @@ -27,17 +27,23 @@ export class ProverClient implements EpochProverManager {
private running = false;
private agents: ProvingAgent[] = [];

private proofStore: ProofStore;
private failedProofStore: ProofStore | undefined;

private constructor(
private config: ProverClientConfig,
private worldState: ForkMerkleTreeOperations,
private orchestratorClient: ProvingJobProducer,
private agentClient?: ProvingJobConsumer,
private telemetry: TelemetryClient = getTelemetryClient(),
private log = createLogger('prover-client:tx-prover'),
) {}
) {
this.proofStore = new InlineProofStore();
this.failedProofStore = this.config.failedProofStore ? createProofStore(this.config.failedProofStore) : undefined;
}

public createEpochProver(): EpochProver {
const facade = new BrokerCircuitProverFacade(this.orchestratorClient);
const facade = new BrokerCircuitProverFacade(this.orchestratorClient, this.proofStore, this.failedProofStore);
const orchestrator = new ProvingOrchestrator(this.worldState, facade, this.config.proverId, this.telemetry);
return new ServerEpochProver(facade, orchestrator);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,23 @@ import { jest } from '@jest/globals';

import { MockProver, TestBroker } from '../test/mock_prover.js';
import { BrokerCircuitProverFacade } from './broker_prover_facade.js';
import { InlineProofStore } from './proof_store.js';
import { InlineProofStore } from './proof_store/index.js';

describe('BrokerCircuitProverFacade', () => {
let facade: BrokerCircuitProverFacade;
let proofStore: InlineProofStore;
let errorProofStore: InlineProofStore;
let broker: TestBroker;
let prover: MockProver;
let agentPollInterval: number;

beforeEach(async () => {
proofStore = new InlineProofStore();
errorProofStore = new InlineProofStore();
prover = new MockProver();
agentPollInterval = 100;
broker = new TestBroker(2, prover, proofStore, agentPollInterval);
facade = new BrokerCircuitProverFacade(broker, proofStore);
facade = new BrokerCircuitProverFacade(broker, proofStore, errorProofStore);

await broker.start();
facade.start();
Expand All @@ -31,6 +33,7 @@ describe('BrokerCircuitProverFacade', () => {
afterEach(async () => {
await broker.stop();
await facade.stop();
jest.restoreAllMocks();
});

it('sends jobs to the broker', async () => {
Expand All @@ -39,11 +42,13 @@ describe('BrokerCircuitProverFacade', () => {

jest.spyOn(broker, 'enqueueProvingJob');
jest.spyOn(prover, 'getBaseParityProof');
jest.spyOn(errorProofStore, 'saveProofInput');

await expect(facade.getBaseParityProof(inputs, controller.signal, 42)).resolves.toBeDefined();

expect(broker.enqueueProvingJob).toHaveBeenCalled();
expect(prover.getBaseParityProof).toHaveBeenCalledWith(inputs, expect.anything(), 42);
expect(errorProofStore.saveProofInput).not.toHaveBeenCalled();
});

it('handles multiple calls for the same job', async () => {
Expand Down Expand Up @@ -103,6 +108,7 @@ describe('BrokerCircuitProverFacade', () => {
const resultPromise = promiseWithResolvers<any>();
jest.spyOn(broker, 'enqueueProvingJob');
jest.spyOn(prover, 'getBaseParityProof').mockReturnValue(resultPromise.promise);
jest.spyOn(errorProofStore, 'saveProofInput');

// send N identical proof requests
const CALLS = 50;
Expand Down Expand Up @@ -136,6 +142,8 @@ describe('BrokerCircuitProverFacade', () => {
expect(broker.enqueueProvingJob).toHaveBeenCalledTimes(2);
// but no new jobs were created
expect(prover.getBaseParityProof).toHaveBeenCalledTimes(1);
// and the proof input will have been backed up
expect(errorProofStore.saveProofInput).toHaveBeenCalled();
});

it('handles aborts', async () => {
Expand All @@ -145,6 +153,7 @@ describe('BrokerCircuitProverFacade', () => {
const resultPromise = promiseWithResolvers<any>();
jest.spyOn(broker, 'enqueueProvingJob');
jest.spyOn(prover, 'getBaseParityProof').mockReturnValue(resultPromise.promise);
jest.spyOn(errorProofStore, 'saveProofInput');

const promise = facade.getBaseParityProof(inputs, controller.signal, 42).catch(err => ({ err }));

Expand All @@ -154,6 +163,7 @@ describe('BrokerCircuitProverFacade', () => {
controller.abort();

await expect(promise).resolves.toEqual({ err: new Error('Aborted') });
expect(errorProofStore.saveProofInput).not.toHaveBeenCalled();
});

it('rejects jobs when the facade is stopped', async () => {
Expand Down
Loading

0 comments on commit b4775fd

Please sign in to comment.