add central configuration for Apollo Client cache sizes #11408

phryneas · 2023-12-04T12:10:59Z

This PR adds a central configuration object for cache size configuration to Apollo Client.

Most of this is explained in very long comments in the code, but I also made this diagram (that will need a lot of zooming, sorry!), which I'll add at the end of this comment.

Open Questions:

What do we do about resultCacheMaxSize (added in "add resultCacheMaxSize" #8107, for historical context)? Currently it governs executeSelectionSet, executeSubSelectedArray and maybeBroadcastWatch, which are some of the biggest caches, and has no maximum value.
(I decided to mark resultCacheMaxSize as @deprecated and add three more options for these caches, but always give resultCacheMaxSize precedence if it is specified.)
Does someone have an idea for a variant of this that is a bit more "lazy-loading" and possibly has a lower bundle size?

Real-world values

Here are some example sizes that I got by plugging this into the Spotify showcase and clicking around until I visited pretty much every screen a few times:

{
  "limits": {
    "parser": 1000,
    "canonicalStringify": 1000,
    "print": 2000,
    "documentTransform": 2000,
    "queryManagerTransforms": 2000,
    "persistedQueryHashes": 2000,
    "fragmentRegistryTransform": 2000,
    "fragmentRegistryLookup": 1000,
    "fragmentRegistryFindFragmentSpreads": 4000
  },
  "sizes": {
    "global": {
      "print": 14,
      "parser": 26,
      "canonicalStringify": 4
    },
    "links": [],
    "queryManager": {
      "Transforms": 14,
      "documentTransforms": []
    },
    "cache": {
      "addTypenameTransform": [
        14
      ],
      "storeReader": {
        "executeSelectionSet": 4345,
        "executeSubSelectedArray": 1206
      },
      "maybeBroadcastWatch": 32,
      "fragmentRegistry": {
        "findFragmentSpreads": 34,
        "lookup": 20,
        "transform": 14
      }
    }
  }
}

Caches and their relationships

erDiagram


parser {
  WeakCache cache
  key DocumentNode
  value name "string"
  value type "DocumentType"
  value variables "readonly VariableDefinitionNode[]"
  max TBD
}

parser o|--o{ useMutation : "verifyDocumentType"
parser o|--o{ useQuery : "verifyDocumentType"
parser o|--o{ useSubscription : "verifyDocumentType"
parser o|--o{ useSuspenseQuery : "verifyDocumentType"
parser o|--o{ HOCs: ""

SuspenseCache {
  Trie queryRefs
  keys array "DocumentNode string ...any[]"
  value QueryRef
  max impossible "not necessary"
}

SuspenseCache o|--o{ ObservableQuery : "query,variables"
SuspenseCache o|--o{ useSuspenseQuery : ""


PersistedQueryLink {
  WeakCache hashesByQuery
  key DocumentNode "serverQuery"
  value Promise "string"
  max TBD
}


ObservableQuery o|--o{ useQuery : "query,variables"
ObservableQuery o|--o{ useMutation : "query,variables"
ObservableQuery o|--o{ SuspenseCache : "query,variables"

ObservableQuery {

}

print {
  WeakCache printCache
  key DocumentNode
  value string
  max TBD
}

QueryManager {
  Trie inFlightLinkObservables
  keys array "print(serverQuery) canonicalStringify(variables)"
  value Observable
  max unnecessary
  WeakMap transformCache
  key DocumentNode "(which?)"
  value TransformCacheEntry "serverQuery, clientQuery, booleans"
  max impossible "could be WeakCache?"
}
ObservableQuery }o--o{ QueryManager : "query,variables"
QueryManager }o--o{ DocumentTransform : ""

DocumentTransform {
  WeakSet resultCache
  Trie stableCacheKeys
  keys array "[DocumentNode] or user-defined"
  value DocumentNode
  max impossible "could add WeakCache on top?"
}

QueryManager o|--o{ ServerQuery: ""

ServerQuery {
  type DocumentNode
}

QueryManager o|--|o canonicalStringify: "variables"

canonicalStringify {
  Map sortingMap
  key string "stringified array of object keys"
  value array "sorted array of object keys"
  max impossible "grows with number of object shapes encountered - maybe StrongCache?"
}

print o|--o{ QueryManager: "serverQuery"
TransformedDocumentNode }o--o{ QueryManager: ""
print o|--o{ PersistedQueryLink : "serverQuery"
ServerQuery }o--o{ PersistedQueryLink: ""
print o|--o{ SubscriptionLink : "serverQuery"
ServerQuery }o--o{ SubscriptionLink: ""
print o|--o{ selectHttpOptionsAndBody : "serverQuery"
ServerQuery }o--o{ selectHttpOptionsAndBody: ""


InMemoryCache o|--|o QueryManager: ""
InMemoryCache o|--|o FragmentRegistry: ""

FragmentRegistry {
  optimism lookup
  key string "fragmentName"
  value FragmentDefinitionNode
  max TBD
  optimism transform
  key DocumentNode
  value DocumentNode
  max TBD
  optimism findFragmentSpreads
  key ASTNode
  value FragmentSpreadNode
  max TBD
}


InMemoryCache o|--|o StoreReader: ""
InMemoryCache o|--|o StoreWriter: ""
InMemoryCache o|--|o EntityStore: ""

StoreReader {
  optimism executeSelectionSet
  keys array "[SelectionSetNode, referenceString|StoreObject, varString, boolean] in EntityStore.keyMaker"
  values ExecResult
  max resultCacheMaxSize
  optimism executeSubSelectedArray
  keys array "[FieldNode, any[], varString] in EntityStore.keyMaker"
  values ExecResult
  max resultCacheMaxSize
}

StoreWriter {

}

EntityStore {
  Trie storageTrie
  key array "[referenceString, fieldName]"
  value object
  max unlimited "data, cannot have a maximum"
  Trie keyMaker
  key array "[DocumentNode, Callback, string] for `InMemoryCache.maybeBroadcastWatch`"
  key array "[SelectionSetNode, referenceString|StoreObject, varString, boolean] for `StoreReader.executeSelectionSet`"
  key array "[FieldNode, any[], varString] for `StoreReader.executeSubSelectedArray`"
  value emptyObject
  max unlimited
}

InMemoryCache {
  optimism maybeBroadcastWatch
  keys array "[DocumentNode, Callback, string] in EntityStore.keyMaker"
  value none
  max resultCacheMaxSize
}

InMemoryCache ||..|| ApolloCache: "extends"

ApolloCache {
  optimism getFragmentDoc
  keys array "[DocumentNode, fragmentName?]"
  value DocumentNode
  max TBD
}

removeTypenameFromVariables {
  optimism getVariableDefinitions
  keys array "[DocumentNode]"
  value object
  max TBD
}

I'll go over everything to double-check that I didn't miss any caches, but this is already a first draft :)

Checklist:

If this PR contains changes to the library itself (not necessary for e.g. docs updates), please include a changeset (see CONTRIBUTING.md)
If this PR is a new feature, please reference an issue where a consensus about the design was reached (not necessary for small changes)
Make sure all of the significant new logic is covered by tests

…ache

…uery-weakCache

changeset-bot · 2023-12-04T12:11:04Z

⚠️ No Changeset found

Latest commit: 02c4cff

Merging this PR will not cause a version bump for any packages. If these changes should not result in a new version, you're good to go. If these changes should result in a version bump, you need to add a changeset.

This PR includes no changesets

When changesets are added to this PR, you'll see the packages that this PR includes changesets for and the associated semver types

Click here to learn what changesets are, and how to add one.

Click here if you're a maintainer who wants to add a changeset to this PR

github-actions · 2023-12-04T12:20:08Z

size-limit report 📦

Path	Size
dist/apollo-client.min.cjs	37.86 KB (+0.62% 🔺)
import { ApolloClient, InMemoryCache, HttpLink } from "dist/main.cjs"	44.51 KB (+0.72% 🔺)
import { ApolloClient, InMemoryCache, HttpLink } from "dist/main.cjs" (production)	42.99 KB (+0.75% 🔺)
import { ApolloClient, InMemoryCache, HttpLink } from "dist/index.js"	33.08 KB (+0.68% 🔺)
import { ApolloClient, InMemoryCache, HttpLink } from "dist/index.js" (production)	31.79 KB (+0.76% 🔺)
import { ApolloProvider } from "dist/react/index.js"	1.24 KB (0%)
import { ApolloProvider } from "dist/react/index.js" (production)	1.22 KB (0%)
import { useQuery } from "dist/react/index.js"	4.46 KB (+3.03% 🔺)
import { useQuery } from "dist/react/index.js" (production)	4.27 KB (+3.11% 🔺)
import { useLazyQuery } from "dist/react/index.js"	4.77 KB (+2.83% 🔺)
import { useLazyQuery } from "dist/react/index.js" (production)	4.58 KB (+2.88% 🔺)
import { useMutation } from "dist/react/index.js"	2.75 KB (+5.24% 🔺)
import { useMutation } from "dist/react/index.js" (production)	2.73 KB (+5.31% 🔺)
import { useSubscription } from "dist/react/index.js"	2.43 KB (+6.06% 🔺)
import { useSubscription } from "dist/react/index.js" (production)	2.39 KB (+6.17% 🔺)
import { useSuspenseQuery } from "dist/react/index.js"	4.45 KB (+3.64% 🔺)
import { useSuspenseQuery } from "dist/react/index.js" (production)	3.85 KB (+3.86% 🔺)
import { useBackgroundQuery } from "dist/react/index.js"	4.03 KB (+4.14% 🔺)
import { useBackgroundQuery } from "dist/react/index.js" (production)	3.43 KB (+4.52% 🔺)
import { useLoadableQuery } from "dist/react/index.js"	4.26 KB (+3.78% 🔺)
import { useLoadableQuery } from "dist/react/index.js" (production)	3.67 KB (+4.31% 🔺)
import { useReadQuery } from "dist/react/index.js"	3.01 KB (0%)
import { useReadQuery } from "dist/react/index.js" (production)	2.95 KB (0%)
import { useFragment } from "dist/react/index.js"	2.11 KB (0%)
import { useFragment } from "dist/react/index.js" (production)	2.06 KB (0%)

jerelmiller

I think this is generally really great! Since this will be a public API, I had a few suggestions for future-proofing this a bit more, but otherwise I like the approach you've taken here. Really cool!

src/utilities/caching/caches.ts

jerelmiller · 2023-12-13T23:41:27Z

src/utilities/caching/sizes.ts

+}
+
+/**
+ * The cache sizes used by various Apollo Client caches.


Suggested change

* The cache sizes used by various Apollo Client caches.

* The cache sizes used by various internal Apollo Client caches. These internal

* caches are used to speed up potentially expensive, repeated operations.

It might make sense to add the internal modifier here so we don't confuse those that may think this is related to the InMemoryCache. I also think it might make sense just to go into a bit more detail on why they exist. It adds some nice context for the rest of the description down below, especially the paragraph that begins with "As a result, these cache sizes..."

jerelmiller · 2023-12-13T23:43:11Z

src/utilities/caching/sizes.ts

+ * Note that these caches are all derivative and if an item is cache-collected,
+ * it's not the end of the world - the cached item will just be recalculated.


Suggested change

* Note that these caches are all derivative and if an item is cache-collected,

* it's not the end of the world - the cached item will just be recalculated.

* Note that these caches are all derivative so if an item is cache-collected,

* the cached item will be recalculated.

Adding "it's not the end of the world" makes it sound like a bad thing that shouldn't happen. I think removing that statement still makes this make sense, so I'd opt just to leave it out.

jerelmiller · 2023-12-14T00:17:51Z

src/utilities/caching/sizes.ts

+   * This method is called from `transformDocument`, which is called from
+   * `QueryManager` with a user-provided DocumentNode.
+   * It is also called with already-transformed DocumentNodes, assuming the
+   * user provided additional transforms.


Suggested change

* This method is called from `transformDocument`, which is called from

* `QueryManager` with a user-provided DocumentNode.

* It is also called with already-transformed DocumentNodes, assuming the

* user provided additional transforms.

* This cache holds the results from transformed DocumentNodes to prevent

* unnecessary recomputation. Document transforms are used whenever a query

* is executed in the client.

Again, I would try to avoid some of the internal mechanics of where it's called and instead provide a broader overview here. DocumentTransforms are also called in ObservableQuery, but I don't think we need to provide a reference to every location where they could be used (and who knows, we may add to it in the future, so I'd hate to have to keep this comment up-to-date).

It's difficult - here I provide this much detail because this cache is not a DocumentTransform cache and should not be confused with those.
It's also important to distinguish here that they are called with a user-provided DocumentNode (so they require a smaller cache size than DocumentTransforms would).
It's less important for cache size what the result is, and much more important what the input/cache key is going to be.

jerelmiller · 2023-12-14T00:28:34Z

src/utilities/caching/sizes.ts

+   * The cache size here should be chosen with other DocumentTransforms in mind.
+   * For example, if there was a DocumentTransform that would take `n` DocumentNodes,
+   * and returned a differently-transformed DocumentNode depending if the app is
+   * online or offline, then we assume that the cache returns `2*n` documents.
+   *
+   * No user-provided DocumentNode will actually be "the last one", as we run the
+   * `defaultDocumentTransform` before *and* after the user-provided transforms.
+   *
+   * So if we assume that the user-provided transforms receive `n` documents and
+   * return `n` documents, the cache size should be `2*n`.
+   *
+   * If we assume that the user-provided transforms receive `n` documents and
+   * returns `2*n` documents, the cache size should be `3*n`.
+   *
+   * This size should also then be used in every other cache that mentions that
+   * it operates on a "transformed" DocumentNode.


I wonder if we should word this more towards the "when you .concat/.split transforms together" type approach. I think what you have in this section has some really good information and consideration, but I think it's a bit difficult to understand what some of it means without knowing the behind-the-scenes implementation.

For example, defaultDocumentTransform is not mentioned anywhere in the client API or in our docs. Unless you knew how we use your custom transform in QueryManager, that statement makes no sense (i.e. "where did defaultDocumentTransform come from?").

I think you could generalize some of this, while simultaneously keeping some of this useful information by saying something like this:

The cache size here should be chosen with other DocumentTransforms in mind. Document transforms can be combined through concatenation, where each transform will process the document node and pass it to the next transform in the chain. This means that a single document node may pass through several DocumentTransforms, each one caching its result along the way. // additional math stuff you have up here

Don't feel like you need to take exactly what I've worded here, but just trying to get at the fact that I'd love to talk about this more from the perspective of the public API (.concat, etc) and try and reduce the amount we talk about the internal implementation (i.e. defaultDocumentTransform). A link to the public docs page might also be useful here if you want to provide additional context on some of the stuff going on here.

jerelmiller · 2023-12-14T00:30:28Z

src/utilities/caching/sizes.ts

+   * recommended to set this to a high value.
+   */
+  executeSubSelectedArray: number;
+}


I'll avoid too many comments on each of these tsdocs, but hopefully you sense a theme in general. Since this will be a public API, I'd love to try and generalize some of these so that users can understand the purposes of each without needing to have explicit knowledge of the implementation details and specific APIs used behind the scenes.

Yeah, it's really a difficult line to choose here - the implementation details are not too important, but it is important what will be the input to these caches, and in which sequence data will traverse from one to the next - having a too small cache size in an "early" cache will artificially blow up caches down the line.

Let's talk all of that through in person later, maybe we find a better way of communicating that :)

jerelmiller · 2023-12-14T00:30:44Z

src/utilities/caching/sizes.ts

+ * You can directly modify this object, but any modification will
+ * only have an effect on caches that are created after the modification.
+ *
+ * So for global caches, such as `parser`, `canonicalStringify` and `print`,
+ * you might need to call `.reset` on them, which will essentially re-create them.
+ *
+ * Alternatively, you can set `globalThis[Symbol.for("apollo.cacheSize")]` before
+ * you load the Apollo Client package:


src/utilities/caching/sizes.ts

Co-authored-by: Jerel Miller <[email protected]>

phryneas · 2023-12-14T13:11:26Z

api-extractor.json

+      "ae-internal-missing-underscore": {
+        "logLevel": "none",
+        "addToApiReportFile": false
+      },
+


This suppresses this warning:

// Warning: (ae-internal-missing-underscore) The name "AutoCleanedStrongCache" should be prefixed with an underscore because the declaration is marked as @internal

I assume we don't want to prefix all our internal apis with _ :)

phryneas · 2023-12-14T16:35:27Z

src/utilities/caching/sizes.ts

+ * encountered, but rather to hold a reasonable number of values that can be
+ * assumed to be on the screen at any given time.
+ *
+ * We assume a "base value" of 1000 here, which is already very generous.


Suggested change

* We assume a "base value" of 1000 here, which is already very generous.

* We assume a "base value" of 1000 user-supplied Document Nodes here, which is already very generous.

jerelmiller

🎉

phryneas · 2023-12-15T09:49:11Z

Getting this merged; I will do more comment tweaking in #11415.

phryneas added 19 commits November 16, 2023 18:09

print: use WeakCache instead of WeakMap

d5d8cb1

format

29bf59d

pull in memory testing tools from PR 11358

9942e61

Persisted Query Link: improve memory management

b933dfb

re-add accidentally removed dependency

a117bd0

update api

0a1b718

Merge remote-tracking branch 'origin/release-3.9' into pr/print-weakC…

d64eee1

…ache

update size limit

bd40e8e

Merge branch 'release-3.9' into pr/print-weakCache

c712446

size-limit

f84da80

Merge branch 'pr/print-weakCache' into pr/persisted-query-weakCache

7485662

Merge branch 'release-3.9' into pr/persisted-query-weakCache

5361820

fix test failure

6ad8e56

better cleanup of interval/timeout

7cbf8dc

Merge remote-tracking branch 'origin/release-3.9' into pr/persisted-q…

d609c67

…uery-weakCache

apply formatting

0521529

remove unneccessary type

188dd0a

format again after updating prettier

4283908

add central confiuguration for Apollo Client cache sizes

9d3bd68

resolve import cycle

825e012

phryneas changed the title ~~add central confiuguration for Apollo Client cache sizes~~ add central configuration for Apollo Client cache sizes Dec 4, 2023

phryneas added 3 commits December 4, 2023 15:17

add exports

3762c5e

reduce cache collection throttle timeout

554240c

typo in comment

af8b16f

phryneas added this to the MemoryAnalysis milestone Dec 4, 2023

phryneas added 3 commits December 4, 2023 17:21

fix circular import

324e4c9

size-limits

2e65620

update type to remove WeakKey

717dc2d

phryneas added 5 commits December 6, 2023 11:44

add type export

517b133

update test

06e2e2c

chores

f380a57

formatting

b8a1f0c

adjust more tests

28a4289

phryneas marked this pull request as ready for review December 6, 2023 16:01

phryneas requested review from jerelmiller, benjamn and alessbell December 6, 2023 16:01

jerelmiller reviewed Dec 14, 2023

View reviewed changes

phryneas and others added 2 commits December 14, 2023 11:43

rename to AutoCleaned*Cache, mark @internal

12165c3

Update src/utilities/caching/sizes.ts

59e0d10

Co-authored-by: Jerel Miller <[email protected]>

Base automatically changed from pr/persisted-query-weakCache to release-3.9 December 14, 2023 12:08

phryneas added 4 commits December 14, 2023 13:42

Merge remote-tracking branch 'origin/release-3.9' into pr/cache-sizes

4126814

chores

1f792fa

size-limits

1a82eab

unify comment release tags

cf06751

phryneas commented Dec 14, 2023

View reviewed changes

update exports

c792601

phryneas commented Dec 14, 2023

View reviewed changes

phryneas added 3 commits December 14, 2023 18:18

naming & lazy bundling approach through inlining

5d70d2d

chores

7fd2830

size

02c4cff

jerelmiller approved these changes Dec 14, 2023

View reviewed changes

github-actions bot added the auto-cleanup 🤖 label Dec 14, 2023

phryneas merged commit f5420b0 into release-3.9 Dec 15, 2023

phryneas deleted the pr/cache-sizes branch December 15, 2023 09:52

phryneas mentioned this pull request Dec 15, 2023

add a "memory management" documentation page #11415

Merged

3 tasks

github-actions bot locked as resolved and limited conversation to collaborators Jan 15, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

add central configuration for Apollo Client cache sizes #11408

add central configuration for Apollo Client cache sizes #11408

phryneas commented Dec 4, 2023 •

edited

Loading

changeset-bot bot commented Dec 4, 2023 •

edited

Loading

github-actions bot commented Dec 4, 2023 •

edited

Loading

jerelmiller left a comment

jerelmiller Dec 13, 2023

jerelmiller Dec 13, 2023

jerelmiller Dec 14, 2023

phryneas Dec 14, 2023

jerelmiller Dec 14, 2023

jerelmiller Dec 14, 2023

phryneas Dec 14, 2023

jerelmiller Dec 14, 2023

phryneas Dec 14, 2023

phryneas Dec 14, 2023 •

edited

Loading

jerelmiller left a comment

phryneas commented Dec 15, 2023

	* The cache sizes used by various Apollo Client caches.
	* The cache sizes used by various internal Apollo Client caches. These internal
	* caches are used to speed up potentially expensive, repeated operations.

		* Note that these caches are all derivative and if an item is cache-collected,
		* it's not the end of the world - the cached item will just be recalculated.

	* We assume a "base value" of 1000 here, which is already very generous.
	* We assume a "base value" of 1000 user-supplied Document Nodes here, which is already very generous.

add central configuration for Apollo Client cache sizes #11408

add central configuration for Apollo Client cache sizes #11408

Conversation

phryneas commented Dec 4, 2023 • edited Loading

Open Questions:

Real-world values

Caches and their relationships

Checklist:

changeset-bot bot commented Dec 4, 2023 • edited Loading

⚠️ No Changeset found

github-actions bot commented Dec 4, 2023 • edited Loading

size-limit report 📦

jerelmiller left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

phryneas Dec 14, 2023 • edited Loading

Choose a reason for hiding this comment

jerelmiller left a comment

Choose a reason for hiding this comment

phryneas commented Dec 15, 2023

phryneas commented Dec 4, 2023 •

edited

Loading

changeset-bot bot commented Dec 4, 2023 •

edited

Loading

github-actions bot commented Dec 4, 2023 •

edited

Loading

phryneas Dec 14, 2023 •

edited

Loading