From ef01aaff168d45a3878e30863403dfd9a2a86d27 Mon Sep 17 00:00:00 2001 From: imilinovic Date: Thu, 31 Aug 2023 12:32:10 +0200 Subject: [PATCH 1/5] meta stats docs --- mage/query-modules/cpp/meta.md | 199 ++++++++++++++++++++++++++++++++ mage/templates/_mage_spells.mdx | 1 + sidebars/sidebarsMAGE.js | 1 + 3 files changed, 201 insertions(+) create mode 100644 mage/query-modules/cpp/meta.md diff --git a/mage/query-modules/cpp/meta.md b/mage/query-modules/cpp/meta.md new file mode 100644 index 00000000000..0135a11f2f3 --- /dev/null +++ b/mage/query-modules/cpp/meta.md @@ -0,0 +1,199 @@ +--- +id: meta_module +title: meta_module +sidebar_label: meta_module +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import RunOnSubgraph from '../../templates/_run_on_subgraph.mdx'; + +export const Highlight = ({children, color}) => ( + +{children} + +); + + +The **meta** module provides a set of procedures for generating metadata about the database. + +[![docs-source](https://img.shields.io/badge/source-meta_module-FB6E00?logo=github&style=for-the-badge)](https://github.com/memgraph/mage/tree/main/cpp/meta_module) + +| Trait | Value | +| ------------------- | ----------------------------------------------------- | +| **Module type** | **algorithm** | +| **Implementation** | **C++** | +| **Parallelism** | **sequential** | + +### Procedures + +## stats + +The stats procedure returns the following metadata about the graph: +- `labelCount` ➡ number of unique labels in nodes +- `relationshipTypeCount` ➡ number of unique relationship types (labels) +- `nodeCount` ➡ number of nodes in the graph +- `relationshipCount` ➡ number of relationships in the graph +- `labels` ➡ map with the following (key, value) pairs: + - `label` : number_of_occurrences +- `relationshipTypes` ➡ map with the following (key, value) pairs: + - `(:label)-[:relationship_type]->()` : number_of_occurences + - `()-[:relationship_type]->(:label)` : number_of_occurences + - 
`()-[:relationship_type]->()` : number_of_occurences +- `relationshipTypesCount` ➡ map with the following (key, value) pairs: + - `relationship_type` : number_of_occurences +- `stats` ➡ map which contains all of the above + +It is split into two version which return the same metadata: +- stats_online - works in **O(1)** and requires setting up a trigger +- stats_offline - traverses the whole graph + +### `stats_online(update_stats)` + +Retrieves the graph metadata in **O(1)** complexity. Requires setting up the following trigger: + + ```cypher + CREATE TRIGGER meta_trigger BEFORE COMMIT EXECUTE CALL meta.update(createdObjects, deletedObjects, removedVertexProperties, removedEdgeProperties, setVertexLabels, removedVertexLabels); + ``` +This procedure tracks the data created/deleted/modified after the trigger was added. If you want to return the metadata about the whole graph you need to run the *stats_online* procedure with the *update_stats* flag set to true **once**. That flag will cause the procedure to traverse the whole graph to update the metadata. After that you can always run with the *update_stats* flag set to false and the procedure will return the metadata in **O(1)** complexity. 
+ + +#### Input: + +- `update_stats: bool (default=false)` ➡ if true traverses the whole graph to update the metadata otherwise returns the stored metadata + +#### Output: + +- `labelCount: int` ➡ number of unique labels in nodes +- `relationshipTypeCount: int` ➡ number of unique relationship types (labels) +- `nodeCount: int` ➡ number of nodes in the graph +- `relationshipCount: int` ➡ number of relationships in the graph +- `labels: Map[string: int]` ➡ map with the following (key, value) pairs: + - `label` : number_of_occurrences +- `relationshipTypes: Map[string: int]` ➡ map with the following (key, value) pairs: + - `(:label)-[:relationship_type]->()` : number_of_occurences + - `()-[:relationship_type]->(:label)` : number_of_occurences + - `()-[:relationship_type]->()` : number_of_occurences +- `relationshipTypesCount: Map[string: int]` ➡ map with the following (key, value) pairs: + - `relationship_type` : number_of_occurences +- `stats` ➡ map which contains all of the above + +#### Usage: + +Running stats on the following graph: +```cypher +MERGE (a:Node {id: 0}) MERGE (b:Node {id: 1}) CREATE (a)-[:Relation1]->(b); +MERGE (a:Node {id: 1}) MERGE (b:Node {id: 2}) CREATE (a)-[:Relation1]->(b); +MERGE (a:Node {id: 2}) MERGE (b:Node {id: 0}) CREATE (a)-[:Relation1]->(b); +MERGE (a:Node {id: 3}) MERGE (b:Node {id: 3}) CREATE (a)-[:Relation2]->(b); +MERGE (a:Node {id: 3}) MERGE (b:Node {id: 4}) CREATE (a)-[:Relation2]->(b); +MERGE (a:Node {id: 3}) MERGE (b:Node {id: 5}) CREATE (a)-[:Relation2]->(b); +``` + +```cypher +CALL meta.stats_online() YIELD stats; +``` + +```plaintext ++-------------------------------------------------------+ +| stats | ++-------------------------------------------------------+ +| | +|{ | +| "labelCount": 1, | +| "labels": { | +| "Node": 6 | +| }, | +| "nodeCount": 6, | +| "propertyKeyCount": 1, | +| "relationshipCount": 6, | +| "relationshipTypeCount": 2, | +| "relationshipTypes": { | +| "()-[:Relation1]->()": 3, | +| 
"()-[:Relation1]->(:Node)": 3, | +| "()-[:Relation2]->()": 3, | +| "()-[:Relation2]->(:Node)": 3, | +| "(:Node)-[:Relation1]->()": 3, | +| "(:Node)-[:Relation2]->()": 3 | +| }, | +| "relationshipTypesCount": { | +| "Relation1": 3, | +| "Relation2": 3 | +| } | +|} | +| | ++-------------------------------------------------------+ +``` + +### `stats_offline()` + +Retrieves the graph metadata by traversing the whole graph. *stats_online* should be preferred because of the better complexity unless you don't want to use triggers. + +#### Output: + +- `labelCount: int` ➡ number of unique labels in nodes +- `relationshipTypeCount: int` ➡ number of unique relationship types (labels) +- `nodeCount: int` ➡ number of nodes in the graph +- `relationshipCount: int` ➡ number of relationships in the graph +- `labels: Map[string: int]` ➡ map with the following (key, value) pairs: + - `label` : number_of_occurrences +- `relationshipTypes: Map[string: int]` ➡ map with the following (key, value) pairs: + - `(:label)-[:relationship_type]->()` : number_of_occurences + - `()-[:relationship_type]->(:label)` : number_of_occurences + - `()-[:relationship_type]->()` : number_of_occurences +- `relationshipTypesCount: Map[string: int]` ➡ map with the following (key, value) pairs: + - `relationship_type` : number_of_occurences +- `stats` ➡ map which contains all of the above + +#### Usage: + +Running stats on the following graph: +```cypher +MERGE (a:Node {id: 0}) MERGE (b:Node {id: 1}) CREATE (a)-[:Relation1]->(b); +MERGE (a:Node {id: 1}) MERGE (b:Node {id: 2}) CREATE (a)-[:Relation1]->(b); +MERGE (a:Node {id: 2}) MERGE (b:Node {id: 0}) CREATE (a)-[:Relation1]->(b); +MERGE (a:Node {id: 3}) MERGE (b:Node {id: 3}) CREATE (a)-[:Relation2]->(b); +MERGE (a:Node {id: 3}) MERGE (b:Node {id: 4}) CREATE (a)-[:Relation2]->(b); +MERGE (a:Node {id: 3}) MERGE (b:Node {id: 5}) CREATE (a)-[:Relation2]->(b); +``` + +```cypher +CALL meta.stats_offline() YIELD stats; +``` + +```plaintext 
++-------------------------------------------------------+ +| stats | ++-------------------------------------------------------+ +| | +|{ | +| "labelCount": 1, | +| "labels": { | +| "Node": 6 | +| }, | +| "nodeCount": 6, | +| "propertyKeyCount": 1, | +| "relationshipCount": 6, | +| "relationshipTypeCount": 2, | +| "relationshipTypes": { | +| "()-[:Relation1]->()": 3, | +| "()-[:Relation1]->(:Node)": 3, | +| "()-[:Relation2]->()": 3, | +| "()-[:Relation2]->(:Node)": 3, | +| "(:Node)-[:Relation1]->()": 3, | +| "(:Node)-[:Relation2]->()": 3 | +| }, | +| "relationshipTypesCount": { | +| "Relation1": 3, | +| "Relation2": 3 | +| } | +|} | +| | ++-------------------------------------------------------+ +``` diff --git a/mage/templates/_mage_spells.mdx b/mage/templates/_mage_spells.mdx index c5010a8e4a0..a24547717f3 100644 --- a/mage/templates/_mage_spells.mdx +++ b/mage/templates/_mage_spells.mdx @@ -60,6 +60,7 @@ | [import_util](/mage/query-modules/python/import-util) | Python | A module for importing data from different formats (JSON). | | [json_util](/mage/query-modules/python/json-util) | Python | A module for loading JSON from a local file or remote address. | | [llm_util](/mage/query-modules/python/llm-util) | Python | A module that contains procedures describing graphs in a format best suited for large language models (LLMs). | +| [meta](/mage/query-modules/cpp/meta) | C++ | A module that contains procedures describing graphs on a meta-level. | | [meta_util](/mage/query-modules/python/meta-util) | Python | A module that contains procedures describing graphs on a meta-level. | | [migrate](/mage/query-modules/python/migrate) | Python | A module that can access data from a MySQL, SQL Server or Oracle database. | | [periodic](/mage/query-modules/cpp/periodic) | C++ | A module containing procedures for periodically running difficult and/or memory/time consuming queries. 
| diff --git a/sidebars/sidebarsMAGE.js b/sidebars/sidebarsMAGE.js index c7b359cf431..c82ea7b311d 100644 --- a/sidebars/sidebarsMAGE.js +++ b/sidebars/sidebarsMAGE.js @@ -52,6 +52,7 @@ module.exports = { "query-modules/python/llm-util", "query-modules/cpp/map", "query-modules/python/max-flow", + "query-modules/cpp/meta", "query-modules/python/meta-util", "query-modules/python/migrate", "query-modules/python/node-classification-with-gnn", From ad30546e82b3c11aada8bfa35da9bf2c276c5ac2 Mon Sep 17 00:00:00 2001 From: imilinovic Date: Thu, 31 Aug 2023 13:08:23 +0200 Subject: [PATCH 2/5] typo fix --- mage/query-modules/cpp/meta.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mage/query-modules/cpp/meta.md b/mage/query-modules/cpp/meta.md index 0135a11f2f3..33033148fec 100644 --- a/mage/query-modules/cpp/meta.md +++ b/mage/query-modules/cpp/meta.md @@ -43,11 +43,11 @@ The stats procedure returns the following metadata about the graph: - `labels` ➡ map with the following (key, value) pairs: - `label` : number_of_occurrences - `relationshipTypes` ➡ map with the following (key, value) pairs: - - `(:label)-[:relationship_type]->()` : number_of_occurences - - `()-[:relationship_type]->(:label)` : number_of_occurences - - `()-[:relationship_type]->()` : number_of_occurences + - `(:label)-[:relationship_type]->()` : number_of_occurrences + - `()-[:relationship_type]->(:label)` : number_of_occurrences + - `()-[:relationship_type]->()` : number_of_occurrences - `relationshipTypesCount` ➡ map with the following (key, value) pairs: - - `relationship_type` : number_of_occurences + - `relationship_type` : number_of_occurrences - `stats` ➡ map which contains all of the above It is split into two version which return the same metadata: @@ -77,11 +77,11 @@ This procedure tracks the data created/deleted/modified after the trigger was ad - `labels: Map[string: int]` ➡ map with the following (key, value) pairs: - `label` : 
number_of_occurrences - `relationshipTypes: Map[string: int]` ➡ map with the following (key, value) pairs: - - `(:label)-[:relationship_type]->()` : number_of_occurences - - `()-[:relationship_type]->(:label)` : number_of_occurences - - `()-[:relationship_type]->()` : number_of_occurences + - `(:label)-[:relationship_type]->()` : number_of_occurrences + - `()-[:relationship_type]->(:label)` : number_of_occurrences + - `()-[:relationship_type]->()` : number_of_occurrences - `relationshipTypesCount: Map[string: int]` ➡ map with the following (key, value) pairs: - - `relationship_type` : number_of_occurences + - `relationship_type` : number_of_occurrences - `stats` ➡ map which contains all of the above #### Usage: @@ -144,11 +144,11 @@ Retrieves the graph metadata by traversing the whole graph. *stats_online* shoul - `labels: Map[string: int]` ➡ map with the following (key, value) pairs: - `label` : number_of_occurrences - `relationshipTypes: Map[string: int]` ➡ map with the following (key, value) pairs: - - `(:label)-[:relationship_type]->()` : number_of_occurences - - `()-[:relationship_type]->(:label)` : number_of_occurences - - `()-[:relationship_type]->()` : number_of_occurences + - `(:label)-[:relationship_type]->()` : number_of_occurrences + - `()-[:relationship_type]->(:label)` : number_of_occurrences + - `()-[:relationship_type]->()` : number_of_occurrences - `relationshipTypesCount: Map[string: int]` ➡ map with the following (key, value) pairs: - - `relationship_type` : number_of_occurences + - `relationship_type` : number_of_occurrences - `stats` ➡ map which contains all of the above #### Usage: From 177763a593e95071c0b48f5f00cc09ff0a305dba Mon Sep 17 00:00:00 2001 From: imilinovic Date: Thu, 31 Aug 2023 13:13:00 +0200 Subject: [PATCH 3/5] fix header --- mage/query-modules/cpp/meta.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mage/query-modules/cpp/meta.md b/mage/query-modules/cpp/meta.md index 33033148fec..bb0523cd4fd 100644 
--- a/mage/query-modules/cpp/meta.md +++ b/mage/query-modules/cpp/meta.md @@ -1,7 +1,7 @@ --- -id: meta_module -title: meta_module -sidebar_label: meta_module +id: meta +title: meta +sidebar_label: meta --- import Tabs from '@theme/Tabs'; From 92af8619f768370e4cca1993c170a58b2f70ec7c Mon Sep 17 00:00:00 2001 From: Vlasta <95473291+vpavicic@users.noreply.github.com> Date: Thu, 31 Aug 2023 15:34:23 +0200 Subject: [PATCH 4/5] Apply suggestions from code review --- mage/query-modules/cpp/meta.md | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/mage/query-modules/cpp/meta.md b/mage/query-modules/cpp/meta.md index bb0523cd4fd..5d1ddb0e4c2 100644 --- a/mage/query-modules/cpp/meta.md +++ b/mage/query-modules/cpp/meta.md @@ -36,10 +36,10 @@ The **meta** module provides a set of procedures for generating metadata about t ## stats The stats procedure returns the following metadata about the graph: -- `labelCount` ➡ number of unique labels in nodes -- `relationshipTypeCount` ➡ number of unique relationship types (labels) -- `nodeCount` ➡ number of nodes in the graph -- `relationshipCount` ➡ number of relationships in the graph +- `labelCount` ➡ number of unique labels in nodes. +- `relationshipTypeCount` ➡ number of unique relationship types (labels). +- `nodeCount` ➡ number of nodes in the graph. +- `relationshipCount` ➡ number of relationships in the graph. - `labels` ➡ map with the following (key, value) pairs: - `label` : number_of_occurrences - `relationshipTypes` ➡ map with the following (key, value) pairs: @@ -48,7 +48,7 @@ The stats procedure returns the following metadata about the graph: - `()-[:relationship_type]->()` : number_of_occurrences - `relationshipTypesCount` ➡ map with the following (key, value) pairs: - `relationship_type` : number_of_occurrences -- `stats` ➡ map which contains all of the above +- `stats` ➡ map which contains all of the above. 
It is split into two version which return the same metadata: - stats_online - works in **O(1)** and requires setting up a trigger @@ -63,17 +63,16 @@ Retrieves the graph metadata in **O(1)** complexity. Requires setting up the fol ``` This procedure tracks the data created/deleted/modified after the trigger was added. If you want to return the metadata about the whole graph you need to run the *stats_online* procedure with the *update_stats* flag set to true **once**. That flag will cause the procedure to traverse the whole graph to update the metadata. After that you can always run with the *update_stats* flag set to false and the procedure will return the metadata in **O(1)** complexity. - #### Input: -- `update_stats: bool (default=false)` ➡ if true traverses the whole graph to update the metadata otherwise returns the stored metadata +- `update_stats: bool (default=false)` ➡ if true traverses the whole graph to update the metadata otherwise returns the stored metadata. #### Output: -- `labelCount: int` ➡ number of unique labels in nodes -- `relationshipTypeCount: int` ➡ number of unique relationship types (labels) -- `nodeCount: int` ➡ number of nodes in the graph -- `relationshipCount: int` ➡ number of relationships in the graph +- `labelCount: int` ➡ number of unique labels in nodes. +- `relationshipTypeCount: int` ➡ number of unique relationship types (labels). +- `nodeCount: int` ➡ number of nodes in the graph. +- `relationshipCount: int` ➡ number of relationships in the graph. 
- `labels: Map[string: int]` ➡ map with the following (key, value) pairs: - `label` : number_of_occurrences - `relationshipTypes: Map[string: int]` ➡ map with the following (key, value) pairs: @@ -82,7 +81,7 @@ This procedure tracks the data created/deleted/modified after the trigger was ad - `()-[:relationship_type]->()` : number_of_occurrences - `relationshipTypesCount: Map[string: int]` ➡ map with the following (key, value) pairs: - `relationship_type` : number_of_occurrences -- `stats` ➡ map which contains all of the above +- `stats` ➡ map which contains all of the above. #### Usage: @@ -137,10 +136,10 @@ Retrieves the graph metadata by traversing the whole graph. *stats_online* shoul #### Output: -- `labelCount: int` ➡ number of unique labels in nodes -- `relationshipTypeCount: int` ➡ number of unique relationship types (labels) -- `nodeCount: int` ➡ number of nodes in the graph -- `relationshipCount: int` ➡ number of relationships in the graph +- `labelCount: int` ➡ number of unique labels in nodes. +- `relationshipTypeCount: int` ➡ number of unique relationship types (labels). +- `nodeCount: int` ➡ number of nodes in the graph. +- `relationshipCount: int` ➡ number of relationships in the graph. - `labels: Map[string: int]` ➡ map with the following (key, value) pairs: - `label` : number_of_occurrences - `relationshipTypes: Map[string: int]` ➡ map with the following (key, value) pairs: @@ -149,7 +148,7 @@ Retrieves the graph metadata by traversing the whole graph. *stats_online* shoul - `()-[:relationship_type]->()` : number_of_occurrences - `relationshipTypesCount: Map[string: int]` ➡ map with the following (key, value) pairs: - `relationship_type` : number_of_occurrences -- `stats` ➡ map which contains all of the above +- `stats` ➡ map which contains all of the above. 
#### Usage: From 0b41f9e48efcf948f13570f4951b52b52fc3b00c Mon Sep 17 00:00:00 2001 From: imilinovic Date: Mon, 11 Sep 2023 07:29:21 +0200 Subject: [PATCH 5/5] remove status summary --- mage/query-modules/cpp/meta.md | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/mage/query-modules/cpp/meta.md b/mage/query-modules/cpp/meta.md index 5d1ddb0e4c2..98569b59379 100644 --- a/mage/query-modules/cpp/meta.md +++ b/mage/query-modules/cpp/meta.md @@ -29,31 +29,10 @@ The **meta** module provides a set of procedures for generating metadata about t | ------------------- | ----------------------------------------------------- | | **Module type** | **algorithm** | | **Implementation** | **C++** | -| **Parallelism** | **sequential** | +| **Parallelism** | **parallel** | ### Procedures -## stats - -The stats procedure returns the following metadata about the graph: -- `labelCount` ➡ number of unique labels in nodes. -- `relationshipTypeCount` ➡ number of unique relationship types (labels). -- `nodeCount` ➡ number of nodes in the graph. -- `relationshipCount` ➡ number of relationships in the graph. -- `labels` ➡ map with the following (key, value) pairs: - - `label` : number_of_occurrences -- `relationshipTypes` ➡ map with the following (key, value) pairs: - - `(:label)-[:relationship_type]->()` : number_of_occurrences - - `()-[:relationship_type]->(:label)` : number_of_occurrences - - `()-[:relationship_type]->()` : number_of_occurrences -- `relationshipTypesCount` ➡ map with the following (key, value) pairs: - - `relationship_type` : number_of_occurrences -- `stats` ➡ map which contains all of the above. - -It is split into two version which return the same metadata: -- stats_online - works in **O(1)** and requires setting up a trigger -- stats_offline - traverses the whole graph - ### `stats_online(update_stats)` Retrieves the graph metadata in **O(1)** complexity. 
Requires setting up the following trigger: @@ -61,6 +40,7 @@ Retrieves the graph metadata in **O(1)** complexity. Requires setting up the fol ```cypher CREATE TRIGGER meta_trigger BEFORE COMMIT EXECUTE CALL meta.update(createdObjects, deletedObjects, removedVertexProperties, removedEdgeProperties, setVertexLabels, removedVertexLabels); ``` + This procedure tracks the data created/deleted/modified after the trigger was added. If you want to return the metadata about the whole graph you need to run the *stats_online* procedure with the *update_stats* flag set to true **once**. That flag will cause the procedure to traverse the whole graph to update the metadata. After that you can always run with the *update_stats* flag set to false and the procedure will return the metadata in **O(1)** complexity. #### Input: @@ -132,7 +112,7 @@ CALL meta.stats_online() YIELD stats; ### `stats_offline()` -Retrieves the graph metadata by traversing the whole graph. *stats_online* should be preferred because of the better complexity unless you don't want to use triggers. +Retrieves the graph metadata by traversing the whole graph. Prefer `stats_online` for its better time complexity, unless you want to avoid setting up a trigger. #### Output: