From 38b4885ec437e27d87d887409f4f733d8d44416b Mon Sep 17 00:00:00 2001 From: Josh Cooper Date: Thu, 7 Dec 2023 22:56:51 -0800 Subject: [PATCH 1/5] Don't join token once per loaded fact When searching for loaded facts that match the user query, we were joining the query tokens once per loaded fact. For example, if the user query was `networking.interfaces.eth0`, then we split the query into tokens ['networking', 'interfaces', 'eth0']. Then for each loaded fact definition, we joined the query tokens back to 'networking.interfaces.eth0`. If none matched, then we tried again using the parent prefix ['networking', 'interfaces'], again joining for each loaded fact. Now only join the query tokens once outside of the method. --- lib/facter/framework/parsers/query_parser.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/facter/framework/parsers/query_parser.rb b/lib/facter/framework/parsers/query_parser.rb index 0e095b16e5..a0214330bc 100644 --- a/lib/facter/framework/parsers/query_parser.rb +++ b/lib/facter/framework/parsers/query_parser.rb @@ -50,9 +50,11 @@ def search_for_facts(query, loaded_fact_hash) query_tokens = query.end_with?('.*') ? [query] : query.split('.') size = query_tokens.size + # Try to match the most specific query_tokens to the least, returning the first match size.times do |i| query_token_range = 0..size - i - 1 - resolvable_fact_list = get_facts_matching_tokens(query_tokens, query_token_range, loaded_fact_hash) + query_fact = query_tokens[query_token_range].join('.') + resolvable_fact_list = get_facts_matching_tokens(query_tokens, query_fact, loaded_facts_hash) return resolvable_fact_list if resolvable_fact_list.any? end @@ -62,12 +64,10 @@ def search_for_facts(query, loaded_fact_hash) resolvable_fact_list end - def get_facts_matching_tokens(query_tokens, query_token_range, loaded_fact_hash) + def get_facts_matching_tokens(query_tokens, query_fact, loaded_facts_hash) resolvable_fact_list = [] loaded_fact_hash.each do |loaded_fact| - query_fact = query_tokens[query_token_range].join('.') - next unless found_fact?(loaded_fact.name, query_fact) searched_fact = construct_loaded_fact(query_tokens, loaded_fact) From bd8d7d0d799c822d5d72462d1cd7e9a9e85f04b2 Mon Sep 17 00:00:00 2001 From: Josh Cooper Date: Fri, 8 Dec 2023 13:57:28 -0800 Subject: [PATCH 2/5] Rename loaded_facts variable Rename loaded_facts_hash and loaded_fact to loaded_facts --- lib/facter/framework/parsers/query_parser.rb | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/facter/framework/parsers/query_parser.rb b/lib/facter/framework/parsers/query_parser.rb index a0214330bc..11527e8d2d 100644 --- a/lib/facter/framework/parsers/query_parser.rb +++ b/lib/facter/framework/parsers/query_parser.rb @@ -18,18 +18,18 @@ class << self # Because a root fact will always be resolved by a collection of child facts, # we can return one or more child facts for each parent. # - # query - is the user input used to search for facts - # loaded_fact - is a list with all facts for the current operating system + # @param query_list [Array] The list of facts to search for + # @param loaded_facts [Array] All of the fact definitions for the current operating system # - # Returns a list of SearchedFact objects that resolve the users query. - def parse(query_list, loaded_fact) + # @return [Array] a list of searchable facts that resolve the user's query + def parse(query_list, loaded_facts) matched_facts = [] @query_list = query_list - return no_user_query(loaded_fact) unless query_list.any? + return no_user_query(loaded_facts) unless query_list.any? query_list.each do |query| - found_facts = search_for_facts(query, loaded_fact) + found_facts = search_for_facts(query, loaded_facts) matched_facts << found_facts end @@ -44,7 +44,7 @@ def no_user_query(loaded_facts) searched_facts end - def search_for_facts(query, loaded_fact_hash) + def search_for_facts(query, loaded_facts) resolvable_fact_list = [] query = query.to_s query_tokens = query.end_with?('.*') ? [query] : query.split('.') @@ -54,7 +54,7 @@ def search_for_facts(query, loaded_fact_hash) size.times do |i| query_token_range = 0..size - i - 1 query_fact = query_tokens[query_token_range].join('.') - resolvable_fact_list = get_facts_matching_tokens(query_tokens, query_fact, loaded_facts_hash) + resolvable_fact_list = get_facts_matching_tokens(query_tokens, query_fact, loaded_facts) return resolvable_fact_list if resolvable_fact_list.any? end @@ -64,10 +64,10 @@ def search_for_facts(query, loaded_fact_hash) resolvable_fact_list end - def get_facts_matching_tokens(query_tokens, query_fact, loaded_facts_hash) + def get_facts_matching_tokens(query_tokens, query_fact, loaded_facts) resolvable_fact_list = [] - loaded_fact_hash.each do |loaded_fact| + loaded_facts.each do |loaded_fact| next unless found_fact?(loaded_fact.name, query_fact) searched_fact = construct_loaded_fact(query_tokens, loaded_fact) From 286fbac0f9a5bbd41b396976399a776adbafb972 Mon Sep 17 00:00:00 2001 From: Josh Cooper Date: Fri, 8 Dec 2023 14:01:28 -0800 Subject: [PATCH 3/5] Refactor wildcard matching It's wasn't super obvious, but if `fact_with_wildcard` was true, then we returned true or false depending on whether query_fact didn't match the fact name exactly or not. Refactor the code to make it explicit. Some more context, some legacy facts are dynamically generated and their names contain wildcards, such as the `blockdevice_.*_model` legacy fact. If the user queries for `blockdevice_sda_model` specifically, then we need to return the legacy fact. In that case `fact_with_wildcard` will be true and the fact and query names match exactly, so `found_fact?` will return true. However, if you query for the similarly named structured fact `blockdevice.sda.model`, then `fact_with_wildcard` will be false, since we want to return the structured fact, not the legacy one. --- lib/facter/framework/parsers/query_parser.rb | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/facter/framework/parsers/query_parser.rb b/lib/facter/framework/parsers/query_parser.rb index 11527e8d2d..3773190ef2 100644 --- a/lib/facter/framework/parsers/query_parser.rb +++ b/lib/facter/framework/parsers/query_parser.rb @@ -79,16 +79,20 @@ def get_facts_matching_tokens(query_tokens, query_fact, loaded_facts) end def found_fact?(fact_name, query_fact) + # This is the case where the fact_name contains a wildcard like + # blockdevice_.*_model and we're querying for the legacy fact + # specifically using 'blockdevice_sba_model' and we don't want the query + # 'blockdevice.sba.model' to match fact_with_wildcard = fact_name.include?('.*') && !query_fact.include?('.') - processed_equery_fact = query_fact.gsub('\\', '\\\\\\\\') - - return false if fact_with_wildcard && !query_fact.match("^#{fact_name}$") - - # Must escape metacharacters (like dots) to ensure the correct fact is found - return false unless fact_with_wildcard || fact_name.match("^#{Regexp.escape(processed_equery_fact)}($|\\.)") - - true + if fact_with_wildcard + # fact_name contains wildcard, so we're intentially not escaping. + query_fact.match("^#{fact_name}$") + else + processed_equery_fact = query_fact.gsub('\\', '\\\\\\\\') + # Must escape metacharacters (like dots) to ensure the correct fact is found + fact_name.match("^#{Regexp.escape(processed_equery_fact)}($|\\.)") + end end def construct_loaded_fact(query_tokens, loaded_fact) From 05d1407844b8588243591cc089f3922f342e4f08 Mon Sep 17 00:00:00 2001 From: Josh Cooper Date: Thu, 7 Dec 2023 22:38:47 -0800 Subject: [PATCH 4/5] Call Module#const_get once per Module --- lib/facter/framework/core/fact_loaders/class_discoverer.rb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/facter/framework/core/fact_loaders/class_discoverer.rb b/lib/facter/framework/core/fact_loaders/class_discoverer.rb index ac140f4dc8..2b34690a34 100644 --- a/lib/facter/framework/core/fact_loaders/class_discoverer.rb +++ b/lib/facter/framework/core/fact_loaders/class_discoverer.rb @@ -21,9 +21,10 @@ def discover_classes(operating_system) def find_nested_classes(mod, discovered_classes) mod.constants.each do |constant_name| - if mod.const_get(constant_name).instance_of? Class - discovered_classes << mod.const_get(constant_name) - elsif mod.const_get(constant_name).instance_of? Module + obj = mod.const_get(constant_name) + if obj.instance_of? Class + discovered_classes << obj + elsif obj.instance_of? Module find_nested_classes(Module.const_get("#{mod.name}::#{constant_name}"), discovered_classes) end end From a6cfedcad2d8b95135f76a193f419e974f318b15 Mon Sep 17 00:00:00 2001 From: Josh Cooper Date: Fri, 8 Dec 2023 14:43:28 -0800 Subject: [PATCH 5/5] Add data flow --- docs/data-flow.md | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 docs/data-flow.md diff --git a/docs/data-flow.md b/docs/data-flow.md new file mode 100644 index 0000000000..c88acdb2d5 --- /dev/null +++ b/docs/data-flow.md @@ -0,0 +1,39 @@ +## Data Flow + +This shows the general data flow when a user runs facter on the command to lookup facts `a.b` and `c`. + +Generally, facter loads fact definitions (`LoadedFact`) to determine all of the things it could collect, including internal (aka core) facts, custom facts (implemented using the `Facter.add` API) and external facts (json, yaml, bash, etc). Each `LoadedFact` specifies a name like `os.family` and a class that can be called later to collect the values, e.g. `Facts::Linux::Os::Release`. + +The `QueryParser` parse both user queries `a.b` and `c` and matches each query against all LoadedFacts, returning an array of `SearchedFacts`. These are more like SearchableFacts, since they haven't been searched yet. + +Facter attempts to lookup the facts from the cache, otherwise it calls the `InternalFactManager` and `ExternalFactManager` to resolve facts. + +For internal facts, facter wraps each `SearchedFact` with a `CoreFact`. The `CoreFact` calls the `call_the_resolver` method on the class that the `SearchedFact` references. The `call_the_resolver` method then typically delegates to a resolver and returns the fact value which may be scalar or structured data. For example, `os.family` returns a string, but `gce` returns a Hash. + +```mermaid +flowchart TD + CLI[facter a.b c] --> Facter[Facter.to_user_output] + Facter --> FactManager[FactManager#resolve_facts] + FactManager --> FactLoader[FactLoader.load] + FactLoader -->|internal| InternalFactLoader[InternalLoader.core_facts] + FactLoader -->|custom| CustomFactLoader[ExternalFactLoader.custom_facts] + FactLoader -->|external| ExternalFactLoader[ExternalFactLoader.external_facts] + InternalFactLoader --> QueryParser[QueryParser.parse] + CustomFactLoader --> QueryParser + ExternalFactLoader --> QueryParser + QueryParser -->|empty query| AllSearchable[All loaded facts are searchable] + QueryParser -->|not empty| SomeSearchable[Match query tokens to loaded facts] + AllSearchable --> SearchedFacts[Array of SearchedFacts] + SomeSearchable --> SearchedFacts + SearchedFacts --> CacheManager[CacheManager.resolve_facts] + CacheManager -->|internal| InternalFactManager[InternalFactManager.resolve_facts] + InternalFactManager --> CoreFact[CoreFact#create] + CoreFact --> SearchedFact[SearchedFact -> call_the_resolver] + SearchedFact --> Resolvers[Facter::Resolvers::*.resolve] + CacheManager -->|external| ExternalFactManager[ExternalFactManager.resolve_facts] + Resolvers --> ResolvedFacts[Array of ResolvedFacts] + ExternalFactManager --> ResolvedFacts + ResolvedFacts --> CacheFacts[CacheManager.cache_facts] + CacheFacts --> FilterFacts[FactFilter#filter_facts!] + FilterFacts --> Formatter +```