diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000..6184485 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,43 @@ +# We're not using readability-braces-around-statements as we are using +# the google-* version of that which is more sensible (allows one-liners +# without braces). +# +# NOTE: Below, there must be no comments, otherwise clang tidy silently ignores +# rules... (ask me how I know). +### +Checks: > + clang-diagnostic-*, + -clang-diagnostic-unknown-pragmas, + clang-analyzer-*, + abseil-*, + readability-*, + -readability-avoid-unconditional-preprocessor-if, + -readability-braces-around-statements, + -readability-function-cognitive-complexity, + -readability-identifier-length, + -readability-implicit-bool-conversion, + -readability-magic-numbers, + -readability-static-definition-in-anonymous-namespace, + -readability-uppercase-literal-suffix, + -readability-use-anyofallof, + google-*, + -google-readability-casting, + -google-readability-todo, + performance-*, + bugprone-*, + -bugprone-easily-swappable-parameters, + -bugprone-narrowing-conversions, + modernize-*, + -modernize-avoid-c-arrays, + -modernize-make-unique, + -modernize-use-auto, + -modernize-use-nodiscard, + -modernize-use-std-print, + -modernize-use-trailing-return-type, + misc-*, + -misc-no-recursion, + -misc-unused-parameters, + -misc-use-anonymous-namespace, + +ExtraArgs: + - -Wno-unknown-pragmas diff --git a/README.md b/README.md index 71ee783..ce02cf8 100644 --- a/README.md +++ b/README.md @@ -160,7 +160,14 @@ To get a useful compilation database for `clangd` to be happy, run first scripts/make-compilation-db.sh ``` -Before submit, run `scripts/run-format.sh`. +Before submitting, run +``` +scripts/run-format.sh +scripts/run-clang-tidy-cached.cc +``` + +... and fix potential `clang-tidy` issues (or update `.clang-tidy` if it is +not useful). 
[bazel]: https://bazel.build/ [buildozer]: https://github.com/bazelbuild/buildtools/blob/master/buildozer/README.md diff --git a/scripts/run-clang-tidy-cached.cc b/scripts/run-clang-tidy-cached.cc new file mode 100755 index 0000000..743618e --- /dev/null +++ b/scripts/run-clang-tidy-cached.cc @@ -0,0 +1,285 @@ +#if 0 // Invoke with /bin/sh or simply add executable bit on this file on Unix. +B=${0%%.cc}; [ "$B" -nt "$0" ] || c++ -std=c++17 -o"$B" "$0" && exec "$B" "$@"; +#endif +// Copyright 2023 Henner Zeller +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Script to run clang-tidy on files in a bazel project while caching the +// results as clang-tidy can be pretty slow. The clang-tidy output messages +// are content-addressed in a hash(cc-file-content) cache file. +// Should run on any system with a shell that provides 2>/dev/null redirect. +// +// Invocation without parameters simply uses the .clang-tidy config to run on +// all *.{cc,h} files. Additional parameters passed to this script are passed +// to clang-tidy as-is. 
// Typical use could be for instance
//   run-clang-tidy-cached.cc --checks="-*,modernize-use-override" --fix

// This file shall be self-contained, so we don't use any re2 or absl niceties
#include <algorithm>
#include <cinttypes>
#include <csignal>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <functional>
#include <iostream>
#include <list>
#include <map>
#include <mutex>
#include <optional>
#include <regex>
#include <string>
#include <string_view>
#include <system_error>
#include <thread>
#include <utility>
#include <vector>

// Some configuration for this project.
static const std::string kProjectCachePrefix = "bant_";
static constexpr std::string_view kWorkspaceFile = "MODULE.bazel";

static constexpr std::string_view kSearchDir = "bant/";
static const std::string kFileExcludeRe = ".git/|.github/|scripts/";

namespace fs = std::filesystem;
using hash_t = uint64_t;
// A source file path paired with the hash its cached tidy output is stored
// under.
using filepath_contenthash_t = std::pair<fs::path, hash_t>;
using ReIt = std::sregex_iterator;

// Some helpers

// Reads everything that is left in "f" without closing it.
static std::string ReadRemaining(FILE *f) {
  std::string result;
  char buf[4096];
  while (const size_t r = fread(buf, 1, sizeof(buf), f)) {
    result.append(buf, r);
  }
  return result;
}

// Returns the remaining content of the stream "f" and closes it with fclose().
// A null stream yields an empty string, so a file that could not be opened
// reads as "no content".
std::string GetContent(FILE *f) {
  if (!f) return {};  // ¯\_(ツ)_/¯ best effort.
  std::string result = ReadRemaining(f);
  fclose(f);
  return result;
}

// Returns the content of the file at path "f"; empty if it can't be opened.
std::string GetContent(const fs::path &f) {
  return GetContent(fopen(f.string().c_str(), "r"));
}

// Runs "command" through popen() and returns what it wrote to stdout; empty
// if the command could not be started. The stream is released with pclose()
// as popen() requires (fclose() would not wait for the child process).
static std::string GetCommandOutput(const std::string &command) {
  FILE *const pipe = popen(command.c_str(), "r");
  if (!pipe) return {};
  std::string result = ReadRemaining(pipe);
  pclose(pipe);
  return result;
}

// Content hash used to address cache entries.
hash_t hash(const std::string &s) { return std::hash<std::string>()(s); }

// Formats "value" as 16 lowercase, zero-padded hex digits and returns the
// last "show_lower_nibbles" of them. Values outside of 0..16 are clamped so
// that the returned pointer offset always stays inside the buffer.
std::string toHex(uint64_t value, int show_lower_nibbles = 16) {
  char out[16 + 1];
  snprintf(out, sizeof(out), "%016" PRIx64, value);
  const int nibbles = std::clamp(show_lower_nibbles, 0, 16);
  return out + (16 - nibbles);
}

// Reads the clang-tidy configuration file and returns it from its "Checks:"
// line to the end of the file. When "Checks:" follows other lines, the
// returned text starts with the newline preceding it.
// Returns std::nullopt (and prints a message to stderr) if there is no line
// starting with "Checks:" or if a '#' shows up anywhere after it.
std::optional<std::string> ReadAndVerifyTidyConfig(const fs::path &config) {
  const auto content = GetContent(config);
  // "Checks:" must start a line; the first line of the file counts as well.
  const bool checks_is_first_line = content.rfind("Checks:", 0) == 0;
  const auto start_config = checks_is_first_line ? std::string::size_type{0}
                                                 : content.find("\nChecks:");
  if (start_config == std::string::npos) {
    std::cerr << "Not seen 'Checks:' in config " << config << "\n";
    return std::nullopt;
  }
  if (content.find('#', start_config) != std::string::npos) {
    std::cerr << "Comment found in check section of " << config << "\n";
    return std::nullopt;
  }
  return content.substr(start_config);
}

// Chooses the directory below which cached results are kept: $CACHE_DIR if
// set, else $HOME/.cache/ if that exists, else $TMPDIR, else /tmp.
fs::path GetCacheDir() {
  if (const char *from_env = getenv("CACHE_DIR")) return fs::path{from_env};
  if (const char *home = getenv("HOME")) {
    std::error_code ignored_error;  // An inaccessible dir counts as missing.
    if (auto cdir = fs::path(home) / ".cache/";
        fs::exists(cdir, ignored_error)) {
      return cdir;
    }
  }
  const char *const tmpdir = getenv("TMPDIR");
  return fs::path{tmpdir ? tmpdir : "/tmp"};
}

// Backslash-escapes all regex metacharacters in "literal" so that it only
// matches itself when spliced into a regular expression.
static std::string EscapeForRegex(const std::string &literal) {
  static const std::string kMetaChars = R"re(\^$.|?*+()[]{})re";
  std::string escaped;
  escaped.reserve(literal.size());
  for (const char c : literal) {
    if (kMetaChars.find(c) != std::string::npos) escaped.push_back('\\');
    escaped.push_back(c);
  }
  return escaped;
}

// Fix filename paths that are not emitted relative to project root.
// Reads "infile", removes a leading bazel execution root or current working
// directory (and a leading "./") at the start of each line, and writes the
// result to "outfile". Both may name the same file: the input is read
// completely before the output is opened.
void CanonicalizeSourcePaths(const fs::path &infile, const fs::path &outfile) {
  // Built once on first use; asks bazel for its execution root.
  static const std::regex sFixPathsRe = []() {
    std::string canonicalize_expr = "(^|\\n)(";  // fix names at start of line
    auto root = GetCommandOutput("bazel info execution_root 2>/dev/null");
    while (!root.empty() && (root.back() == '\n' || root.back() == '\r')) {
      root.pop_back();  // remove newline.
    }
    if (!root.empty()) {
      canonicalize_expr += EscapeForRegex(root) + "/|";
    }
    // Paths are escaped: they are literal text, not patterns.
    canonicalize_expr += EscapeForRegex(fs::current_path().string()) + "/";
    canonicalize_expr += ")?(\\./)?";  // Some start with, or have a trailing ./
    return std::regex{canonicalize_expr};
  }();
  const auto in_content = GetContent(infile);
  std::fstream out_stream(outfile, std::ios::out);
  out_stream << std::regex_replace(in_content, sFixPathsRe, "$1");
}

// Given a work-queue in/out-file, process it. Using system() for portability.
+void ClangTidyProcessFiles(const fs::path &content_dir, const std::string &cmd, + std::list *work_queue) { + if (work_queue->empty()) return; + const int kJobs = std::thread::hardware_concurrency(); + std::cerr << work_queue->size() << " files to process..."; + + std::mutex queue_access_lock; + auto clang_tidy_runner = [&]() { + for (;;) { + filepath_contenthash_t work; + { + const std::lock_guard lock(queue_access_lock); + if (work_queue->empty()) return; + fprintf(stderr, "%5d\b\b\b\b\b", static_cast(work_queue->size())); + work = work_queue->front(); + work_queue->pop_front(); + } + const fs::path final_out = content_dir / toHex(work.second); + const std::string tmp_out = final_out.string() + ".tmp"; + const std::string command = cmd + " '" + work.first.string() + "'" + + "> '" + tmp_out + "' 2>/dev/null"; + const int r = system(command.c_str()); +#ifdef WIFSIGNALED + // NOLINTBEGIN + if (WIFSIGNALED(r) && (WTERMSIG(r) == SIGINT || WTERMSIG(r) == SIGQUIT)) { + break; // got Ctrl-C + } + // NOLINTEND +#endif + CanonicalizeSourcePaths(tmp_out, tmp_out); + fs::rename(tmp_out, final_out); // atomic replacement + } + }; + std::vector workers; + for (auto i = 0; i < kJobs; ++i) { + workers.emplace_back(clang_tidy_runner); // NOLINT + } + for (auto &t : workers) t.join(); + fprintf(stderr, " \n"); // Clean out progress counter. +} + +int main(int argc, char *argv[]) { + const std::string kTidySymlink = kProjectCachePrefix + "clang-tidy.out"; + const fs::path cache_dir = GetCacheDir() / "clang-tidy"; + + // Test that key files exist and remember their last change. + std::error_code ec; + const auto workspace_ts = fs::last_write_time(kWorkspaceFile, ec); + if (ec.value() != 0) { + std::cerr << "Script needs to be executed in toplevel bazel project dir\n"; + return EXIT_FAILURE; + } + const auto compdb_ts = fs::last_write_time("compile_commands.json", ec); + if (ec.value() != 0) { + std::cerr << "No compilation db found. 
First, run make-compilation-db.sh\n"; + return EXIT_FAILURE; + } + const auto build_env_latest_change = std::max(workspace_ts, compdb_ts); + + const auto config = ReadAndVerifyTidyConfig(".clang-tidy"); + if (!config) return EXIT_FAILURE; + + // We'll invoke clang-tidy with all the additional flags user provides. + const std::string clang_tidy = getenv("CLANG_TIDY") ?: "clang-tidy"; + std::string clang_tidy_invocation = clang_tidy + " --quiet"; + clang_tidy_invocation.append(" \"--config=").append(*config).append("\""); + for (int i = 1; i < argc; ++i) { + clang_tidy_invocation.append(" \"").append(argv[i]).append("\""); + } + + // Use major version as part of name of our configuration specific dir. + auto version = GetContent(popen((clang_tidy + " --version").c_str(), "r")); + std::smatch version_match; + const std::string major_version = + std::regex_search(version, version_match, std::regex{"version ([0-9]+)"}) + ? version_match[1].str() + : "UNKNOWN"; + + // Cache directory name based on configuration. + const fs::path project_base_dir = + cache_dir / fs::path(kProjectCachePrefix + "v" + major_version + "_" + + toHex(hash(version + clang_tidy_invocation), 8)); + const fs::path tidy_outfile = project_base_dir / "tidy.out"; + const fs::path content_dir = project_base_dir / "contents"; + fs::create_directories(content_dir); + std::cerr << "Cache dir " << project_base_dir << "\n"; + + // Gather all *.cc and *.h files; remember content hashes of includes. 
+ std::vector files_of_interest; + std::map header_hashes; + const std::regex exclude_re(kFileExcludeRe); + for (const auto &dir_entry : fs::recursive_directory_iterator(kSearchDir)) { + const fs::path &p = dir_entry.path().lexically_normal(); + if (!fs::is_regular_file(p)) continue; + if (!kFileExcludeRe.empty() && std::regex_search(p.string(), exclude_re)) { + continue; + } + if (auto ext = p.extension(); ext == ".cc" || ext == ".h") { + files_of_interest.emplace_back(p, 0); + if (ext == ".h") header_hashes[p.string()] = hash(GetContent(p)); + } + } + std::cerr << files_of_interest.size() << " files of interest.\n"; + + // Create content hash address. If any header a file depends on changes, we + // want to reprocess. So we make the hash dependent on header content as well. + std::list work_queue; + const std::regex inc_re("\"([0-9a-zA-Z_/-]+\\.h)\""); // match include + for (filepath_contenthash_t &f : files_of_interest) { + const auto content = GetContent(f.first); + f.second = hash(content); + for (ReIt it(content.begin(), content.end(), inc_re); it != ReIt(); ++it) { + const std::string &header_path = (*it)[1].str(); + f.second ^= header_hashes[header_path]; + } + const fs::path content_hash_file = content_dir / toHex(f.second); + // Recreate if we don't have it yet or if it contains messages but is + // older than WORKSPACE or compilation db. Maybe something got fixed. + if (!fs::exists(content_hash_file) || + (fs::file_size(content_hash_file) > 0 && + fs::last_write_time(content_hash_file) < build_env_latest_change)) { + work_queue.emplace_back(f); + } + } + + // Run clang tidy in parallel on the files to process. + ClangTidyProcessFiles(content_dir, clang_tidy_invocation, &work_queue); + + // Assemble the separate outputs into a single file. Tally up per-check stats. 
+ const std::regex check_re("(\\[[a-zA-Z.-]+\\])\n"); + std::map checks_seen; + std::ofstream tidy_collect(tidy_outfile); + for (const filepath_contenthash_t &f : files_of_interest) { + const auto tidy = GetContent(content_dir / toHex(f.second)); + if (!tidy.empty()) tidy_collect << f.first.string() << ":\n" << tidy; + for (ReIt it(tidy.begin(), tidy.end(), check_re); it != ReIt(); ++it) { + checks_seen[(*it)[1].str()]++; + } + } + std::error_code ignored_error; + fs::remove(kTidySymlink, ignored_error); + fs::create_symlink(tidy_outfile, kTidySymlink, ignored_error); + + if (checks_seen.empty()) { + std::cerr << "No clang-tidy complaints. 😎\n"; + } else { + std::cerr << "--- Summary --- (details in " << kTidySymlink << ")\n"; + using check_count_t = std::pair; + std::vector by_count(checks_seen.begin(), checks_seen.end()); + std::stable_sort(by_count.begin(), by_count.end(), + [](const check_count_t &a, const check_count_t &b) { + return b.second < a.second; // reverse count + }); + for (const auto &counts : by_count) { + fprintf(stdout, "%5d %s\n", counts.second, counts.first.c_str()); + } + } + return checks_seen.empty() ? EXIT_SUCCESS : EXIT_FAILURE; +}