Skip to content

Commit

Permalink
Add a clang-tidy script.
Browse files Browse the repository at this point in the history
  • Loading branch information
hzeller committed Apr 28, 2024
1 parent 7be3b82 commit 1e5bfc7
Show file tree
Hide file tree
Showing 3 changed files with 336 additions and 1 deletion.
43 changes: 43 additions & 0 deletions .clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# We're not using readability-braces-around-statements as we using
# the google-* version of that which is more sensible (allows one-liners
# without braces).
#
# NOTE: Below, there must be no comments, otherwise clang tidy silently ignores
# rules... (ask me how I know).
###
Checks: >
clang-diagnostic-*,
-clang-diagnostic-unknown-pragmas,
clang-analyzer-*,
abseil-*,
readability-*,
-readability-avoid-unconditional-preprocessor-if,
-readability-braces-around-statements,
-readability-function-cognitive-complexity,
-readability-identifier-length,
-readability-implicit-bool-conversion,
-readability-magic-numbers,
-readability-static-definition-in-anonymous-namespace,
-readability-uppercase-literal-suffix,
-readability-use-anyofallof,
google-*,
-google-readability-casting,
-google-readability-todo,
performance-*,
bugprone-*,
-bugprone-easily-swappable-parameters,
-bugprone-narrowing-conversions,
modernize-*,
-modernize-avoid-c-arrays,
-modernize-make-unique,
-modernize-use-auto,
-modernize-use-nodiscard,
-modernize-use-std-print,
-modernize-use-trailing-return-type,
misc-*,
-misc-no-recursion,
-misc-unused-parameters,
-misc-use-anonymous-namespace,
ExtraArgs:
- -Wno-unknown-pragmas
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,14 @@ To get a useful compilation database for `clangd` to be happy, run first
scripts/make-compilation-db.sh
```

Before submit, run `scripts/run-format.sh`.
Before submit, run
```
scripts/run-format.sh
scripts/run-clang-tidy-cached.cc
```

... and fix potential `clang-tidy` issues (or update `.clang-tidy` if it is
not useful).

[bazel]: https://bazel.build/
[buildozer]: https://github.com/bazelbuild/buildtools/blob/master/buildozer/README.md
Expand Down
285 changes: 285 additions & 0 deletions scripts/run-clang-tidy-cached.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
#if 0 // Invoke with /bin/sh or simply add executable bit on this file on Unix.
B=${0%%.cc}; [ "$B" -nt "$0" ] || c++ -std=c++17 -o"$B" "$0" && exec "$B" "$@";
#endif
// Copyright 2023 Henner Zeller <[email protected]>
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Script to run clang-tidy on files in a bazel project while caching the
// results as clang-tidy can be pretty slow. The clang-tidy output messages
// are content-addressed in a hash(cc-file-content) cache file.
// Should run on any system with a shell that provides 2>/dev/null redirect.
//
// Invocation without parameters simply uses the .clang-tidy config to run on
// all *.{cc,h} files. Additional parameters passed to this script are passed
// to clang-tidy as-is. Typical use could be for instance
// run-clang-tidy-cached.cc --checks="-*,modernize-use-override" --fix

// This file shall be self-contined, so we don't use any re2 or absl niceties
#include <algorithm>
#include <cinttypes>
#include <csignal>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <functional>
#include <iostream>
#include <list>
#include <map>
#include <mutex>
#include <optional>
#include <regex>
#include <string>
#include <string_view>
#include <system_error>
#include <thread>
#include <utility>
#include <vector>

// Some configuration for this project.
static const std::string kProjectCachePrefix = "bant_";
static constexpr std::string_view kWorkspaceFile = "MODULE.bazel";

static constexpr std::string_view kSearchDir = "bant/";
static const std::string kFileExcludeRe = ".git/|.github/|scripts/";

namespace fs = std::filesystem;
using filepath_contenthash_t = std::pair<fs::path, uint64_t>;
using ReIt = std::sregex_iterator;

// Some helpers
std::string GetContent(FILE *f) {
std::string result;
if (!f) return result; // ¯\_(ツ)_/¯ best effort.
char buf[4096];
while (const size_t r = fread(buf, 1, sizeof(buf), f)) {
result.append(buf, r);
}
fclose(f);
return result;
}

std::string GetContent(const fs::path &f) {
return GetContent(fopen(f.string().c_str(), "r"));
}

using hash_t = uint64_t;
hash_t hash(const std::string &s) { return std::hash<std::string>()(s); }
std::string toHex(uint64_t value, int show_lower_nibbles = 16) {
char out[16 + 1];
snprintf(out, sizeof(out), "%016" PRIx64, value);
return out + (16 - show_lower_nibbles);
}

std::optional<std::string> ReadAndVerifyTidyConfig(const fs::path &config) {
auto content = GetContent(config);
auto start_config = content.find("\nChecks:");
if (start_config == std::string::npos) {
std::cerr << "Not seen 'Checks:' in config " << config << "\n";
return std::nullopt;
}
if (content.find('#', start_config) != std::string::npos) {
std::cerr << "Comment found in check section of " << config << "\n";
return std::nullopt;
}
return content.substr(start_config);
}

fs::path GetCacheDir() {
if (const char *from_env = getenv("CACHE_DIR")) return fs::path{from_env};
if (const char *home = getenv("HOME")) {
if (auto cdir = fs::path(home) / ".cache/"; fs::exists(cdir)) return cdir;
}
return fs::path{getenv("TMPDIR") ?: "/tmp"};
}

// Fix filename paths that are not emitted relative to project root.
void CanonicalizeSourcePaths(const fs::path &infile, const fs::path &outfile) {
static const std::regex sFixPathsRe = []() {
std::string canonicalize_expr = "(^|\\n)("; // fix names at start of line
auto root = GetContent(popen("bazel info execution_root 2>/dev/null", "r"));
if (!root.empty()) {
root.pop_back(); // remove newline.
canonicalize_expr += root + "/|";
}
canonicalize_expr += fs::current_path().string() + "/"; // $(pwd)/
canonicalize_expr += ")?(\\./)?"; // Some start with, or have a trailing ./
return std::regex{canonicalize_expr};
}();
const auto in_content = GetContent(infile);
std::fstream out_stream(outfile, std::ios::out);
out_stream << std::regex_replace(in_content, sFixPathsRe, "$1");
}

// Given a work-queue in/out-file, process it. Using system() for portability.
void ClangTidyProcessFiles(const fs::path &content_dir, const std::string &cmd,
std::list<filepath_contenthash_t> *work_queue) {
if (work_queue->empty()) return;
const int kJobs = std::thread::hardware_concurrency();
std::cerr << work_queue->size() << " files to process...";

std::mutex queue_access_lock;
auto clang_tidy_runner = [&]() {
for (;;) {
filepath_contenthash_t work;
{
const std::lock_guard<std::mutex> lock(queue_access_lock);
if (work_queue->empty()) return;
fprintf(stderr, "%5d\b\b\b\b\b", static_cast<int>(work_queue->size()));
work = work_queue->front();
work_queue->pop_front();
}
const fs::path final_out = content_dir / toHex(work.second);
const std::string tmp_out = final_out.string() + ".tmp";
const std::string command = cmd + " '" + work.first.string() + "'" +
"> '" + tmp_out + "' 2>/dev/null";
const int r = system(command.c_str());
#ifdef WIFSIGNALED
// NOLINTBEGIN
if (WIFSIGNALED(r) && (WTERMSIG(r) == SIGINT || WTERMSIG(r) == SIGQUIT)) {
break; // got Ctrl-C
}
// NOLINTEND
#endif
CanonicalizeSourcePaths(tmp_out, tmp_out);
fs::rename(tmp_out, final_out); // atomic replacement
}
};
std::vector<std::thread> workers;
for (auto i = 0; i < kJobs; ++i) {
workers.emplace_back(clang_tidy_runner); // NOLINT
}
for (auto &t : workers) t.join();
fprintf(stderr, " \n"); // Clean out progress counter.
}

int main(int argc, char *argv[]) {
const std::string kTidySymlink = kProjectCachePrefix + "clang-tidy.out";
const fs::path cache_dir = GetCacheDir() / "clang-tidy";

// Test that key files exist and remember their last change.
std::error_code ec;
const auto workspace_ts = fs::last_write_time(kWorkspaceFile, ec);
if (ec.value() != 0) {
std::cerr << "Script needs to be executed in toplevel bazel project dir\n";
return EXIT_FAILURE;
}
const auto compdb_ts = fs::last_write_time("compile_commands.json", ec);
if (ec.value() != 0) {
std::cerr << "No compilation db found. First, run make-compilation-db.sh\n";
return EXIT_FAILURE;
}
const auto build_env_latest_change = std::max(workspace_ts, compdb_ts);

const auto config = ReadAndVerifyTidyConfig(".clang-tidy");
if (!config) return EXIT_FAILURE;

// We'll invoke clang-tidy with all the additional flags user provides.
const std::string clang_tidy = getenv("CLANG_TIDY") ?: "clang-tidy";
std::string clang_tidy_invocation = clang_tidy + " --quiet";
clang_tidy_invocation.append(" \"--config=").append(*config).append("\"");
for (int i = 1; i < argc; ++i) {
clang_tidy_invocation.append(" \"").append(argv[i]).append("\"");
}

// Use major version as part of name of our configuration specific dir.
auto version = GetContent(popen((clang_tidy + " --version").c_str(), "r"));
std::smatch version_match;
const std::string major_version =
std::regex_search(version, version_match, std::regex{"version ([0-9]+)"})
? version_match[1].str()
: "UNKNOWN";

// Cache directory name based on configuration.
const fs::path project_base_dir =
cache_dir / fs::path(kProjectCachePrefix + "v" + major_version + "_" +
toHex(hash(version + clang_tidy_invocation), 8));
const fs::path tidy_outfile = project_base_dir / "tidy.out";
const fs::path content_dir = project_base_dir / "contents";
fs::create_directories(content_dir);
std::cerr << "Cache dir " << project_base_dir << "\n";

// Gather all *.cc and *.h files; remember content hashes of includes.
std::vector<filepath_contenthash_t> files_of_interest;
std::map<std::string, hash_t> header_hashes;
const std::regex exclude_re(kFileExcludeRe);
for (const auto &dir_entry : fs::recursive_directory_iterator(kSearchDir)) {
const fs::path &p = dir_entry.path().lexically_normal();
if (!fs::is_regular_file(p)) continue;
if (!kFileExcludeRe.empty() && std::regex_search(p.string(), exclude_re)) {
continue;
}
if (auto ext = p.extension(); ext == ".cc" || ext == ".h") {
files_of_interest.emplace_back(p, 0);
if (ext == ".h") header_hashes[p.string()] = hash(GetContent(p));
}
}
std::cerr << files_of_interest.size() << " files of interest.\n";

// Create content hash address. If any header a file depends on changes, we
// want to reprocess. So we make the hash dependent on header content as well.
std::list<filepath_contenthash_t> work_queue;
const std::regex inc_re("\"([0-9a-zA-Z_/-]+\\.h)\""); // match include
for (filepath_contenthash_t &f : files_of_interest) {
const auto content = GetContent(f.first);
f.second = hash(content);
for (ReIt it(content.begin(), content.end(), inc_re); it != ReIt(); ++it) {
const std::string &header_path = (*it)[1].str();
f.second ^= header_hashes[header_path];
}
const fs::path content_hash_file = content_dir / toHex(f.second);
// Recreate if we don't have it yet or if it contains messages but is
// older than WORKSPACE or compilation db. Maybe something got fixed.
if (!fs::exists(content_hash_file) ||
(fs::file_size(content_hash_file) > 0 &&
fs::last_write_time(content_hash_file) < build_env_latest_change)) {
work_queue.emplace_back(f);
}
}

// Run clang tidy in parallel on the files to process.
ClangTidyProcessFiles(content_dir, clang_tidy_invocation, &work_queue);

// Assemble the separate outputs into a single file. Tally up per-check stats.
const std::regex check_re("(\\[[a-zA-Z.-]+\\])\n");
std::map<std::string, int> checks_seen;
std::ofstream tidy_collect(tidy_outfile);
for (const filepath_contenthash_t &f : files_of_interest) {
const auto tidy = GetContent(content_dir / toHex(f.second));
if (!tidy.empty()) tidy_collect << f.first.string() << ":\n" << tidy;
for (ReIt it(tidy.begin(), tidy.end(), check_re); it != ReIt(); ++it) {
checks_seen[(*it)[1].str()]++;
}
}
std::error_code ignored_error;
fs::remove(kTidySymlink, ignored_error);
fs::create_symlink(tidy_outfile, kTidySymlink, ignored_error);

if (checks_seen.empty()) {
std::cerr << "No clang-tidy complaints. 😎\n";
} else {
std::cerr << "--- Summary --- (details in " << kTidySymlink << ")\n";
using check_count_t = std::pair<std::string, int>;
std::vector<check_count_t> by_count(checks_seen.begin(), checks_seen.end());
std::stable_sort(by_count.begin(), by_count.end(),
[](const check_count_t &a, const check_count_t &b) {
return b.second < a.second; // reverse count
});
for (const auto &counts : by_count) {
fprintf(stdout, "%5d %s\n", counts.second, counts.first.c_str());
}
}
return checks_seen.empty() ? EXIT_SUCCESS : EXIT_FAILURE;
}

0 comments on commit 1e5bfc7

Please sign in to comment.