Skip to content

Commit

Permalink
refactor: Compute joinProjectedVarColumnsSize by taking in a vector<r…
Browse files Browse the repository at this point in the history
…ows> instead of by row [2/n] (#12235)

Summary:
Pull Request resolved: #12235

Rather than incurring many indirections to look up whether a column's type is fixed-width in joinProjectedVarColumnsSize on a row-by-row basis, pass in the whole vector<rows> so the computation can be done per column.

This allows us to do the indirection lookup only once per column instead of once per row.

Reviewed By: Yuhta

Differential Revision: D68984669

fbshipit-source-id: e6fcc8b8f2653eef6ca2659f6aea92d8d6c9bead
  • Loading branch information
yuandagits authored and facebook-github-bot committed Feb 4, 2025
1 parent ccb5094 commit 8bc8897
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
23 changes: 18 additions & 5 deletions velox/exec/HashTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1817,6 +1817,21 @@ inline uint64_t HashTable<ignoreNullKeys>::joinProjectedVarColumnsSize(
return totalBytes;
}

// Returns the sum of rows_->variableSizeAt(row, column) over every row in
// 'rows' and every entry of 'columns' whose type is not fixed-width.
// Fixed-width columns contribute nothing. The fixed-width check is evaluated
// once per column, outside the per-row loop.
template <bool ignoreNullKeys>
inline uint64_t HashTable<ignoreNullKeys>::joinProjectedVarColumnsSize(
    const std::vector<vector_size_t>& columns,
    NextRowVector*& rows) const {
  uint64_t varBytes{0};
  for (const auto& columnIndex : columns) {
    // Skip fixed-width columns up front; only variable-width ones are summed.
    if (rows_->columnTypes()[columnIndex]->isFixedWidth()) {
      continue;
    }
    for (const auto* dupRow : *rows) {
      varBytes += rows_->variableSizeAt(dupRow, columnIndex);
    }
  }
  return varBytes;
}

template <bool ignoreNullKeys>
int32_t HashTable<ignoreNullKeys>::listJoinResults(
JoinResultIterator& iter,
Expand Down Expand Up @@ -1876,11 +1891,9 @@ int32_t HashTable<ignoreNullKeys>::listJoinResults(
if (iter.estimatedRowSize.has_value()) {
totalBytes += iter.estimatedRowSize.value() * numRows;
} else {
for (const auto* dupRow : *rows) {
totalBytes +=
joinProjectedVarColumnsSize(iter.varSizeListColumns, dupRow) +
iter.fixedSizeListColumnsSizeSum;
}
totalBytes +=
joinProjectedVarColumnsSize(iter.varSizeListColumns, rows);
totalBytes += (iter.fixedSizeListColumnsSizeSum * rows->size());
totalBytes += (iter.fixedSizeListColumnsSizeSum * numRows);
}
if (iter.lastDuplicateRowIndex >= numRows) {
Expand Down
7 changes: 7 additions & 0 deletions velox/exec/HashTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,13 @@ class HashTable : public BaseHashTable {
const std::vector<vector_size_t>& columns,
const char* row) const;

// Batch variant of joinProjectedVarColumnsSize(const
// std::vector<vector_size_t>&, const char*): instead of a single row, this
// overload takes the whole vector of rows and returns the total size of the
// variable-width values of 'columns' summed over every row in 'rows'.
// Fixed-width columns contribute nothing to the result. Passing all rows at
// once lets the per-column fixed-width check be done once per column rather
// than once per row.
inline uint64_t joinProjectedVarColumnsSize(
const std::vector<vector_size_t>& columns,
NextRowVector*& rows) const;

// Adds a row to a hash join table in kArray hash mode. Returns true if a new
// entry was made and false if the row was added to an existing set of rows
// with the same key. 'allocator' is provided for duplicate row vector
Expand Down

0 comments on commit 8bc8897

Please sign in to comment.