diff --git a/velox/exec/HashTable.cpp b/velox/exec/HashTable.cpp
index a86338e28b06..79a6c55bd421 100644
--- a/velox/exec/HashTable.cpp
+++ b/velox/exec/HashTable.cpp
@@ -1817,6 +1817,21 @@ inline uint64_t HashTable<ignoreNullKeys>::joinProjectedVarColumnsSize(
   return totalBytes;
 }
 
+template <bool ignoreNullKeys>
+inline uint64_t HashTable<ignoreNullKeys>::joinProjectedVarColumnsSize(
+    const std::vector<vector_size_t>& columns,
+    const NextRowVector* rows) const {
+  uint64_t totalBytes{0};
+  for (const auto& column : columns) {
+    if (!rows_->columnTypes()[column]->isFixedWidth()) {
+      for (const auto* row : *rows) {
+        totalBytes += rows_->variableSizeAt(row, column);
+      }
+    }
+  }
+  return totalBytes;
+}
+
 template <bool ignoreNullKeys>
 int32_t HashTable<ignoreNullKeys>::listJoinResults(
     JoinResultIterator& iter,
@@ -1876,11 +1891,9 @@ int32_t HashTable<ignoreNullKeys>::listJoinResults(
       if (iter.estimatedRowSize.has_value()) {
         totalBytes += iter.estimatedRowSize.value() * numRows;
       } else {
-        for (const auto* dupRow : *rows) {
-          totalBytes +=
-              joinProjectedVarColumnsSize(iter.varSizeListColumns, dupRow) +
-              iter.fixedSizeListColumnsSizeSum;
-        }
+        totalBytes +=
+            joinProjectedVarColumnsSize(iter.varSizeListColumns, rows);
+        totalBytes += (iter.fixedSizeListColumnsSizeSum * rows->size());
         totalBytes += (iter.fixedSizeListColumnsSizeSum * numRows);
       }
       if (iter.lastDuplicateRowIndex >= numRows) {
diff --git a/velox/exec/HashTable.h b/velox/exec/HashTable.h
index 56f045c52afc..a5d14dff54a7 100644
--- a/velox/exec/HashTable.h
+++ b/velox/exec/HashTable.h
@@ -902,6 +902,13 @@ class HashTable : public BaseHashTable {
       const std::vector<vector_size_t>& columns,
       const char* row) const;
 
+  // Returns the total size in bytes of the variable-width 'columns' summed
+  // over every row in 'rows'. Batch counterpart of the single-row overload
+  // above. 'rows' is only read and must not be null.
+  inline uint64_t joinProjectedVarColumnsSize(
+      const std::vector<vector_size_t>& columns,
+      const NextRowVector* rows) const;
+
   // Adds a row to a hash join table in kArray hash mode. Returns true if a new
   // entry was made and false if the row was added to an existing set of rows
   // with the same key. 'allocator' is provided for duplicate row vector