From 05e64d7b32d6c5e5df34dd386916fee92cfadd2c Mon Sep 17 00:00:00 2001
From: pca006132
Date: Sun, 22 Dec 2024 22:21:18 +0800
Subject: [PATCH 01/37] init sdf eval stuff

---
 src/CMakeLists.txt |   3 +
 src/sdf/interval.h | 202 +++++++++++++++++++++++++++++++++
 src/sdf/tape.h     | 277 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 482 insertions(+)
 create mode 100644 src/sdf/interval.h
 create mode 100644 src/sdf/tape.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2dff72d90..4dc3fe8e6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -51,6 +51,9 @@ set(
   tri_dist.h
   utils.h
   vec.h
+  sdf/eval.h
+  sdf/interval.h
+  sdf/tape.h
 )
 
 # Include directories
diff --git a/src/sdf/interval.h b/src/sdf/interval.h
new file mode 100644
index 000000000..d2b6ddbae
--- /dev/null
+++ b/src/sdf/interval.h
@@ -0,0 +1,202 @@
+// Copyright 2024 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <limits>
+
+#include "manifold/common.h"
+
+namespace manifold::sdf {
+
+// not really a precise implementation...
+//
+// Closed interval [lower, upper] bounding the values an expression can take.
+// Comparisons and logical operations return an interval inside {0, 1}:
+// {1, 1} means "always true", {0, 0} "never true", {0, 1} "undecided".
+template <typename Domain>
+struct Interval {
+  Domain lower;
+  Domain upper;
+
+  static Interval constant(Domain v) { return {v, v}; }
+
+  constexpr Interval operator+(const Interval &other) const {
+    return {lower + other.lower, upper + other.upper};
+  }
+
+  constexpr Interval operator-() const { return {-upper, -lower}; }
+
+  constexpr Interval operator-(const Interval &other) const {
+    return *this + (-other);
+  }
+
+  // Endpoint product using the interval-arithmetic convention 0 * inf == 0.
+  // The IEEE result is NaN, and std::min/std::max do not order NaN, so a
+  // single such term used to corrupt a bound (e.g. [0, 1] / [-1, 1]).
+  static constexpr Domain bound_product(Domain a, Domain b) {
+    if (a == 0 || b == 0) return 0;
+    return a * b;
+  }
+
+  Interval operator*(const Interval &other) const {
+    Domain a1b1 = bound_product(lower, other.lower);
+    Domain a2b2 = bound_product(upper, other.upper);
+    // we can write more "fast paths", but at some point it will become slower
+    // than just going the general path...
+    if (lower >= 0.0 && other.lower >= 0.0)
+      return {a1b1, a2b2};
+    else if (upper <= 0.0 && other.upper <= 0.0)
+      return {a2b2, a1b1};
+
+    Domain a1b2 = bound_product(lower, other.upper);
+    Domain a2b1 = bound_product(upper, other.lower);
+    return {std::min(std::min(a1b1, a1b2), std::min(a2b1, a2b2)),
+            std::max(std::max(a1b1, a1b2), std::max(a2b1, a2b2))};
+  }
+
+  // Scaling by a scalar; a non-positive factor swaps the bounds.
+  Interval operator*(double d) const {
+    if (d > 0) return {lower * d, upper * d};
+    return {upper * d, lower * d};
+  }
+
+  // Division as multiplication by the reciprocal interval. A divisor that
+  // touches zero gets an infinite reciprocal bound on that side; a divisor
+  // that straddles zero yields the whole real line.
+  Interval operator/(const Interval &other) const {
+    if (other.is_const()) return *this / other.lower;
+    constexpr Domain zero = static_cast<Domain>(0);
+    constexpr Domain infty = std::numeric_limits<Domain>::infinity();
+    Interval reci;
+    if (other.lower >= zero || other.upper <= zero) {
+      reci.lower = other.upper == zero ? -infty : (1 / other.upper);
+      reci.upper = other.lower == zero ? infty : (1 / other.lower);
+    } else {
+      reci.lower = -infty;
+      reci.upper = infty;
+    }
+    return *this * reci;
+  }
+
+  Interval operator/(double d) const {
+    if (d > 0) return {lower / d, upper / d};
+    return {upper / d, lower / d};
+  }
+
+  constexpr bool is_const() const { return lower == upper; }
+
+  constexpr Interval operator==(const Interval &other) const {
+    if (is_const() && other.is_const() && lower == other.lower)
+      return constant(1);  // must be equal
+    if (lower > other.upper || upper < other.lower)
+      return constant(0);  // disjoint, cannot possibly be equal
+    return {0, 1};
+  }
+
+  constexpr bool operator==(double d) const { return is_const() && lower == d; }
+
+  constexpr Interval operator>(const Interval &other) const {
+    if (lower > other.upper) return constant(1);
+    if (upper < other.lower) return constant(0);
+    return {0, 1};
+  }
+
+  constexpr Interval operator<(const Interval &other) const {
+    if (upper < other.lower) return constant(1);
+    if (lower > other.upper) return constant(0);
+    return {0, 1};
+  }
+
+  constexpr Interval min(const Interval &other) const {
+    return {std::min(lower, other.lower), std::min(upper, other.upper)};
+  }
+
+  constexpr Interval max(const Interval &other) const {
+    return {std::max(lower, other.lower), std::max(upper, other.upper)};
+  }
+
+  // Smallest interval containing both operands.
+  constexpr Interval merge(const Interval &other) const {
+    return {std::min(lower, other.lower), std::max(upper, other.upper)};
+  }
+
+  // Image under a non-decreasing function f.
+  template <typename F>
+  constexpr Interval monotone_map(F f) const {
+    if (is_const()) return constant(f(lower));
+    return {f(lower), f(upper)};
+  }
+
+  // Image under a non-increasing function f.
+  template <typename F>
+  constexpr Interval antimonotone_map(F f) const {
+    if (is_const()) return constant(f(lower));
+    return {f(upper), f(lower)};
+  }
+
+  constexpr Interval abs() const {
+    if (lower >= 0) return *this;
+    if (upper <= 0) return {-upper, -lower};
+    return {0.0, std::max(-lower, upper)};
+  }
+
+  constexpr Interval mod(double m) const {
+    // FIXME: cannot deal with negative m right now...
+    Domain diff = std::fmod(lower, m);
+    if (diff < 0) diff += m;
+    Domain cycle_min = lower - diff;
+    // may be disjoint intervals, but we don't deal with that...
+    if (upper - cycle_min >= m) return {0.0, m};
+    return {diff, upper - cycle_min};
+  }
+
+  constexpr Interval logical_and(const Interval &other) const {
+    return {lower == 0.0 || other.lower == 0.0 ? 0.0 : 1.0,
+            upper == 1.0 && other.upper == 1.0 ? 1.0 : 0.0};
+  }
+
+  constexpr Interval logical_or(const Interval &other) const {
+    return {lower == 0.0 && other.lower == 0.0 ? 0.0 : 1.0,
+            upper == 1.0 || other.upper == 1.0 ? 1.0 : 0.0};
+  }
+
+  constexpr Interval sin() const {
+    if (is_const()) return constant(std::sin(lower));
+    // largely similar to cos
+    // index of the half-period [pi/2 + k*pi, pi/2 + (k+1)*pi) each bound is in
+    int64_t min_pis = static_cast<int64_t>(std::floor((lower - kHalfPi) / kPi));
+    int64_t max_pis = static_cast<int64_t>(std::floor((upper - kHalfPi) / kPi));
+
+    bool not_cross_pos_1 =
+        (min_pis % 2 == 0) ? max_pis - min_pis <= 1 : max_pis == min_pis;
+    bool not_cross_neg_1 =
+        (min_pis % 2 == 0) ? max_pis == min_pis : max_pis - min_pis <= 1;
+
+    Domain new_min =
+        not_cross_neg_1 ? std::min(std::sin(lower), std::sin(upper)) : -1.0;
+    Domain new_max =
+        not_cross_pos_1 ? std::max(std::sin(lower), std::sin(upper)) : 1.0;
+    return {new_min, new_max};
+  }
+
+  constexpr Interval cos() const {
+    if (is_const()) return constant(std::cos(lower));
+    // index of the half-period [k*pi, (k+1)*pi) each bound falls in; an even
+    // boundary is a maximum (+1), an odd boundary is a minimum (-1)
+    int64_t min_pis = static_cast<int64_t>(std::floor(lower / kPi));
+    int64_t max_pis = static_cast<int64_t>(std::floor(upper / kPi));
+
+    bool not_cross_pos_1 =
+        (min_pis % 2 == 0) ? max_pis - min_pis <= 1 : max_pis == min_pis;
+    bool not_cross_neg_1 =
+        (min_pis % 2 == 0) ? max_pis == min_pis : max_pis - min_pis <= 1;
+
+    Domain new_min =
+        not_cross_neg_1 ? std::min(std::cos(lower), std::cos(upper)) : -1.0;
+    Domain new_max =
+        not_cross_pos_1 ? std::max(std::cos(lower), std::cos(upper)) : 1.0;
+    return {new_min, new_max};
+  }
+
+  constexpr Interval tan() const {
+    if (is_const()) return constant(std::tan(lower));
+    // tan is monotone on each branch; spanning a pole gives the whole line
+    int64_t min_pis = static_cast<int64_t>(std::floor((lower + kHalfPi) / kPi));
+    int64_t max_pis = static_cast<int64_t>(std::floor((upper + kHalfPi) / kPi));
+    if (min_pis != max_pis)
+      return {-std::numeric_limits<Domain>::infinity(),
+              std::numeric_limits<Domain>::infinity()};
+    return monotone_map([](Domain x) { return std::tan(x); });
+  }
+};
+
+}  // namespace manifold::sdf
diff --git a/src/sdf/tape.h b/src/sdf/tape.h
new file mode 100644
index 000000000..519a8efa3
--- /dev/null
+++ b/src/sdf/tape.h
@@ -0,0 +1,277 @@
+// Copyright 2024 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+
+#include "interval.h"
+#include "manifold/vec_view.h"
+
+namespace manifold::sdf {
+
+// The numeric order of the enumerators is significant: EvalContext::eval
+// classifies an instruction with range comparisons (>= ADD, >= DIV, >= ABS).
+enum class OpCode : uint8_t {
+  NOP,
+  RETURN,
+
+  // unary operations
+  ABS,
+  NEG,
+  EXP,
+  LOG,
+  SQRT,
+  FLOOR,
+  CEIL,
+  ROUND,
+  SIN,
+  COS,
+  TAN,
+  ASIN,
+  ACOS,
+  ATAN,
+
+  // binary operations,
+  DIV,
+  MOD,
+  MIN,
+  MAX,
+  EQ,
+  GT,
+  AND,
+  OR,
+
+  // fast binary operations
+  ADD,
+  SUB,
+  MUL,
+  // ternary operations
+  FMA,
+  CHOICE,
+};
+
+// Interpreter for a byte-encoded tape. Each instruction is an opcode byte, a
+// result register index and one index per operand; registers index `buffer`.
+// The same tape is evaluated over double or Interval<double> via `Domain`.
+template <typename Domain>
+struct EvalContext {
+  VecView<const uint8_t> tape;
+  VecView<Domain> buffer;
+
+  Domain& operand(uint32_t x) { return buffer[x]; }
+
+  static Domain handle_unary(OpCode op, Domain operand);
+
+  static Domain handle_binary(OpCode op, Domain lhs, Domain rhs);
+
+  static Domain handle_choice(Domain cond, Domain lhs, Domain rhs);
+
+  // Runs the tape from the start and returns the register named by the first
+  // RETURN reached. The tape must contain a RETURN; there is no bounds check.
+  Domain eval() {
+    size_t i = 0;
+    while (1) {
+      OpCode current = static_cast<OpCode>(tape[i]);
+      // fast binary/ternary operations
+      if (current >= OpCode::ADD) {
+        // loop is needed to force the compiler to use a tight code layout
+        do {
+          size_t result = tape[i + 1];
+          Domain lhs = operand(tape[i + 2]);
+          Domain rhs = operand(tape[i + 3]);
+          i += 4;
+          if (current <= OpCode::MUL) {
+            if (current == OpCode::ADD)
+              operand(result) = lhs + rhs;
+            else if (current == OpCode::SUB)
+              operand(result) = lhs - rhs;
+            else
+              operand(result) = lhs * rhs;
+          } else {
+            // ternary instructions carry one extra operand byte
+            Domain z = operand(tape[i++]);
+            if (current == OpCode::FMA)
+              operand(result) = lhs * rhs + z;
+            else
+              operand(result) = handle_choice(lhs, rhs, z);
+          }
+          current = static_cast<OpCode>(tape[i]);
+        } while (current >= OpCode::ADD);
+      }
+      if (current >= OpCode::DIV) {
+        Domain lhs = operand(tape[i + 2]);
+        Domain rhs = operand(tape[i + 3]);
+        operand(tape[i + 1]) = handle_binary(current, lhs, rhs);
+        i += 4;
+      } else if (current >= OpCode::ABS) {
+        Domain x = operand(tape[i + 2]);
+        operand(tape[i + 1]) = handle_unary(current, x);
+        i += 3;
+      } else if (current == OpCode::RETURN) {
+        return operand(tape[i + 1]);
+      } else {
+        i += 1;
+      }
+    }
+  }
+};
+
+template <>
+inline double EvalContext<double>::handle_unary(OpCode op, double x) {
+  switch (op) {
+    case OpCode::ABS:
+      return std::abs(x);
+    case OpCode::NEG:
+      return -x;
+    case OpCode::EXP:
+      return std::exp(x);
+    case OpCode::LOG:
+      return std::log(x);
+    case OpCode::SQRT:
+      return std::sqrt(x);
+    case OpCode::FLOOR:
+      return std::floor(x);
+    case OpCode::CEIL:
+      return std::ceil(x);
+    case OpCode::ROUND:
+      return std::round(x);
+    case OpCode::SIN:
+      return std::sin(x);
+    case OpCode::COS:
+      return std::cos(x);
+    case OpCode::TAN:
+      return std::tan(x);
+    case OpCode::ASIN:
+      return std::asin(x);
+    case OpCode::ACOS:
+      return std::acos(x);
+    case OpCode::ATAN:
+      return std::atan(x);
+    default:
+      return 0.0;
+  }
+}
+
+template <>
+inline double EvalContext<double>::handle_binary(OpCode op, double lhs,
+                                                 double rhs) {
+  switch (op) {
+    case OpCode::DIV:
+      return lhs / rhs;
+    case OpCode::MOD:
+      // FIXME: negative rhs???
+      return std::fmod(std::fmod(lhs, rhs) + rhs, rhs);
+    case OpCode::MIN:
+      return std::min(lhs, rhs);
+    case OpCode::MAX:
+      return std::max(lhs, rhs);
+    case OpCode::EQ:
+      return lhs == rhs ? 1.0 : 0.0;
+    case OpCode::GT:
+      return lhs > rhs ? 1.0 : 0.0;
+    case OpCode::AND:
+      return (lhs == 1.0 && rhs == 1.0) ? 1.0 : 0.0;
+    case OpCode::OR:
+      return (lhs == 1.0 || rhs == 1.0) ? 1.0 : 0.0;
+    default:
+      return 0;
+  }
+}
+
+template <>
+inline double EvalContext<double>::handle_choice(double cond, double lhs,
+                                                 double rhs) {
+  if (cond == 1.0) return lhs;
+  return rhs;
+}
+
+template <>
+inline Interval<double> EvalContext<Interval<double>>::handle_unary(
+    OpCode op, Interval<double> x) {
+  constexpr double infty = std::numeric_limits<double>::infinity();
+  switch (op) {
+    case OpCode::ABS:
+      return x.abs();
+    case OpCode::NEG:
+      return -x;
+    case OpCode::EXP:
+      return x.monotone_map([](double v) { return std::exp(v); });
+    case OpCode::LOG:
+      return x.monotone_map(
+          [infty](double v) { return v > 0.0 ? std::log(v) : -infty; });
+    case OpCode::SQRT:
+      // inputs below the domain are clamped to the bound at 0
+      return x.monotone_map(
+          [](double v) { return v >= 0.0 ? std::sqrt(v) : 0.0; });
+    case OpCode::FLOOR:
+      return x.monotone_map([](double v) { return std::floor(v); });
+    case OpCode::CEIL:
+      return x.monotone_map([](double v) { return std::ceil(v); });
+    case OpCode::ROUND:
+      return x.monotone_map([](double v) { return std::round(v); });
+    case OpCode::SIN:
+      return x.sin();
+    case OpCode::COS:
+      return x.cos();
+    case OpCode::TAN:
+      return x.tan();
+    case OpCode::ASIN:
+      return x.monotone_map([infty](double v) {
+        return v < -1.0 ? -infty : v > 1.0 ? infty : std::asin(v);
+      });
+    case OpCode::ACOS:
+      return x.antimonotone_map([infty](double v) {
+        return v < -1.0 ? infty : v > 1.0 ? -infty : std::acos(v);
+      });
+    case OpCode::ATAN:
+      return x.monotone_map([](double v) { return std::atan(v); });
+    default:
+      return {0.0, 0.0};
+  }
+}
+
+template <>
+inline Interval<double> EvalContext<Interval<double>>::handle_binary(
+    OpCode op, Interval<double> lhs, Interval<double> rhs) {
+  switch (op) {
+    case OpCode::DIV:
+      return lhs / rhs;
+    case OpCode::MOD:
+      // Interval::mod takes a constant, positive modulus, so it is the
+      // divisor that has to be a point value. Otherwise fall back to the
+      // range the result can have for any modulus in rhs.
+      return (rhs.is_const() && rhs.lower > 0.0)
+                 ? lhs.mod(rhs.lower)
+                 : (rhs.lower < 0
+                        ? Interval<double>{rhs.lower, std::max(0.0, rhs.upper)}
+                        : Interval<double>{0, rhs.upper});
+    case OpCode::MIN:
+      return lhs.min(rhs);
+    case OpCode::MAX:
+      return lhs.max(rhs);
+    case OpCode::EQ:
+      return lhs == rhs;
+    case OpCode::GT:
+      return lhs > rhs;
+    case OpCode::AND:
+      return lhs.logical_and(rhs);
+    case OpCode::OR:
+      return lhs.logical_or(rhs);
+    default:
+      return {0.0, 0.0};
+  }
+}
+
+template <>
+inline Interval<double> EvalContext<Interval<double>>::handle_choice(
+    Interval<double> cond, Interval<double> lhs, Interval<double> rhs) {
+  if (cond.is_const()) {
+    if (cond.lower == 1.0) return lhs;
+    return rhs;
+  }
+  // undecided condition: either branch may be taken
+  return lhs.merge(rhs);
+}
+
+}  // namespace manifold::sdf
From d065f1df1ba1bf3ae40dddca656516002f686080 Mon Sep 17 00:00:00 2001
From: pca006132
Date: Tue, 24 Dec 2024 01:11:15 +0800
Subject: [PATCH 02/37] initial codegen

---
 src/CMakeLists.txt     |   5 +-
 src/sdf.cpp            |   9 ++
 src/sdf/context.cpp    | 331 +++++++++++++++++++++++++++++++++++++++++
 src/sdf/context.h      |  71 +++++++++
 src/sdf/interval.h     |   5 +
 src/sdf/tape.h         |   7 +
 src/sdf/value.cpp      | 243 ++++++++++++++++++++++++++++++
 src/sdf/value.h        |  75 ++++++++++
 test/CMakeLists.txt    |   1 +
 test/sdf_tape_test.cpp | 118 +++++++++++++++
 10 files changed, 864 insertions(+), 1 deletion(-)
 create mode 100644 src/sdf/context.cpp
 create mode 100644 src/sdf/context.h
 create mode 100644 src/sdf/value.cpp
 create mode 100644 src/sdf/value.h
 create mode 100644 test/sdf_tape_test.cpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4dc3fe8e6..58615eab7 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -29,6 +29,8 @@ set(
   smoothing.cpp
   sort.cpp
   subdivision.cpp
+  sdf/value.cpp
+  sdf/context.cpp
   # optional source files
   $<$<BOOL:${MANIFOLD_CROSS_SECTION}>:cross_section/cross_section.cpp>
   $<$<BOOL:${MANIFOLD_EXPORT}>:meshIO/meshIO.cpp>
@@ -51,9 +53,10 @@ set(
   tri_dist.h
   utils.h
   vec.h
-  sdf/eval.h
   sdf/interval.h
   sdf/tape.h
+  sdf/value.h
+  sdf/context.h
 )
 
 # Include directories
diff --git a/src/sdf.cpp b/src/sdf.cpp
index 64dd7094d..389c0d85a 100644
--- a/src/sdf.cpp
+++ b/src/sdf.cpp
@@ -18,6 +18,7 @@
 #include "./utils.h"
#include "./vec.h" #include "manifold/manifold.h" +#include namespace { using namespace manifold; @@ -121,6 +122,8 @@ vec3 Bound(vec3 pos, vec3 origin, vec3 spacing, ivec3 gridSize) { return min(max(pos, origin), origin + spacing * (vec3(gridSize) - 1)); } +static std::atomic_int32_t sdf_counter; + double BoundedSDF(ivec4 gridIndex, vec3 origin, vec3 spacing, ivec3 gridSize, double level, std::function sdf) { const ivec3 xyz(gridIndex); @@ -131,6 +134,7 @@ double BoundedSDF(ivec4 gridIndex, vec3 origin, vec3 spacing, ivec3 gridSize, if (boundDist < 0) { return 0.0; } + sdf_counter.fetch_add(1, std::memory_order_relaxed); const double d = sdf(Position(gridIndex, origin, spacing)) - level; return boundDist == 0 ? std::min(d, 0.0) : d; } @@ -477,12 +481,17 @@ Manifold Manifold::LevelSet(std::function sdf, Box bounds, const vec3 origin = bounds.min; Vec voxels(maxIndex); + sdf_counter.store(0); + + auto start = std::chrono::high_resolution_clock::now(); for_each_n( pol, countAt(0_uz), maxIndex, [&voxels, sdf, level, origin, spacing, gridSize, gridPow](Uint64 idx) { voxels[idx] = BoundedSDF(DecodeIndex(idx, gridPow) - kVoxelOffset, origin, spacing, gridSize, level, sdf); }); + auto end = std::chrono::high_resolution_clock::now(); + printf("sdf evaluations: %d, %ld\n", sdf_counter.load(), std::chrono::duration_cast(end - start).count()); size_t tableSize = std::min( 2 * maxIndex, static_cast(10 * la::pow(maxIndex, 0.667))); diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp new file mode 100644 index 000000000..e90dffa89 --- /dev/null +++ b/src/sdf/context.cpp @@ -0,0 +1,331 @@ +// Copyright 2024 The Manifold Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "context.h" + +#include +#include + +#include "manifold/optional_assert.h" + +namespace manifold::sdf { + +void dumpOpCode(OpCode op) { + switch (op) { + case OpCode::NOP: + std::cout << "NOP"; + break; + case OpCode::RETURN: + std::cout << "RETURN"; + break; + case OpCode::ABS: + std::cout << "ABS"; + break; + case OpCode::NEG: + std::cout << "NEG"; + break; + case OpCode::EXP: + std::cout << "EXP"; + break; + case OpCode::LOG: + std::cout << "LOG"; + break; + case OpCode::SQRT: + std::cout << "SQRT"; + break; + case OpCode::FLOOR: + std::cout << "FLOOR"; + break; + case OpCode::CEIL: + std::cout << "CEIL"; + break; + case OpCode::ROUND: + std::cout << "ROUND"; + break; + case OpCode::SIN: + std::cout << "SIN"; + break; + case OpCode::COS: + std::cout << "COS"; + break; + case OpCode::TAN: + std::cout << "TAN"; + break; + case OpCode::ASIN: + std::cout << "ASIN"; + break; + case OpCode::ACOS: + std::cout << "ACOS"; + break; + case OpCode::ATAN: + std::cout << "ATAN"; + break; + case OpCode::DIV: + std::cout << "DIV"; + break; + case OpCode::MOD: + std::cout << "MOD"; + break; + case OpCode::MIN: + std::cout << "MIN"; + break; + case OpCode::MAX: + std::cout << "MAX"; + break; + case OpCode::EQ: + std::cout << "EQ"; + break; + case OpCode::GT: + std::cout << "GT"; + break; + case OpCode::AND: + std::cout << "AND"; + break; + case OpCode::OR: + std::cout << "OR"; + break; + case OpCode::ADD: + std::cout << "ADD"; + break; + case OpCode::SUB: + std::cout << "SUB"; + break; + case OpCode::MUL: + std::cout << "MUL"; + break; + case 
OpCode::FMA: + std::cout << "FMA"; + break; + case OpCode::CHOICE: + std::cout << "CHOICE"; + break; + } +} + +void Context::dump() const { + for (size_t i = 0; i < operations.size(); i++) { + std::cout << i << " "; + dumpOpCode(operations[i]); + std::cout << " "; + for (Operand operand : operands[i]) { + if (operand.isNone()) break; + if (operand.isResult()) + std::cout << "r" << operand.toInstIndex(); + else if (operand.isConst()) + std::cout << constants[operand.toConstIndex()]; + else + std::cout << static_cast('X' - operand.id - 1); + std::cout << " "; + } + std::cout << "{"; + for (size_t use : opUses[i]) std::cout << use << " "; + std::cout << "}" << std::endl; + } + std::cout << "-----------" << std::endl; +} + +Operand Context::addConstant(double d) { + auto result = constantsIds.insert( + {d, Operand{-4 - static_cast(constants.size())}}); + if (result.second) { + constants.push_back(d); + constantUses.emplace_back(); + } + return result.first->second; +} + +// TODO: hashconsing +Operand Context::addInstruction(OpCode op, Operand a, Operand b, Operand c) { + // constant choice + if (op == OpCode::CHOICE && a.isConst()) { + if (constants[a.toConstIndex()] == 1.0) return b; + return c; + } + // constant propagation + bool all_constants = true; + for (auto operand : {a, b, c}) { + if (operand.isNone()) break; + if (!operand.isConst()) { + all_constants = false; + break; + } + } + if (all_constants) { + tmpTape.clear(); + tmpBuffer.clear(); + tmpTape.push_back(static_cast(op)); + tmpTape.push_back(0); + tmpBuffer.push_back(0.0); + for (Operand x : {a, b, c}) { + if (!x.isConst()) break; + tmpTape.push_back(tmpBuffer.size()); + tmpBuffer.push_back(constants[x.toConstIndex()]); + } + tmpTape.push_back(static_cast(OpCode::RETURN)); + tmpTape.push_back(0); + return addConstant(EvalContext{ + tmpTape, VecView(tmpBuffer.data(), tmpBuffer.size())} + .eval()); + } + + size_t i = operations.size(); + operations.push_back(op); + operands.push_back({a, b, c}); + 
opUses.emplace_back(); + // update uses + for (auto operand : {a, b, c}) { + std::vector *target; + if (operand.isResult()) { + target = &opUses[operand.toInstIndex()]; + } else if (operand.isConst()) { + target = &constantUses[operand.toConstIndex()]; + } else { + continue; + } + // avoid duplicates + if (target->empty() || target->back() != i) target->push_back(i); + } + return {static_cast(i) + 1}; +} + +void Context::optimizeFMA() { + auto tryApply = [&](int i, Operand lhs, Operand rhs) { + if (!lhs.isResult()) return false; + auto lhsInst = lhs.toInstIndex(); + if (operations[lhsInst] != OpCode::MUL || opUses[lhsInst].size() != 1) + return false; + operations[i] = OpCode::FMA; + Operand a = operands[lhsInst][0]; + Operand b = operands[lhsInst][1]; + operands[i][0] = a; + operands[i][1] = b; + operands[i][2] = rhs; + // remove instruction + operations[lhsInst] = OpCode::NOP; + operands[lhsInst][0] = Operand::none(); + operands[lhsInst][1] = Operand::none(); + // update uses, note that we need to maintain the order of the indices + opUses[lhsInst].clear(); + auto updateUses = [&](Operand x) { + if (!x.isResult() && !x.isConst()) return; + auto &uses = x.isResult() ? 
opUses[x.toInstIndex()] + : constantUses[x.toConstIndex()]; + auto iter1 = std::lower_bound(uses.begin(), uses.end(), lhsInst); + DEBUG_ASSERT(*iter1 == lhsInst, logicErr, "expected use"); + uses.erase(iter1); + auto iter2 = std::lower_bound(uses.begin(), uses.end(), i); + // make sure there is no duplicate + if (iter2 == uses.end() || *iter2 != i + 1) uses.insert(iter2, i); + }; + updateUses(a); + if (a != b) updateUses(b); + return true; + }; + for (size_t i = 0; i < operations.size(); i++) { + if (operations[i] == OpCode::ADD) { + // check if lhs/rhs comes from MUL with no other uses + auto lhs = operands[i][0]; + auto rhs = operands[i][1]; + if (!tryApply(i, lhs, rhs)) tryApply(i, rhs, lhs); + } + } +} + +std::pair, std::vector> Context::genTape() { + std::vector tape; + std::vector buffer; + std::vector constantToReg; + for (int i : {0, 1, 2}) // x, y, z + buffer.push_back(0.0); + // handle constants by putting them inside the buffer/register + // they are different because they require static lifetime, and cannot be + // changed in an execution + // FIXME: this is just temporary, we should optimize by encoding some + // constants with a minimal number of uses into the read-only code when there + // is a register pressure (more than 255...) 
+ for (size_t i = 0; i < constants.size(); i++) { + constantToReg.push_back(0); + if (constantUses[i].empty()) continue; + constantToReg.back() = static_cast(buffer.size()); + buffer.push_back(constants[i]); + } + + std::vector regUsed(buffer.size(), true); + std::vector opToReg; + std::vector availableReg; + + // FIXME: handle spills + for (size_t i = 0; i < operations.size(); i++) { + if (operations[i] == OpCode::NOP) { + opToReg.push_back(0); + continue; + } + if (operations[i] == OpCode::RETURN) { + auto operand = operands[i][0]; + tape.push_back(static_cast(operations[i])); + if (operand.isResult()) + tape.push_back(opToReg[operand.toInstIndex()]); + else + tape.push_back(constantToReg[operand.toConstIndex()]); + dumpOpCode(operations[i]); + std::cout << " r" << static_cast(tape.back()) << std::endl; + break; + } + // free up operand registers if possible + for (auto operand : operands[i]) { + if (!operand.isResult()) continue; + auto operandInst = operand.toInstIndex(); + // not the last instruction, cannot free it up + if (opUses[operandInst].back() != i) continue; + uint8_t reg = opToReg[operandInst]; + // already freed, probably due to identical arguments + if (!regUsed[reg]) continue; + regUsed[reg] = false; + availableReg.push_back(reg); + } + // allocate register + uint8_t reg; + if (availableReg.empty()) { + // GG if we used too many registers, need spilling + if (buffer.size() == 255) { + // just return some nonsense that will not crash + return {{static_cast(OpCode::RETURN), 0}, {0.0}}; + } + reg = buffer.size(); + buffer.push_back(0.0); + regUsed.push_back(true); + } else { + reg = availableReg.back(); + availableReg.pop_back(); + } + opToReg.push_back(reg); + tape.push_back(static_cast(operations[i])); + dumpOpCode(operations[i]); + std::cout << " r" << static_cast(reg); + tape.push_back(reg); + for (auto operand : operands[i]) { + if (operand.isNone()) break; + if (operand.isResult()) + tape.push_back(opToReg[operand.toInstIndex()]); + else + 
tape.push_back(constantToReg[operand.toConstIndex()]); + std::cout << " r" << static_cast(tape.back()); + } + std::cout << std::endl; + } + std::cout << "-----------" << std::endl; + return std::make_pair(std::move(tape), std::move(buffer)); +} + +} // namespace manifold::sdf diff --git a/src/sdf/context.h b/src/sdf/context.h new file mode 100644 index 000000000..dbf5090d7 --- /dev/null +++ b/src/sdf/context.h @@ -0,0 +1,71 @@ +// Copyright 2024 The Manifold Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once
+
+#include <array>
+#include <unordered_map>
+#include <vector>
+
+#include "tape.h"
+
+namespace manifold::sdf {
+
+// Reference to a value inside a Context, encoded in a single int:
+//   id > 0    result of instruction (id - 1)
+//   id == 0   no operand
+//   -1..-3    the inputs x, y, z
+//   id <= -4  constant number (-id - 4)
+struct Operand {
+  int id;
+
+  static Operand none() { return {0}; }
+  bool isConst() const { return id <= -4; }
+  bool isResult() const { return id > 0; }
+  bool isNone() const { return id == 0; }
+  int toConstIndex() const { return -(id + 4); }
+  int toInstIndex() const { return id - 1; }
+  bool operator==(const Operand& other) const { return id == other.id; }
+  bool operator!=(const Operand& other) const { return id != other.id; }
+};
+
+// Builds a list of instructions over Operands and lowers it to a byte tape
+// for EvalContext.
+class Context {
+ public:
+  Operand addConstant(double d);
+  Operand addInstruction(OpCode op, Operand a = Operand::none(),
+                         Operand b = Operand::none(),
+                         Operand c = Operand::none());
+  void optimizeFMA();
+  // TODO: DCE
+
+  // Returns {tape bytes, initial register values}.
+  std::pair<std::vector<uint8_t>, std::vector<double>> genTape();
+
+  void dump() const;
+
+ private:
+  // constants have negative IDs, starting from -4
+  // -1, -2 and -3 are reserved for x y z
+  std::unordered_map<double, Operand> constantsIds;
+  std::vector<double> constants;
+  // constant use vector, elements are instruction indices
+  // constant with ID -4 is mapped to 0, etc.
+  std::vector<std::vector<size_t>> constantUses;
+  // instructions, index 0 is mapped to ID 1, etc.
+  std::vector<OpCode> operations;
+  // instruction value use vector, elements are instruction indices
+  std::vector<std::vector<size_t>> opUses;
+  // operands, 0 is invalid (uses fewer operands)
+  // +ve are instruction results
+  // -ve are constants
+  std::vector<std::array<Operand, 3>> operands;
+
+  // scratch tape and registers reused when folding constant expressions
+  std::vector<uint8_t> tmpTape;
+  std::vector<double> tmpBuffer;
+};
+
+}  // namespace manifold::sdf
diff --git a/src/sdf/interval.h b/src/sdf/interval.h
index d2b6ddbae..c9ec0d46c 100644
--- a/src/sdf/interval.h
+++ b/src/sdf/interval.h
@@ -26,6 +26,11 @@ struct Interval {
   Domain lower;
   Domain upper;
 
+  // constexpr so Interval stays a literal type and its constexpr members
+  // remain usable; the default interval is the whole real line
+  constexpr Interval()
+      : lower(-std::numeric_limits<Domain>::infinity()),
+        upper(std::numeric_limits<Domain>::infinity()) {}
+  constexpr Interval(Domain v) : lower(v), upper(v) {}
+  constexpr Interval(Domain lower, Domain upper) : lower(lower), upper(upper) {}
   static Interval constant(Domain v) { return {v, v}; }
 
   constexpr Interval operator+(const Interval &other) const {
diff --git a/src/sdf/tape.h b/src/sdf/tape.h
index 519a8efa3..0f2aec70c 100644
--- a/src/sdf/tape.h
+++ b/src/sdf/tape.h
@@ -16,6 +16,7 @@
 #include <cmath>
 #include <cstddef>
 #include <cstdint>
+#include <cstring>
 #include <limits>
 
 #include "interval.h"
@@ -26,6 +27,7 @@ namespace manifold::sdf {
 enum class OpCode : uint8_t {
   NOP,
   RETURN,
+  // CONST,
 
   // unary operations
   ABS,
@@ -113,6 +115,11 @@ struct EvalContext {
         Domain x = operand(tape[i + 2]);
         operand(tape[i + 1]) = handle_unary(current, x);
         i += 3;
+        // } else if (current == OpCode::CONST) {
+        //   double x;
+        //   std::memcpy(&x, tape.data() + i + 2, sizeof(x));
+        //   operand(tape[i + 1]) = x;
+        //   i += 2 + sizeof(x);
       } else if (current == OpCode::RETURN) {
         return operand(tape[i + 1]);
       } else {
diff --git a/src/sdf/value.cpp b/src/sdf/value.cpp
new file mode 100644
index 000000000..589bd8f23
--- /dev/null
+++ b/src/sdf/value.cpp
@@ -0,0 +1,243 @@
+// Copyright 2024 The Manifold Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "value.h" + +#include + +#include "context.h" +#include "tape.h" + +namespace manifold::sdf { + +struct ValueOperation { + OpCode op; + std::array operands; + + ValueOperation(OpCode op, Value a, Value b, Value c) + : op(op), operands({a, b, c}) {} +}; + +Value Value::Invalid() { return Value(ValueKind::INVALID, 0.0); } + +Value Value::Constant(double d) { return Value(ValueKind::CONSTANT, d); } + +Value Value::X() { return Value(ValueKind::X, 0.0); } + +Value Value::Y() { return Value(ValueKind::Y, 0.0); } + +Value Value::Z() { return Value(ValueKind::Z, 0.0); } + +Value Value::operator+(const Value& other) const { + return Value(ValueKind::OPERATION, std::make_shared( + OpCode::ADD, *this, other, Invalid())); +} + +Value Value::operator-(const Value& other) const { + return Value(ValueKind::OPERATION, std::make_shared( + OpCode::SUB, *this, other, Invalid())); +} + +Value Value::operator*(const Value& other) const { + return Value(ValueKind::OPERATION, std::make_shared( + OpCode::MUL, *this, other, Invalid())); +} + +Value Value::operator/(const Value& other) const { + return Value(ValueKind::OPERATION, std::make_shared( + OpCode::DIV, *this, other, Invalid())); +} + +Value Value::cond(const Value& then, const Value& otherwise) const { + return Value( + ValueKind::OPERATION, + std::make_shared(OpCode::CHOICE, *this, then, otherwise)); +} + +Value Value::mod(const Value& other) const { + return Value(ValueKind::OPERATION, std::make_shared( + OpCode::MOD, *this, other, Invalid())); +} + +Value Value::min(const Value& other) const { + 
return Value(ValueKind::OPERATION, std::make_shared( + OpCode::MIN, *this, other, Invalid())); +} + +Value Value::max(const Value& other) const { + return Value(ValueKind::OPERATION, std::make_shared( + OpCode::MAX, *this, other, Invalid())); +} + +Value Value::operator==(const Value& other) const { + return Value(ValueKind::OPERATION, std::make_shared( + OpCode::EQ, *this, other, Invalid())); +} + +Value Value::operator>(const Value& other) const { + return Value(ValueKind::OPERATION, std::make_shared( + OpCode::GT, *this, other, Invalid())); +} + +Value Value::operator&&(const Value& other) const { + return Value(ValueKind::OPERATION, std::make_shared( + OpCode::AND, *this, other, Invalid())); +} + +Value Value::operator||(const Value& other) const { + return Value(ValueKind::OPERATION, std::make_shared( + OpCode::OR, *this, other, Invalid())); +} + +Value Value::abs() const { + return Value(ValueKind::OPERATION, + std::make_shared(OpCode::ABS, *this, Invalid(), + Invalid())); +} + +Value Value::operator-() const { + return Value(ValueKind::OPERATION, + std::make_shared(OpCode::NEG, *this, Invalid(), + Invalid())); +} + +Value Value::exp() const { + return Value(ValueKind::OPERATION, + std::make_shared(OpCode::EXP, *this, Invalid(), + Invalid())); +} + +Value Value::log() const { + return Value(ValueKind::OPERATION, + std::make_shared(OpCode::LOG, *this, Invalid(), + Invalid())); +} + +Value Value::sqrt() const { + return Value(ValueKind::OPERATION, + std::make_shared(OpCode::SQRT, *this, Invalid(), + Invalid())); +} + +Value Value::floor() const { + return Value(ValueKind::OPERATION, + std::make_shared(OpCode::FLOOR, *this, Invalid(), + Invalid())); +} + +Value Value::ceil() const { + return Value(ValueKind::OPERATION, + std::make_shared(OpCode::CEIL, *this, Invalid(), + Invalid())); +} + +Value Value::round() const { + return Value(ValueKind::OPERATION, + std::make_shared(OpCode::ROUND, *this, Invalid(), + Invalid())); +} + +Value Value::sin() const { + return 
Value(ValueKind::OPERATION, + std::make_shared(OpCode::SIN, *this, Invalid(), + Invalid())); +} + +Value Value::cos() const { + return Value(ValueKind::OPERATION, + std::make_shared(OpCode::COS, *this, Invalid(), + Invalid())); +} + +Value Value::tan() const { + return Value(ValueKind::OPERATION, + std::make_shared(OpCode::TAN, *this, Invalid(), + Invalid())); +} + +Value Value::asin() const { + return Value(ValueKind::OPERATION, + std::make_shared(OpCode::ASIN, *this, Invalid(), + Invalid())); +} + +Value Value::acos() const { + return Value(ValueKind::OPERATION, + std::make_shared(OpCode::ACOS, *this, Invalid(), + Invalid())); +} + +Value Value::atan() const { + return Value(ValueKind::OPERATION, + std::make_shared(OpCode::ATAN, *this, Invalid(), + Invalid())); +} + +std::pair, std::vector> Value::genTape() const { + using VO = std::shared_ptr; + Context ctx; + std::unordered_map cache; + std::vector stack; + if (kind == ValueKind::OPERATION) stack.push_back(std::get(v)); + + auto getOperand = [&](Value x, std::function f) { + switch (x.kind) { + case ValueKind::OPERATION: { + auto iter = cache.find(std::get(x.v)); + if (iter != cache.end()) return iter->second; + // stack.push_back(std::get(x.v)); + // ready = false; + f(x); + return Operand::none(); + } + case ValueKind::CONSTANT: + return ctx.addConstant(std::get(x.v)); + case ValueKind::X: + return Operand{-1}; + case ValueKind::Y: + return Operand{-2}; + case ValueKind::Z: + return Operand{-3}; + case ValueKind::INVALID: + return Operand::none(); + } + }; + while (!stack.empty()) { + bool ready = true; + VO current = stack.back(); + auto f = [&](Value x) { + stack.push_back(std::get(x.v)); + ready = false; + }; + Operand a = getOperand(current->operands[0], f); + Operand b = getOperand(current->operands[1], f); + Operand c = getOperand(current->operands[2], f); + if (ready) { + stack.pop_back(); + // check if inserted... 
can happen when evaluating with a DAG + if (cache.find(current) != cache.end()) continue; + cache.insert({current, ctx.addInstruction(current->op, a, b, c)}); + } + } + + Operand result = getOperand(*this, [](Value _) {}); + ctx.addInstruction(OpCode::RETURN, result, Operand::none(), Operand::none()); + + ctx.dump(); + ctx.optimizeFMA(); + ctx.dump(); + return ctx.genTape(); +} + +} // namespace manifold::sdf diff --git a/src/sdf/value.h b/src/sdf/value.h new file mode 100644 index 000000000..6eda08bf4 --- /dev/null +++ b/src/sdf/value.h @@ -0,0 +1,75 @@ +// Copyright 2024 The Manifold Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +namespace manifold::sdf { + +enum class ValueKind { CONSTANT, X, Y, Z, OPERATION, INVALID }; + +struct ValueOperation; + +class Value { + public: + static Value Invalid(); + static Value Constant(double d); + static Value X(); + static Value Y(); + static Value Z(); + + Value operator+(const Value& other) const; + Value operator-(const Value& other) const; + Value operator*(const Value& other) const; + Value operator/(const Value& other) const; + Value cond(const Value& then, const Value& otherwise) const; + Value mod(const Value& m) const; + Value min(const Value& other) const; + Value max(const Value& other) const; + + // TODO: should we have a boolean value type? 
+ Value operator==(const Value& other) const; + Value operator>(const Value& other) const; + Value operator&&(const Value& other) const; + Value operator||(const Value& other) const; + + Value abs() const; + Value operator-() const; + Value exp() const; + Value log() const; + Value sqrt() const; + Value floor() const; + Value ceil() const; + Value round() const; + Value sin() const; + Value cos() const; + Value tan() const; + Value asin() const; + Value acos() const; + Value atan() const; + + // internal use only + std::pair, std::vector> genTape() const; + + private: + ValueKind kind = ValueKind::INVALID; + std::variant> v; + + Value(ValueKind kind, std::variant> v) + : kind(kind), v(v) {} +}; +} // namespace manifold::sdf diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 53d571265..283b57cb0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -31,6 +31,7 @@ set( hull_test.cpp samples_test.cpp boolean_complex_test.cpp + sdf_tape_test.cpp $<$:cross_section_test.cpp> $<$:manifoldc_test.cpp> ) diff --git a/test/sdf_tape_test.cpp b/test/sdf_tape_test.cpp new file mode 100644 index 000000000..dc2c076c6 --- /dev/null +++ b/test/sdf_tape_test.cpp @@ -0,0 +1,118 @@ +// Copyright 2024 The Manifold Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "../src/sdf/tape.h" +#include "../src/sdf/value.h" +#include "test.h" + +using namespace manifold; +using namespace manifold::sdf; + +int recursive_interval(EvalContext>& ctx, vec3 start, + vec3 delta, const double edgeLength, + const double level) { + if (delta.x < edgeLength && delta.y < edgeLength && delta.z < edgeLength) + return 1; // we should do one evaluation... + ctx.buffer[0] = {start.x, start.x + delta.x}; + ctx.buffer[1] = {start.y, start.y + delta.y}; + ctx.buffer[2] = {start.z, start.z + delta.z}; + auto result = ctx.eval() - level; + + int count = 1; // we did one evaluation + if (result.lower >= 0.0 || result.upper <= 0.0) return 1; + // in case it is not a cube, we may want to avoid dividing too much in some + // axis + vec3 new_delta = {delta.x < edgeLength ? delta.x : (delta.x / 2), + delta.y < edgeLength ? delta.y : (delta.y / 2), + delta.z < edgeLength ? delta.z : (delta.z / 2)}; + // can easily be converted into a worklist + for (int a = 0; a <= (delta.x < edgeLength ? 0 : 1); a++) + for (int b = 0; b <= (delta.y < edgeLength ? 0 : 1); b++) + for (int c = 0; c <= (delta.z < edgeLength ? 
0 : 1); c++) { + auto new_start = start; + if (a == 1) new_start.x += new_delta.x; + if (b == 1) new_start.y += new_delta.y; + if (c == 1) new_start.z += new_delta.z; + count += + recursive_interval(ctx, new_start, new_delta, edgeLength, level); + } + + return count; +} + +TEST(TAPE, Gyroid) { + const double n = 20; + const double period = kTwoPi; + Value constantKPi4 = Value::Constant(kPi / 4); + auto x = Value::X() - constantKPi4; + auto y = Value::Y() - constantKPi4; + auto z = Value::Y() - constantKPi4; + + auto result = x.cos() * y.sin() + y.cos() * z.sin() + z.cos() * x.sin(); + auto tape = result.genTape(); + std::vector> intervalBuffer; + for (auto d : tape.second) intervalBuffer.push_back(Interval(d)); + EvalContext> ctx{ + tape.first, VecView(intervalBuffer.data(), intervalBuffer.size())}; + std::cout << recursive_interval(ctx, vec3(-period), vec3(period * 2), + period / n, -0.4) + << std::endl; + std::cout << recursive_interval(ctx, vec3(-period), vec3(period * 2), + period / n, 0.4) + << std::endl; +} + +TEST(TAPE, Blobs) { + std::vector balls = {{0, 0, 0, 2}, // + {1, 2, 3, 2}, // + {-2, 2, -2, 1}, // + {-2, -3, -2, 2}, // + {-3, -1, -3, 1}, // + {2, -3, -2, 2}, // + {-2, 3, 2, 2}, // + {-2, -3, 2, 2}, // + {1, -1, 1, -2}, // + {-4, -3, -2, 1}}; + auto lengthFn = [](Value x, Value y, Value z) { + return (x * x + y * y + z * z).sqrt(); + }; + auto smoothstepFn = [](Value edge0, Value edge1, Value a) { + auto x = ((a - edge0) / (edge1 - edge0)) + .min(Value::Constant(1)) + .max(Value::Constant(0)); + return x * x * (Value::Constant(3) - Value::Constant(2) * x); + }; + Value d = Value::Constant(0); + for (const auto& ball : balls) { + auto tmp = smoothstepFn(Value::Constant(-1), Value::Constant(1), + Value::Constant(ball.w).abs() - + lengthFn(Value::Constant(ball.x) - Value::X(), + Value::Constant(ball.y) - Value::Y(), + Value::Constant(ball.z) - Value::Z())); + if (ball.w > 0) + d = d + tmp; + else + d = d - tmp; + } + auto tape = d.genTape(); + + 
std::vector> intervalBuffer; + for (auto d : tape.second) intervalBuffer.push_back(Interval(d)); + EvalContext> ctx{ + tape.first, VecView(intervalBuffer.data(), intervalBuffer.size())}; + std::cout << recursive_interval(ctx, vec3(-5), vec3(10), 0.05, 0.5) + << std::endl; +} From c62ccf6804c577ecf2235fdd3937dfe40acd1904 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 24 Dec 2024 10:29:32 +0800 Subject: [PATCH 03/37] fix control flow --- src/sdf/value.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sdf/value.cpp b/src/sdf/value.cpp index 589bd8f23..a60c31de7 100644 --- a/src/sdf/value.cpp +++ b/src/sdf/value.cpp @@ -209,7 +209,7 @@ std::pair, std::vector> Value::genTape() const { return Operand{-2}; case ValueKind::Z: return Operand{-3}; - case ValueKind::INVALID: + default: return Operand::none(); } }; From 6e4fe6500889c1b2a3668bf9e1d7a8fbc5b2faff Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 24 Dec 2024 10:34:51 +0800 Subject: [PATCH 04/37] use size_t for i --- src/sdf/context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index e90dffa89..8d3e77cb0 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -200,7 +200,7 @@ Operand Context::addInstruction(OpCode op, Operand a, Operand b, Operand c) { } void Context::optimizeFMA() { - auto tryApply = [&](int i, Operand lhs, Operand rhs) { + auto tryApply = [&](size_t i, Operand lhs, Operand rhs) { if (!lhs.isResult()) return false; auto lhsInst = lhs.toInstIndex(); if (operations[lhsInst] != OpCode::MUL || opUses[lhsInst].size() != 1) From 01ef224c5e25c1beb59c9f59b71d29f6aaf442d3 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 24 Dec 2024 10:46:14 +0800 Subject: [PATCH 05/37] fixes --- src/sdf/context.h | 4 ++-- src/sdf/tape.h | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/sdf/context.h b/src/sdf/context.h index dbf5090d7..b67e4d438 100644 --- a/src/sdf/context.h +++ 
b/src/sdf/context.h @@ -28,8 +28,8 @@ struct Operand { bool isConst() const { return id <= -4; } bool isResult() const { return id > 0; } bool isNone() const { return id == 0; } - int toConstIndex() const { return -(id + 4); } - int toInstIndex() const { return id - 1; } + size_t toConstIndex() const { return static_cast(-(id + 4)); } + size_t toInstIndex() const { return static_cast(id - 1); } bool operator==(const Operand& other) const { return id == other.id; } bool operator!=(const Operand& other) const { return id != other.id; } }; diff --git a/src/sdf/tape.h b/src/sdf/tape.h index 0f2aec70c..e80695b8a 100644 --- a/src/sdf/tape.h +++ b/src/sdf/tape.h @@ -115,11 +115,11 @@ struct EvalContext { Domain x = operand(tape[i + 2]); operand(tape[i + 1]) = handle_unary(current, x); i += 3; - // } else if (current == OpCode::CONST) { - // double x; - // std::memcpy(&x, tape.data() + i + 2, sizeof(x)); - // operand(tape[i + 1]) = x; - // i += 2 + sizeof(x); + // } else if (current == OpCode::CONST) { + // double x; + // std::memcpy(&x, tape.data() + i + 2, sizeof(x)); + // operand(tape[i + 1]) = x; + // i += 2 + sizeof(x); } else if (current == OpCode::RETURN) { return operand(tape[i + 1]); } else { From 6104170f74aa2d3d1bdeab3ed88e95f39f64a485 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 24 Dec 2024 11:38:32 +0800 Subject: [PATCH 06/37] fix simple error --- src/sdf.cpp | 8 ++++-- src/sdf/context.cpp | 18 +++++++------ test/sdf_tape_test.cpp | 60 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 72 insertions(+), 14 deletions(-) diff --git a/src/sdf.cpp b/src/sdf.cpp index 389c0d85a..29bde90a2 100644 --- a/src/sdf.cpp +++ b/src/sdf.cpp @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include + #include "./hashtable.h" #include "./impl.h" #include "./parallel.h" #include "./utils.h" #include "./vec.h" #include "manifold/manifold.h" -#include namespace { using namespace manifold; @@ -491,7 +492,10 @@ Manifold Manifold::LevelSet(std::function sdf, Box bounds, origin, spacing, gridSize, level, sdf); }); auto end = std::chrono::high_resolution_clock::now(); - printf("sdf evaluations: %d, %ld\n", sdf_counter.load(), std::chrono::duration_cast(end - start).count()); + printf("sdf evaluations: %d, %dus\n", sdf_counter.load(), + static_cast( + std::chrono::duration_cast(end - start) + .count())); size_t tableSize = std::min( 2 * maxIndex, static_cast(10 * la::pow(maxIndex, 0.667))); diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 8d3e77cb0..754b27cab 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -265,6 +265,14 @@ std::pair, std::vector> Context::genTape() { std::vector opToReg; std::vector availableReg; + auto getReg = [&](Operand operand) { + if (operand.isResult()) + return opToReg[operand.toInstIndex()]; + else if (operand.isConst()) + return constantToReg[operand.toConstIndex()]; + return static_cast(-(operand.id + 1)); + }; + // FIXME: handle spills for (size_t i = 0; i < operations.size(); i++) { if (operations[i] == OpCode::NOP) { @@ -274,10 +282,7 @@ std::pair, std::vector> Context::genTape() { if (operations[i] == OpCode::RETURN) { auto operand = operands[i][0]; tape.push_back(static_cast(operations[i])); - if (operand.isResult()) - tape.push_back(opToReg[operand.toInstIndex()]); - else - tape.push_back(constantToReg[operand.toConstIndex()]); + tape.push_back(getReg(operand)); dumpOpCode(operations[i]); std::cout << " r" << static_cast(tape.back()) << std::endl; break; @@ -316,10 +321,7 @@ std::pair, std::vector> Context::genTape() { tape.push_back(reg); for (auto operand : operands[i]) { if (operand.isNone()) break; - if (operand.isResult()) - tape.push_back(opToReg[operand.toInstIndex()]); - else - 
tape.push_back(constantToReg[operand.toConstIndex()]); + tape.push_back(getReg(operand)); std::cout << " r" << static_cast(tape.back()); } std::cout << std::endl; diff --git a/test/sdf_tape_test.cpp b/test/sdf_tape_test.cpp index dc2c076c6..1065c95d4 100644 --- a/test/sdf_tape_test.cpp +++ b/test/sdf_tape_test.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include "../src/sdf/tape.h" @@ -59,20 +60,44 @@ TEST(TAPE, Gyroid) { Value constantKPi4 = Value::Constant(kPi / 4); auto x = Value::X() - constantKPi4; auto y = Value::Y() - constantKPi4; - auto z = Value::Y() - constantKPi4; + auto z = Value::Z() - constantKPi4; auto result = x.cos() * y.sin() + y.cos() * z.sin() + z.cos() * x.sin(); auto tape = result.genTape(); + + // verify by comparing with grid evaluation results + auto gyroid = [](vec3 p) { + p -= kPi / 4; + return std::cos(p.x) * std::sin(p.y) + std::cos(p.y) * std::sin(p.z) + + std::cos(p.z) * std::sin(p.x); + }; + + EvalContext ctxSimple{ + tape.first, VecView(tape.second.data(), tape.second.size())}; + for (double x = -period; x < period; x += period / n) { + for (double y = -period; y < period; y += period / n) { + for (double z = -period; z < period; z += period / n) { + ctxSimple.buffer[0] = x; + ctxSimple.buffer[1] = y; + ctxSimple.buffer[2] = z; + ASSERT_NEAR(ctxSimple.eval(), gyroid({x, y, z}), 1e-12); + } + } + } + std::vector> intervalBuffer; for (auto d : tape.second) intervalBuffer.push_back(Interval(d)); EvalContext> ctx{ tape.first, VecView(intervalBuffer.data(), intervalBuffer.size())}; + auto start = std::chrono::high_resolution_clock::now(); std::cout << recursive_interval(ctx, vec3(-period), vec3(period * 2), period / n, -0.4) << std::endl; - std::cout << recursive_interval(ctx, vec3(-period), vec3(period * 2), - period / n, 0.4) - << std::endl; + auto end = std::chrono::high_resolution_clock::now(); + auto time = static_cast( + 
std::chrono::duration_cast(end - start) + .count()); + printf("interval evaluation: %dus\n", time); } TEST(TAPE, Blobs) { @@ -109,10 +134,37 @@ TEST(TAPE, Blobs) { } auto tape = d.genTape(); + auto blobs = [&balls](vec3 p) { + double d = 0; + for (const auto& ball : balls) { + d += (ball.w > 0 ? 1 : -1) * + smoothstep(-1, 1, std::abs(ball.w) - la::length(vec3(ball) - p)); + } + return d; + }; + EvalContext ctxSimple{ + tape.first, VecView(tape.second.data(), tape.second.size())}; + for (double x = -5; x < 5; x += 0.05) { + for (double y = -5; y < 5; y += 0.05) { + for (double z = -5; z < 5; z += 0.05) { + ctxSimple.buffer[0] = x; + ctxSimple.buffer[1] = y; + ctxSimple.buffer[2] = z; + ASSERT_NEAR(ctxSimple.eval(), blobs({x, y, z}), 1e-12); + } + } + } + std::vector> intervalBuffer; for (auto d : tape.second) intervalBuffer.push_back(Interval(d)); EvalContext> ctx{ tape.first, VecView(intervalBuffer.data(), intervalBuffer.size())}; + auto start = std::chrono::high_resolution_clock::now(); std::cout << recursive_interval(ctx, vec3(-5), vec3(10), 0.05, 0.5) << std::endl; + auto end = std::chrono::high_resolution_clock::now(); + auto time = static_cast( + std::chrono::duration_cast(end - start) + .count()); + printf("interval evaluation: %dus\n", time); } From e3e1ddb6a36cf069ba292dafd0db3955088baf1f Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 24 Dec 2024 14:41:35 +0800 Subject: [PATCH 07/37] add reschedule --- src/sdf/context.cpp | 132 ++++++++++++++++++++++++++++++++++++++++++++ src/sdf/context.h | 2 +- src/sdf/value.cpp | 2 + 3 files changed, 135 insertions(+), 1 deletion(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 754b27cab..d70aa191e 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -242,6 +242,137 @@ void Context::optimizeFMA() { } } +// this does dead code elimination as well +// assumes the last instruction is return +// and note that this is not optimal, and cannot be optimal without dealing with +// NP-hard stuff... 
+void Context::reschedule() { + DEBUG_ASSERT(!operations.empty() && operations.back() == OpCode::RETURN, + logicErr, "return expected"); + + auto oldOperations = std::move(operations); + auto oldOperands = std::move(operands); + opUses.clear(); + for (auto &uses : constantUses) uses.clear(); + + std::unordered_map computedInst; + std::vector stack; + if (oldOperands.back()[0].isResult()) + stack.push_back(oldOperands.back()[0].toInstIndex()); + + std::unordered_map bitset; + std::unordered_map distances; + std::vector tmpStack; + + auto requiresComputation = [&computedInst](Operand operand) { + return operand.isResult() && + computedInst.find(operand.toInstIndex()) == computedInst.end(); + }; + auto toNewOperand = [&computedInst](Operand old) { + if (old.isResult()) return computedInst[old.toInstIndex()]; + return old; + }; + + while (!stack.empty()) { + int numResults = 0; + auto back = stack.back(); + auto &curOperands = oldOperands[back]; + for (auto operand : curOperands) + if (requiresComputation(operand)) numResults += 1; + if (numResults > 1) { + // find common results first + // does this by recursively marking instructions to be the transitive + // dependency of operands + // we use a bitset, so if the bitset & (1 << (numResults + 1)) - 1, + // it means that the instruction is the common dependency for all operands + uint8_t mask = (1 << (numResults + 1)) - 1; + numResults = 0; + for (auto operand : curOperands) { + if (!requiresComputation(operand)) continue; + tmpStack.push_back(operand.toInstIndex()); + while (!tmpStack.empty()) { + auto current = tmpStack.back(); + tmpStack.pop_back(); + // already computed + if (computedInst.find(current) != computedInst.end()) continue; + auto iter = bitset.find(current); + if (iter == bitset.end()) { + // new dependency + bitset.insert({current, 1 << numResults}); + } else { + iter->second |= 1 << numResults; + } + for (auto x : oldOperands[current]) { + if (!x.isResult()) continue; + 
tmpStack.push_back(x.toInstIndex()); + } + } + numResults += 1; + } + // compute operand costs as distance in the dependency graph + std::array costs = {0, 0, 0}; + std::array ids = {0, 1, 2}; + for (int i = 0; i < curOperands.size(); i++) { + auto operand = curOperands[i]; + if (!requiresComputation(operand)) continue; + tmpStack.push_back(operand.toInstIndex()); + while (!tmpStack.empty()) { + auto current = tmpStack.back(); + size_t maxDistance = 0; + for (auto x : oldOperands[current]) { + if (!x.isResult()) continue; + auto inst = x.toInstIndex(); + + // computed, doesn't affect distance + if (computedInst.find(inst) != computedInst.end()) continue; + + auto iter1 = bitset.find(inst); + DEBUG_ASSERT(iter1 != bitset.end(), logicErr, "should be found"); + // shared dependency between operands, also doesn't affect distance + if ((iter1->second & mask) == mask) continue; + + auto iter2 = distances.find(inst); + if (iter2 == distances.end()) { + // not computed + tmpStack.push_back(x.toInstIndex()); + maxDistance = std::numeric_limits::max(); + } else { + maxDistance = std::max(maxDistance, iter2->second); + } + } + if (maxDistance != std::numeric_limits::max()) { + tmpStack.pop_back(); + distances.insert({current, maxDistance + 1}); + } + } + costs[i] = distances[operand.toInstIndex()]; + distances.clear(); + } + std::sort(ids.begin(), ids.end(), + [&costs](size_t x, size_t y) { return costs[x] < costs[y]; }); + // expensive operands are placed at the top of the stack, i.e. 
scheduled + // earlier + for (size_t x : ids) + if (requiresComputation(curOperands[x])) + stack.push_back(curOperands[x].toInstIndex()); + + bitset.clear(); + } else if (numResults == 1) { + for (auto operand : curOperands) + if (requiresComputation(operand)) + stack.push_back(operand.toInstIndex()); + } else { + stack.pop_back(); + Operand result = addInstruction( + oldOperations[back], toNewOperand(curOperands[0]), + toNewOperand(curOperands[1]), toNewOperand(curOperands[2])); + computedInst.insert({back, result}); + } + } + addInstruction(OpCode::RETURN, + computedInst[oldOperands.back()[0].toInstIndex()]); +} + std::pair, std::vector> Context::genTape() { std::vector tape; std::vector buffer; @@ -313,6 +444,7 @@ std::pair, std::vector> Context::genTape() { } else { reg = availableReg.back(); availableReg.pop_back(); + regUsed[reg] = true; } opToReg.push_back(reg); tape.push_back(static_cast(operations[i])); diff --git a/src/sdf/context.h b/src/sdf/context.h index b67e4d438..2f82316f2 100644 --- a/src/sdf/context.h +++ b/src/sdf/context.h @@ -41,7 +41,7 @@ class Context { Operand b = Operand::none(), Operand c = Operand::none()); void optimizeFMA(); - // TODO: DCE + void reschedule(); std::pair, std::vector> genTape(); diff --git a/src/sdf/value.cpp b/src/sdf/value.cpp index a60c31de7..647978219 100644 --- a/src/sdf/value.cpp +++ b/src/sdf/value.cpp @@ -237,6 +237,8 @@ std::pair, std::vector> Value::genTape() const { ctx.dump(); ctx.optimizeFMA(); ctx.dump(); + ctx.reschedule(); + ctx.dump(); return ctx.genTape(); } From a702cc8936490ed794ae3a58bd5c5e4d2e72e210 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 24 Dec 2024 14:50:29 +0800 Subject: [PATCH 08/37] remove prints --- src/sdf.cpp | 12 ------------ src/sdf/context.cpp | 9 +-------- src/sdf/value.cpp | 3 --- 3 files changed, 1 insertion(+), 23 deletions(-) diff --git a/src/sdf.cpp b/src/sdf.cpp index 29bde90a2..2dcbdab26 100644 --- a/src/sdf.cpp +++ b/src/sdf.cpp @@ -12,8 +12,6 @@ // See the License 
for the specific language governing permissions and // limitations under the License. -#include - #include "./hashtable.h" #include "./impl.h" #include "./parallel.h" @@ -123,8 +121,6 @@ vec3 Bound(vec3 pos, vec3 origin, vec3 spacing, ivec3 gridSize) { return min(max(pos, origin), origin + spacing * (vec3(gridSize) - 1)); } -static std::atomic_int32_t sdf_counter; - double BoundedSDF(ivec4 gridIndex, vec3 origin, vec3 spacing, ivec3 gridSize, double level, std::function sdf) { const ivec3 xyz(gridIndex); @@ -135,7 +131,6 @@ double BoundedSDF(ivec4 gridIndex, vec3 origin, vec3 spacing, ivec3 gridSize, if (boundDist < 0) { return 0.0; } - sdf_counter.fetch_add(1, std::memory_order_relaxed); const double d = sdf(Position(gridIndex, origin, spacing)) - level; return boundDist == 0 ? std::min(d, 0.0) : d; } @@ -482,20 +477,13 @@ Manifold Manifold::LevelSet(std::function sdf, Box bounds, const vec3 origin = bounds.min; Vec voxels(maxIndex); - sdf_counter.store(0); - auto start = std::chrono::high_resolution_clock::now(); for_each_n( pol, countAt(0_uz), maxIndex, [&voxels, sdf, level, origin, spacing, gridSize, gridPow](Uint64 idx) { voxels[idx] = BoundedSDF(DecodeIndex(idx, gridPow) - kVoxelOffset, origin, spacing, gridSize, level, sdf); }); - auto end = std::chrono::high_resolution_clock::now(); - printf("sdf evaluations: %d, %dus\n", sdf_counter.load(), - static_cast( - std::chrono::duration_cast(end - start) - .count())); size_t tableSize = std::min( 2 * maxIndex, static_cast(10 * la::pow(maxIndex, 0.667))); diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index d70aa191e..ac0e1dc6c 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -312,7 +312,7 @@ void Context::reschedule() { // compute operand costs as distance in the dependency graph std::array costs = {0, 0, 0}; std::array ids = {0, 1, 2}; - for (int i = 0; i < curOperands.size(); i++) { + for (size_t i = 0; i < curOperands.size(); i++) { auto operand = curOperands[i]; if 
(!requiresComputation(operand)) continue; tmpStack.push_back(operand.toInstIndex()); @@ -414,8 +414,6 @@ std::pair, std::vector> Context::genTape() { auto operand = operands[i][0]; tape.push_back(static_cast(operations[i])); tape.push_back(getReg(operand)); - dumpOpCode(operations[i]); - std::cout << " r" << static_cast(tape.back()) << std::endl; break; } // free up operand registers if possible @@ -448,17 +446,12 @@ std::pair, std::vector> Context::genTape() { } opToReg.push_back(reg); tape.push_back(static_cast(operations[i])); - dumpOpCode(operations[i]); - std::cout << " r" << static_cast(reg); tape.push_back(reg); for (auto operand : operands[i]) { if (operand.isNone()) break; tape.push_back(getReg(operand)); - std::cout << " r" << static_cast(tape.back()); } - std::cout << std::endl; } - std::cout << "-----------" << std::endl; return std::make_pair(std::move(tape), std::move(buffer)); } diff --git a/src/sdf/value.cpp b/src/sdf/value.cpp index 647978219..a3b49924e 100644 --- a/src/sdf/value.cpp +++ b/src/sdf/value.cpp @@ -234,11 +234,8 @@ std::pair, std::vector> Value::genTape() const { Operand result = getOperand(*this, [](Value _) {}); ctx.addInstruction(OpCode::RETURN, result, Operand::none(), Operand::none()); - ctx.dump(); ctx.optimizeFMA(); - ctx.dump(); ctx.reschedule(); - ctx.dump(); return ctx.genTape(); } From 05107c940c969e86bf025c893212218e37157dbe Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 24 Dec 2024 15:33:33 +0800 Subject: [PATCH 09/37] common subexpression elimination --- src/sdf/context.cpp | 36 ++++++++++++++++++++++++++++++++---- src/sdf/context.h | 7 +++++++ src/sdf/value.cpp | 2 -- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index ac0e1dc6c..f351c9efb 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -145,8 +145,36 @@ Operand Context::addConstant(double d) { return result.first->second; } -// TODO: hashconsing Operand Context::addInstruction(OpCode op, 
Operand a, Operand b, Operand c) { + switch (op) { + case OpCode::ADD: + case OpCode::MUL: + case OpCode::MIN: + case OpCode::MAX: + case OpCode::EQ: + case OpCode::AND: + case OpCode::OR: + case OpCode::FMA: + // first two operands commutative, sort them + // makes it more likely to find common subexpressions + if (a.id > b.id) std::swap(a, b); + break; + default: + break; + } + // common subexpression elimination + auto key = std::make_pair(op, std::make_tuple(a, b, c)); + auto entry = cache.find(key); + if (entry != cache.end()) return entry->second; + auto result = addInstructionNoCache(op, a, b, c); + cache.insert({key, result}); + return result; +} + +// bypass the cache because we don't expect to have more common subexpressions +// after optimizations +Operand Context::addInstructionNoCache(OpCode op, Operand a, Operand b, + Operand c) { // constant choice if (op == OpCode::CHOICE && a.isConst()) { if (constants[a.toConstIndex()] == 1.0) return b; @@ -363,14 +391,14 @@ void Context::reschedule() { stack.push_back(operand.toInstIndex()); } else { stack.pop_back(); - Operand result = addInstruction( + Operand result = addInstructionNoCache( oldOperations[back], toNewOperand(curOperands[0]), toNewOperand(curOperands[1]), toNewOperand(curOperands[2])); computedInst.insert({back, result}); } } - addInstruction(OpCode::RETURN, - computedInst[oldOperands.back()[0].toInstIndex()]); + addInstructionNoCache(OpCode::RETURN, + computedInst[oldOperands.back()[0].toInstIndex()]); } std::pair, std::vector> Context::genTape() { diff --git a/src/sdf/context.h b/src/sdf/context.h index 2f82316f2..402954c62 100644 --- a/src/sdf/context.h +++ b/src/sdf/context.h @@ -14,6 +14,7 @@ #pragma once #include +#include #include #include @@ -32,6 +33,7 @@ struct Operand { size_t toInstIndex() const { return static_cast(id - 1); } bool operator==(const Operand& other) const { return id == other.id; } bool operator!=(const Operand& other) const { return id != other.id; } + bool 
operator<(const Operand& other) const { return id < other.id; } }; class Context { @@ -66,6 +68,11 @@ class Context { std::vector tmpTape; std::vector tmpBuffer; + std::map>, Operand> cache; + + Operand addInstructionNoCache(OpCode op, Operand a = Operand::none(), + Operand b = Operand::none(), + Operand c = Operand::none()); }; } // namespace manifold::sdf diff --git a/src/sdf/value.cpp b/src/sdf/value.cpp index a3b49924e..b4fded75b 100644 --- a/src/sdf/value.cpp +++ b/src/sdf/value.cpp @@ -196,8 +196,6 @@ std::pair, std::vector> Value::genTape() const { case ValueKind::OPERATION: { auto iter = cache.find(std::get(x.v)); if (iter != cache.end()) return iter->second; - // stack.push_back(std::get(x.v)); - // ready = false; f(x); return Operand::none(); } From dbad4153954a272ac6b7b52d863897ac4391d1b0 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 24 Dec 2024 15:35:13 +0800 Subject: [PATCH 10/37] clear cache to avoid having stale data --- src/sdf/context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index f351c9efb..7772b14e1 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -277,7 +277,7 @@ void Context::optimizeFMA() { void Context::reschedule() { DEBUG_ASSERT(!operations.empty() && operations.back() == OpCode::RETURN, logicErr, "return expected"); - + cache.clear(); auto oldOperations = std::move(operations); auto oldOperands = std::move(operands); opUses.clear(); From 6b1fa4ba3f2d283d2a1f27fdfd8907e75ce64950 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 24 Dec 2024 15:36:47 +0800 Subject: [PATCH 11/37] format --- scripts/format.sh | 2 +- src/sdf/context.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/format.sh b/scripts/format.sh index fd72866d7..e6ef3e94a 100755 --- a/scripts/format.sh +++ b/scripts/format.sh @@ -14,7 +14,7 @@ $CLANG_FORMAT -i bindings/wasm/*.{js,ts} & $CLANG_FORMAT -i bindings/wasm/examples/*.{js,ts,html} & $CLANG_FORMAT -i 
bindings/wasm/examples/public/*.{js,ts} & $CLANG_FORMAT -i src/*.{h,cpp} & -$CLANG_FORMAT -i src/*/*.cpp & +$CLANG_FORMAT -i src/*/*.{h,cpp} & $CLANG_FORMAT -i include/manifold/*.h & black --quiet bindings/python/examples/*.py & diff --git a/src/sdf/context.h b/src/sdf/context.h index 402954c62..f8a0cbdbd 100644 --- a/src/sdf/context.h +++ b/src/sdf/context.h @@ -13,8 +13,8 @@ // limitations under the License. #pragma once -#include #include +#include #include #include @@ -68,7 +68,8 @@ class Context { std::vector tmpTape; std::vector tmpBuffer; - std::map>, Operand> cache; + std::map>, Operand> + cache; Operand addInstructionNoCache(OpCode op, Operand a = Operand::none(), Operand b = Operand::none(), From 366d91cde347b99151c4192b309be76659c44710 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 25 Dec 2024 02:31:30 +0800 Subject: [PATCH 12/37] proper spilling --- src/sdf/context.cpp | 303 ++++++++++++++++++++++---------------------- src/sdf/tape.h | 125 ++++++++++++++---- src/sdf/value.h | 1 - 3 files changed, 254 insertions(+), 175 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 7772b14e1..e4aa1a75e 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -15,108 +15,26 @@ #include "context.h" #include + +#ifdef MANIFOLD_DEBUG #include +#endif #include "manifold/optional_assert.h" -namespace manifold::sdf { - -void dumpOpCode(OpCode op) { - switch (op) { - case OpCode::NOP: - std::cout << "NOP"; - break; - case OpCode::RETURN: - std::cout << "RETURN"; - break; - case OpCode::ABS: - std::cout << "ABS"; - break; - case OpCode::NEG: - std::cout << "NEG"; - break; - case OpCode::EXP: - std::cout << "EXP"; - break; - case OpCode::LOG: - std::cout << "LOG"; - break; - case OpCode::SQRT: - std::cout << "SQRT"; - break; - case OpCode::FLOOR: - std::cout << "FLOOR"; - break; - case OpCode::CEIL: - std::cout << "CEIL"; - break; - case OpCode::ROUND: - std::cout << "ROUND"; - break; - case OpCode::SIN: - std::cout << "SIN"; - break; - case 
OpCode::COS: - std::cout << "COS"; - break; - case OpCode::TAN: - std::cout << "TAN"; - break; - case OpCode::ASIN: - std::cout << "ASIN"; - break; - case OpCode::ACOS: - std::cout << "ACOS"; - break; - case OpCode::ATAN: - std::cout << "ATAN"; - break; - case OpCode::DIV: - std::cout << "DIV"; - break; - case OpCode::MOD: - std::cout << "MOD"; - break; - case OpCode::MIN: - std::cout << "MIN"; - break; - case OpCode::MAX: - std::cout << "MAX"; - break; - case OpCode::EQ: - std::cout << "EQ"; - break; - case OpCode::GT: - std::cout << "GT"; - break; - case OpCode::AND: - std::cout << "AND"; - break; - case OpCode::OR: - std::cout << "OR"; - break; - case OpCode::ADD: - std::cout << "ADD"; - break; - case OpCode::SUB: - std::cout << "SUB"; - break; - case OpCode::MUL: - std::cout << "MUL"; - break; - case OpCode::FMA: - std::cout << "FMA"; - break; - case OpCode::CHOICE: - std::cout << "CHOICE"; - break; +template <> +struct std::hash { + size_t operator()(const manifold::sdf::Operand &operand) const { + return std::hash()(operand.id); } -} +}; + +namespace manifold::sdf { void Context::dump() const { +#ifdef MANIFOLD_DEBUG for (size_t i = 0; i < operations.size(); i++) { std::cout << i << " "; - dumpOpCode(operations[i]); std::cout << " "; for (Operand operand : operands[i]) { if (operand.isNone()) break; @@ -133,6 +51,7 @@ void Context::dump() const { std::cout << "}" << std::endl; } std::cout << "-----------" << std::endl; +#endif } Operand Context::addConstant(double d) { @@ -401,83 +320,165 @@ void Context::reschedule() { computedInst[oldOperands.back()[0].toInstIndex()]); } +struct LruEntry { + size_t lastUse; + Operand operand; + uint8_t reg; + + bool operator<(const LruEntry &other) const { + return lastUse < other.lastUse || + (lastUse == other.lastUse && operand.id < other.operand.id); + } +}; + std::pair, std::vector> Context::genTape() { std::vector tape; std::vector buffer; std::vector constantToReg; for (int i : {0, 1, 2}) // x, y, z 
buffer.push_back(0.0); - // handle constants by putting them inside the buffer/register - // they are different because they require static lifetime, and cannot be - // changed in an execution - // FIXME: this is just temporary, we should optimize by encoding some - // constants with a minimal number of uses into the read-only code when there - // is a register pressure (more than 255...) - for (size_t i = 0; i < constants.size(); i++) { - constantToReg.push_back(0); - if (constantUses[i].empty()) continue; - constantToReg.back() = static_cast(buffer.size()); - buffer.push_back(constants[i]); - } - std::vector regUsed(buffer.size(), true); - std::vector opToReg; std::vector availableReg; + // we may want to make this wrap around... + std::vector lru; + std::unordered_map spills; + std::vector spillSlots; - auto getReg = [&](Operand operand) { - if (operand.isResult()) - return opToReg[operand.toInstIndex()]; - else if (operand.isConst()) - return constantToReg[operand.toConstIndex()]; - return static_cast(-(operand.id + 1)); + auto insertLru = [&](LruEntry entry) { + lru.insert(std::lower_bound(lru.begin(), lru.end(), entry), entry); + }; + auto allocateReg = [&]() { + if (!availableReg.empty()) { + auto reg = availableReg.back(); + availableReg.pop_back(); + return reg; + } + // used too many registers, need to spill something + // note: tested with a limit of 10, spills correctly + if (buffer.size() > 255) { + uint32_t slot; + if (spillSlots.empty()) { + slot = buffer.size(); + buffer.push_back(0.0); + } else { + slot = spillSlots.back(); + spillSlots.pop_back(); + } + spills.insert({lru.front().operand, slot}); + tape.push_back(static_cast(OpCode::STORE)); + std::array tmpBuffer; + std::memcpy(tmpBuffer.begin(), &slot, sizeof(uint32_t)); + for (auto byte : tmpBuffer) tape.push_back(byte); + auto reg = lru.front().reg; + tape.push_back(reg); + lru.erase(lru.begin()); + return reg; + } + auto reg = static_cast(buffer.size()); + buffer.push_back(0.0); + return reg; 
+ }; + auto handleOperands = [&](std::array instOperands, size_t inst) { + auto getReg = [&](Operand operand, size_t inst) { + if (operand.isNone()) return static_cast(0); + // special xyz + if (!operand.isConst() && !operand.isResult()) + return static_cast(-(operand.id + 1)); + // Assume last use was updated, the operand, if present, must be at the + // end of the lru cache. Just do a linear scan from the back + for (auto it = lru.rbegin(); it != lru.rend(); ++it) { + // no result + if (it->lastUse != inst) break; + if (it->operand == operand) { + return it->reg; + } + } + auto reg = allocateReg(); + auto iter = spills.find(operand); + if (iter == spills.end()) { + DEBUG_ASSERT(operand.isConst(), logicErr, + "can only materialize constants"); + tape.push_back(static_cast(OpCode::CONST)); + tape.push_back(reg); + std::array tmpBuffer; + std::memcpy(tmpBuffer.begin(), &constants[operand.toConstIndex()], + sizeof(double)); + for (auto byte : tmpBuffer) tape.push_back(byte); + } else { + tape.push_back(static_cast(OpCode::LOAD)); + tape.push_back(reg); + std::array tmpBuffer; + std::memcpy(tmpBuffer.begin(), &iter->second, sizeof(uint32_t)); + for (auto byte : tmpBuffer) tape.push_back(byte); + spillSlots.push_back(iter->second); + spills.erase(iter); + } + insertLru({inst, operand, reg}); + return reg; + }; + auto getUses = [&](Operand operand) { + if (operand.isResult()) { + return &opUses[operand.toInstIndex()]; + } else if (operand.isConst()) { + return &constantUses[operand.toConstIndex()]; + } else { + return static_cast *>(nullptr); + } + }; + auto updateLru = [&](Operand operand, size_t inst) { + std::vector *uses = getUses(operand); + if (uses == nullptr) return; + auto i = std::distance( + uses->begin(), std::lower_bound(uses->begin(), uses->end(), inst)); + if (i == 0 && !operand.isResult()) return; + size_t lastUse = i == 0 ? 
operand.toInstIndex() : uses->at(i - 1); + // when finding the entry, register field doesn't matter + auto iter = std::lower_bound(lru.begin(), lru.end(), + LruEntry{lastUse, operand, 0}); + if (iter != lru.end() && iter->operand == operand) { + auto entry = *iter; + entry.lastUse = inst; + lru.erase(iter); + insertLru(entry); + } + }; + std::array regs; + for (size_t i : {0, 1, 2}) updateLru(instOperands[i], inst); + for (size_t i : {0, 1, 2}) regs[i] = getReg(instOperands[i], inst); + // after potential rematerialization, see if they are at the end of their + // lifetime + for (size_t i : {0, 1, 2}) { + if (!instOperands[i].isConst() && !instOperands[i].isResult()) continue; + if (getUses(instOperands[i])->back() != inst) continue; + // remove from lru, note that it is possible that it can be removed + // earlier from another operand + for (auto it = lru.rbegin(); it != lru.rend(); ++it) { + if (it->lastUse != inst) break; + if (it->reg == regs[i]) { + availableReg.push_back(regs[i]); + lru.erase(std::next(it).base()); + } + } + } + return regs; }; - // FIXME: handle spills for (size_t i = 0; i < operations.size(); i++) { - if (operations[i] == OpCode::NOP) { - opToReg.push_back(0); - continue; - } + if (operations[i] == OpCode::NOP) continue; + auto tmp = handleOperands(operands[i], i); if (operations[i] == OpCode::RETURN) { - auto operand = operands[i][0]; tape.push_back(static_cast(operations[i])); - tape.push_back(getReg(operand)); + tape.push_back(tmp[0]); break; } - // free up operand registers if possible - for (auto operand : operands[i]) { - if (!operand.isResult()) continue; - auto operandInst = operand.toInstIndex(); - // not the last instruction, cannot free it up - if (opUses[operandInst].back() != i) continue; - uint8_t reg = opToReg[operandInst]; - // already freed, probably due to identical arguments - if (!regUsed[reg]) continue; - regUsed[reg] = false; - availableReg.push_back(reg); - } // allocate register - uint8_t reg; - if 
(availableReg.empty()) { - // GG if we used too many registers, need spilling - if (buffer.size() == 255) { - // just return some nonsense that will not crash - return {{static_cast(OpCode::RETURN), 0}, {0.0}}; - } - reg = buffer.size(); - buffer.push_back(0.0); - regUsed.push_back(true); - } else { - reg = availableReg.back(); - availableReg.pop_back(); - regUsed[reg] = true; - } - opToReg.push_back(reg); + uint8_t reg = allocateReg(); + insertLru({i, Operand{static_cast(i) + 1}, reg}); tape.push_back(static_cast(operations[i])); tape.push_back(reg); - for (auto operand : operands[i]) { - if (operand.isNone()) break; - tape.push_back(getReg(operand)); + for (size_t j : {0, 1, 2}) { + if (operands[i][j].isNone()) break; + tape.push_back(tmp[j]); } } return std::make_pair(std::move(tape), std::move(buffer)); diff --git a/src/sdf/tape.h b/src/sdf/tape.h index e80695b8a..55274614b 100644 --- a/src/sdf/tape.h +++ b/src/sdf/tape.h @@ -15,9 +15,9 @@ #include #include -#include #include #include +#include #include "interval.h" #include "manifold/vec_view.h" @@ -27,7 +27,9 @@ namespace manifold::sdf { enum class OpCode : uint8_t { NOP, RETURN, - // CONST, + CONST, + STORE, + LOAD, // unary operations ABS, @@ -69,8 +71,6 @@ struct EvalContext { VecView tape; VecView buffer; - Domain& operand(uint32_t x) { return buffer[x]; } - static Domain handle_unary(OpCode op, Domain operand); static Domain handle_binary(OpCode op, Domain lhs, Domain rhs); @@ -86,42 +86,52 @@ struct EvalContext { // loop is needed to force the compiler to use a tight code layout do { size_t result = tape[i + 1]; - Domain lhs = operand(tape[i + 2]); - Domain rhs = operand(tape[i + 3]); + Domain lhs = buffer[tape[i + 2]]; + Domain rhs = buffer[tape[i + 3]]; i += 4; if (current <= OpCode::MUL) { if (current == OpCode::ADD) - operand(result) = lhs + rhs; + buffer[result] = lhs + rhs; else if (current == OpCode::SUB) - operand(result) = lhs - rhs; + buffer[result] = lhs - rhs; else - operand(result) = lhs * 
rhs; + buffer[result] = lhs * rhs; } else { - Domain z = operand(tape[i++]); + Domain z = buffer[tape[i++]]; if (current == OpCode::FMA) - operand(result) = lhs * rhs + z; + buffer[result] = lhs * rhs + z; else - operand(result) = handle_choice(lhs, rhs, z); + buffer[result] = handle_choice(lhs, rhs, z); } current = static_cast(tape[i]); } while (current >= OpCode::ADD); } if (current >= OpCode::DIV) { - Domain lhs = operand(tape[i + 2]); - Domain rhs = operand(tape[i + 3]); - operand(tape[i + 1]) = handle_binary(current, lhs, rhs); + Domain lhs = buffer[tape[i + 2]]; + Domain rhs = buffer[tape[i + 3]]; + buffer[tape[i + 1]] = handle_binary(current, lhs, rhs); i += 4; } else if (current >= OpCode::ABS) { - Domain x = operand(tape[i + 2]); - operand(tape[i + 1]) = handle_unary(current, x); + Domain x = buffer[tape[i + 2]]; + buffer[tape[i + 1]] = handle_unary(current, x); i += 3; - // } else if (current == OpCode::CONST) { - // double x; - // std::memcpy(&x, tape.data() + i + 2, sizeof(x)); - // operand(tape[i + 1]) = x; - // i += 2 + sizeof(x); + } else if (current == OpCode::CONST) { + double x; + std::memcpy(&x, tape.data() + i + 2, sizeof(x)); + buffer[tape[i + 1]] = Domain(x); + i += 2 + sizeof(x); + } else if (current == OpCode::LOAD) { + uint32_t x; + std::memcpy(&x, tape.data() + i + 2, sizeof(x)); + buffer[tape[i + 1]] = buffer[x]; + i += 2 + sizeof(x); + } else if (current == OpCode::STORE) { + uint32_t x; + std::memcpy(&x, tape.data() + i + 1, sizeof(x)); + buffer[x] = buffer[tape[i + 1 + sizeof(x)]]; + i += 2 + sizeof(x); } else if (current == OpCode::RETURN) { - return operand(tape[i + 1]); + return buffer[tape[i + 1]]; } else { i += 1; } @@ -281,4 +291,73 @@ inline Interval EvalContext>::handle_choice( return lhs.merge(rhs); } +inline std::string dumpOpCode(OpCode op) { + switch (op) { + case OpCode::NOP: + return "NOP"; + case OpCode::RETURN: + return "RETURN"; + case OpCode::CONST: + return "CONST"; + case OpCode::LOAD: + return "LOAD"; + case 
OpCode::STORE: + return "STORE"; + case OpCode::ABS: + return "ABS"; + case OpCode::NEG: + return "NEG"; + case OpCode::EXP: + return "EXP"; + case OpCode::LOG: + return "LOG"; + case OpCode::SQRT: + return "SQRT"; + case OpCode::FLOOR: + return "FLOOR"; + case OpCode::CEIL: + return "CEIL"; + case OpCode::ROUND: + return "ROUND"; + case OpCode::SIN: + return "SIN"; + case OpCode::COS: + return "COS"; + case OpCode::TAN: + return "TAN"; + case OpCode::ASIN: + return "ASIN"; + case OpCode::ACOS: + return "ACOS"; + case OpCode::ATAN: + return "ATAN"; + case OpCode::DIV: + return "DIV"; + case OpCode::MOD: + return "MOD"; + case OpCode::MIN: + return "MIN"; + case OpCode::MAX: + return "MAX"; + case OpCode::EQ: + return "EQ"; + case OpCode::GT: + return "GT"; + case OpCode::AND: + return "AND"; + case OpCode::OR: + return "OR"; + case OpCode::ADD: + return "ADD"; + case OpCode::SUB: + return "SUB"; + case OpCode::MUL: + return "MUL"; + case OpCode::FMA: + return "FMA"; + case OpCode::CHOICE: + return "CHOICE"; + } +} + } // namespace manifold::sdf diff --git a/src/sdf/value.h b/src/sdf/value.h index 6eda08bf4..220243dab 100644 --- a/src/sdf/value.h +++ b/src/sdf/value.h @@ -14,7 +14,6 @@ #pragma once #include -#include #include #include From 4a9f62ed7c26068391bd23f8c754400ec381904d Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 25 Dec 2024 02:38:06 +0800 Subject: [PATCH 13/37] pleaase msvc --- src/sdf/context.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index e4aa1a75e..781d7b2ea 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -367,7 +367,7 @@ std::pair, std::vector> Context::genTape() { spills.insert({lru.front().operand, slot}); tape.push_back(static_cast(OpCode::STORE)); std::array tmpBuffer; - std::memcpy(tmpBuffer.begin(), &slot, sizeof(uint32_t)); + std::memcpy(tmpBuffer.data(), &slot, sizeof(uint32_t)); for (auto byte : tmpBuffer) tape.push_back(byte); auto reg = 
lru.front().reg; tape.push_back(reg); @@ -401,14 +401,14 @@ std::pair, std::vector> Context::genTape() { tape.push_back(static_cast(OpCode::CONST)); tape.push_back(reg); std::array tmpBuffer; - std::memcpy(tmpBuffer.begin(), &constants[operand.toConstIndex()], + std::memcpy(tmpBuffer.data(), &constants[operand.toConstIndex()], sizeof(double)); for (auto byte : tmpBuffer) tape.push_back(byte); } else { tape.push_back(static_cast(OpCode::LOAD)); tape.push_back(reg); std::array tmpBuffer; - std::memcpy(tmpBuffer.begin(), &iter->second, sizeof(uint32_t)); + std::memcpy(tmpBuffer.data(), &iter->second, sizeof(uint32_t)); for (auto byte : tmpBuffer) tape.push_back(byte); spillSlots.push_back(iter->second); spills.erase(iter); From 9f01e3f58e26945b57c24bd6338d37f998ba17b7 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 25 Dec 2024 21:34:57 +0800 Subject: [PATCH 14/37] optimize a bit --- src/sdf/context.cpp | 93 ++++------ src/sdf/context.h | 43 ++++- src/sdf/small_vector.h | 395 +++++++++++++++++++++++++++++++++++++++++ src/sdf/tape.h | 1 + src/sdf/value.cpp | 14 +- src/sdf/value.h | 2 +- test/sdf_tape_test.cpp | 30 ++-- 7 files changed, 493 insertions(+), 85 deletions(-) create mode 100644 src/sdf/small_vector.h diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 781d7b2ea..10dcf38d0 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -22,20 +22,12 @@ #include "manifold/optional_assert.h" -template <> -struct std::hash { - size_t operator()(const manifold::sdf::Operand &operand) const { - return std::hash()(operand.id); - } -}; - namespace manifold::sdf { - void Context::dump() const { #ifdef MANIFOLD_DEBUG for (size_t i = 0; i < operations.size(); i++) { std::cout << i << " "; - std::cout << " "; + std::cout << " " << dumpOpCode(operations[i]) << " "; for (Operand operand : operands[i]) { if (operand.isNone()) break; if (operand.isResult()) @@ -102,11 +94,7 @@ Operand Context::addInstructionNoCache(OpCode op, Operand a, Operand b, // constant 
propagation bool all_constants = true; for (auto operand : {a, b, c}) { - if (operand.isNone()) break; - if (!operand.isConst()) { - all_constants = false; - break; - } + if (!operand.isConst() && !operand.isNone()) all_constants = false; } if (all_constants) { tmpTape.clear(); @@ -132,7 +120,7 @@ Operand Context::addInstructionNoCache(OpCode op, Operand a, Operand b, opUses.emplace_back(); // update uses for (auto operand : {a, b, c}) { - std::vector *target; + small_vector *target; if (operand.isResult()) { target = &opUses[operand.toInstIndex()]; } else if (operand.isConst()) { @@ -155,13 +143,10 @@ void Context::optimizeFMA() { operations[i] = OpCode::FMA; Operand a = operands[lhsInst][0]; Operand b = operands[lhsInst][1]; - operands[i][0] = a; - operands[i][1] = b; - operands[i][2] = rhs; + operands[i] = {a, b, rhs}; // remove instruction operations[lhsInst] = OpCode::NOP; - operands[lhsInst][0] = Operand::none(); - operands[lhsInst][1] = Operand::none(); + operands[lhsInst] = {Operand::none(), Operand::none(), Operand::none()}; // update uses, note that we need to maintain the order of the indices opUses[lhsInst].clear(); auto updateUses = [&](Operand x) { @@ -173,7 +158,7 @@ void Context::optimizeFMA() { uses.erase(iter1); auto iter2 = std::lower_bound(uses.begin(), uses.end(), i); // make sure there is no duplicate - if (iter2 == uses.end() || *iter2 != i + 1) uses.insert(iter2, i); + if (iter2 == uses.end() || *iter2 != i) uses.insert(iter2, i); }; updateUses(a); if (a != b) updateUses(b); @@ -202,18 +187,19 @@ void Context::reschedule() { opUses.clear(); for (auto &uses : constantUses) uses.clear(); - std::unordered_map computedInst; + std::vector computedInst(oldOperands.size(), Operand::none()); std::vector stack; + stack.reserve(64); if (oldOperands.back()[0].isResult()) stack.push_back(oldOperands.back()[0].toInstIndex()); - std::unordered_map bitset; - std::unordered_map distances; + std::vector bitset(oldOperands.size(), 0); + std::vector 
distances(oldOperands.size(), 0); std::vector tmpStack; + tmpStack.reserve(64); auto requiresComputation = [&computedInst](Operand operand) { - return operand.isResult() && - computedInst.find(operand.toInstIndex()) == computedInst.end(); + return operand.isResult() && computedInst[operand.toInstIndex()].isNone(); }; auto toNewOperand = [&computedInst](Operand old) { if (old.isResult()) return computedInst[old.toInstIndex()]; @@ -241,14 +227,8 @@ void Context::reschedule() { auto current = tmpStack.back(); tmpStack.pop_back(); // already computed - if (computedInst.find(current) != computedInst.end()) continue; - auto iter = bitset.find(current); - if (iter == bitset.end()) { - // new dependency - bitset.insert({current, 1 << numResults}); - } else { - iter->second |= 1 << numResults; - } + if (!computedInst[current].isNone()) continue; + bitset[current] |= 1 << numResults; for (auto x : oldOperands[current]) { if (!x.isResult()) continue; tmpStack.push_back(x.toInstIndex()); @@ -271,29 +251,27 @@ void Context::reschedule() { auto inst = x.toInstIndex(); // computed, doesn't affect distance - if (computedInst.find(inst) != computedInst.end()) continue; + if (!computedInst[inst].isNone()) continue; - auto iter1 = bitset.find(inst); - DEBUG_ASSERT(iter1 != bitset.end(), logicErr, "should be found"); // shared dependency between operands, also doesn't affect distance - if ((iter1->second & mask) == mask) continue; + if ((bitset[inst] & mask) == mask) continue; - auto iter2 = distances.find(inst); - if (iter2 == distances.end()) { + auto d = distances[inst]; + if (d == 0) { // not computed tmpStack.push_back(x.toInstIndex()); maxDistance = std::numeric_limits::max(); } else { - maxDistance = std::max(maxDistance, iter2->second); + maxDistance = std::max(maxDistance, d); } } if (maxDistance != std::numeric_limits::max()) { tmpStack.pop_back(); - distances.insert({current, maxDistance + 1}); + distances[current] = maxDistance + 1; } } costs[i] = 
distances[operand.toInstIndex()]; - distances.clear(); + std::fill(distances.begin(), distances.end(), 0); } std::sort(ids.begin(), ids.end(), [&costs](size_t x, size_t y) { return costs[x] < costs[y]; }); @@ -303,7 +281,7 @@ void Context::reschedule() { if (requiresComputation(curOperands[x])) stack.push_back(curOperands[x].toInstIndex()); - bitset.clear(); + std::fill(bitset.begin(), bitset.end(), 0); } else if (numResults == 1) { for (auto operand : curOperands) if (requiresComputation(operand)) @@ -313,7 +291,7 @@ void Context::reschedule() { Operand result = addInstructionNoCache( oldOperations[back], toNewOperand(curOperands[0]), toNewOperand(curOperands[1]), toNewOperand(curOperands[2])); - computedInst.insert({back, result}); + computedInst[back] = result; } } addInstructionNoCache(OpCode::RETURN, @@ -325,19 +303,15 @@ struct LruEntry { Operand operand; uint8_t reg; - bool operator<(const LruEntry &other) const { + inline bool operator<(const LruEntry &other) const { return lastUse < other.lastUse || (lastUse == other.lastUse && operand.id < other.operand.id); } }; -std::pair, std::vector> Context::genTape() { +std::pair, size_t> Context::genTape() { std::vector tape; - std::vector buffer; - std::vector constantToReg; - for (int i : {0, 1, 2}) // x, y, z - buffer.push_back(0.0); - + size_t bufferSize = 3; std::vector availableReg; // we may want to make this wrap around... 
std::vector lru; @@ -355,11 +329,10 @@ std::pair, std::vector> Context::genTape() { } // used too many registers, need to spill something // note: tested with a limit of 10, spills correctly - if (buffer.size() > 255) { + if (bufferSize > 255) { uint32_t slot; if (spillSlots.empty()) { - slot = buffer.size(); - buffer.push_back(0.0); + slot = bufferSize++; } else { slot = spillSlots.back(); spillSlots.pop_back(); @@ -374,8 +347,7 @@ std::pair, std::vector> Context::genTape() { lru.erase(lru.begin()); return reg; } - auto reg = static_cast(buffer.size()); - buffer.push_back(0.0); + auto reg = static_cast(bufferSize++); return reg; }; auto handleOperands = [&](std::array instOperands, size_t inst) { @@ -422,11 +394,11 @@ std::pair, std::vector> Context::genTape() { } else if (operand.isConst()) { return &constantUses[operand.toConstIndex()]; } else { - return static_cast *>(nullptr); + return static_cast *>(nullptr); } }; auto updateLru = [&](Operand operand, size_t inst) { - std::vector *uses = getUses(operand); + const auto uses = getUses(operand); if (uses == nullptr) return; auto i = std::distance( uses->begin(), std::lower_bound(uses->begin(), uses->end(), inst)); @@ -471,7 +443,6 @@ std::pair, std::vector> Context::genTape() { tape.push_back(tmp[0]); break; } - // allocate register uint8_t reg = allocateReg(); insertLru({i, Operand{static_cast(i) + 1}, reg}); tape.push_back(static_cast(operations[i])); @@ -481,7 +452,7 @@ std::pair, std::vector> Context::genTape() { tape.push_back(tmp[j]); } } - return std::make_pair(std::move(tape), std::move(buffer)); + return std::make_pair(std::move(tape), bufferSize); } } // namespace manifold::sdf diff --git a/src/sdf/context.h b/src/sdf/context.h index f8a0cbdbd..0f4d125b0 100644 --- a/src/sdf/context.h +++ b/src/sdf/context.h @@ -13,11 +13,11 @@ // limitations under the License. 
#pragma once -#include #include #include #include +#include "small_vector.h" #include "tape.h" namespace manifold::sdf { @@ -35,7 +35,39 @@ struct Operand { bool operator!=(const Operand& other) const { return id != other.id; } bool operator<(const Operand& other) const { return id < other.id; } }; +} // namespace manifold::sdf + +using namespace manifold::sdf; + +inline void hash_combine(std::size_t& seed) {} +template +inline void hash_combine(std::size_t& seed, const T& v, Rest... rest) { + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); + hash_combine(seed, rest...); +} + +template <> +struct std::hash { + size_t operator()(const Operand& operand) const { + return std::hash()(operand.id); + } +}; + +template <> +struct std::hash>> { + size_t operator()( + const std::pair>& pair) + const { + size_t h = std::hash()(static_cast(pair.first)); + hash_combine(h, std::get<0>(pair.second), std::get<1>(pair.second), + std::get<2>(pair.second)); + return h; + } +}; + +namespace manifold::sdf { class Context { public: Operand addConstant(double d); @@ -45,7 +77,7 @@ class Context { void optimizeFMA(); void reschedule(); - std::pair, std::vector> genTape(); + std::pair, size_t> genTape(); void dump() const; @@ -56,11 +88,11 @@ class Context { std::vector constants; // constant use vector, elements are instruction indices // constant with ID -4 is mapped to 0, etc. - std::vector> constantUses; + std::vector> constantUses; // instructions, index 0 is mapped to ID 1, etc. 
std::vector operations; // instruction value use vector, elements are instruction indices - std::vector> opUses; + std::vector> opUses; // operands, 0 is invalid (uses fewer operands) // +ve are instruction results // -ve are constants @@ -68,7 +100,8 @@ class Context { std::vector tmpTape; std::vector tmpBuffer; - std::map>, Operand> + std::unordered_map>, + Operand> cache; Operand addInstructionNoCache(OpCode op, Operand a = Operand::none(), diff --git a/src/sdf/small_vector.h b/src/sdf/small_vector.h new file mode 100644 index 000000000..492e56b0d --- /dev/null +++ b/src/sdf/small_vector.h @@ -0,0 +1,395 @@ +// Copyright 2024 The Manifold Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// rewrite of https://github.com/p-ranav/small_vector +#pragma once +#include +#include + +namespace manifold { + +// note that this will not work with non-trivial data (custom +// constructor/destructor) +template +class small_vector { + std::array stack_; + std::vector heap_; + std::size_t size_{0}; + + public: + using value_type = T; + using size_type = std::size_t; + using reference = value_type &; + using const_reference = const value_type &; + using pointer = T *; + using const_pointer = const T *; + + small_vector() = default; + + explicit small_vector(size_type count, const T &value = T()) { + if (count <= N) { + std::fill(stack_.begin(), stack_.begin() + count, value); + } else { + // use heap + heap_.resize(count, value); + } + size_ = count; + } + + small_vector(const small_vector &other) + : stack_(other.stack_), heap_(other.heap_), size_(other.size_) {} + + small_vector(small_vector &&other) + : stack_(std::move(other.stack_)), + heap_(std::move(other.heap_)), + size_(other.size_) {} + + small_vector(std::initializer_list initlist) { + const auto input_size = initlist.size(); + if (input_size <= N) { + std::copy(initlist.begin(), initlist.end(), stack_.begin()); + } else { + std::copy(initlist.begin(), initlist.end(), std::back_inserter(heap_)); + } + size_ = input_size; + } + + small_vector &operator=(const small_vector &rhs) { + stack_ = rhs.stack_; + heap_ = rhs.heap_; + size_ = rhs.size_; + return *this; + } + + small_vector &operator=(small_vector &&rhs) { + stack_ = std::move(rhs.stack_); + heap_ = std::move(rhs.heap_); + size_ = rhs.size_; + rhs.size_ = 0; + return *this; + } + + small_vector &operator=(std::initializer_list rhs) { + if (rhs.size() <= N) { + stack_ = rhs; + } else { + heap_ = rhs; + } + size_ = rhs.size(); + } + + reference at(size_type pos) { + if (size_ <= N) { + return stack_.at(pos); + } else { + return heap_.at(pos); + } + } + + const_reference at(size_type pos) const { + if (size_ <= N) { + return stack_.at(pos); + } 
else { + return heap_.at(pos); + } + } + + reference operator[](size_type pos) { + if (size_ <= N) { + return stack_[pos]; + } else { + return heap_[pos]; + } + } + + const_reference operator[](size_type pos) const { + if (size_ <= N) { + return stack_[pos]; + } else { + return heap_[pos]; + } + } + + reference front() { + if (size_ <= N) { + return stack_.front(); + } else { + return heap_.front(); + } + } + + const_reference front() const { + if (size_ <= N) { + return stack_.front(); + } else { + return heap_.front(); + } + } + + reference back() { + if (size_ <= N) { + return stack_[size_ - 1]; + } else { + return heap_[size_ - 1]; + } + } + + const_reference back() const { + if (size_ <= N) { + return stack_[size_ - 1]; + } else { + return heap_[size_ - 1]; + } + } + + pointer data() noexcept { + if (size_ <= N) { + return stack_.data(); + } else { + return heap_.data(); + } + } + + const_pointer data() const noexcept { + if (size_ <= N) { + return stack_.data(); + } else { + return heap_.data(); + } + } + + bool empty() const { return size_ == 0; } + + size_type size() const { return size_; } + + void shrink_to_fit() { + if (size_ > N) { + heap_.shrink_to_fit(); + } + } + + void push_back(const T &value) { + if (size_ < N) { + stack_[size_] = value; + } else { + if (size_ == N) { + std::move(stack_.begin(), stack_.end(), std::back_inserter(heap_)); + } + heap_.emplace_back(value); + } + size_ += 1; + } + + void pop_back() { + if (size_ == 0) return; + if (size_ <= N) { + size_ -= 1; + } else { + // currently using heap + heap_.pop_back(); + size_ -= 1; + // now check if all data can fit on stack + // if so, move back to stack + if (size_ <= N) { + std::move(heap_.begin(), heap_.end(), stack_.begin()); + heap_.clear(); + } + } + } + + // Resizes the container to contain count elements. 
+ void resize(size_type count, T value = T()) { + if (count <= N) { + // new `count` of elements completely fit on stack + if (size_ >= N) { + // currently, all data on heap + // move back to stack + std::move(heap_.begin(), heap_.end(), stack_.begin()); + } else { + // all data already on stack + // just update size + } + } else { + // new `count` of data is going to be on the heap + // check if data is currently on the stack + if (size_ <= N) { + // move to heap + std::move(stack_.begin(), stack_.end(), std::back_inserter(heap_)); + } + heap_.resize(count, value); + } + size_ = count; + } + + void clear() { + if (size_ > N) { + heap_.clear(); + } + size_ = 0; + } + + void swap(small_vector &other) noexcept { + std::swap(stack_, other.stack_); + std::swap(heap_, other.heap_); + std::swap(size_, other.size_); + }; + + class iterator { + public: + using self_type = iterator; + using value_type = T; + using reference = T &; + using pointer = T *; + using difference_type = int; + using iterator_category = std::bidirectional_iterator_tag; + iterator(pointer ptr) : ptr_(ptr) {} + self_type operator++() { + ptr_++; + return *this; + } + self_type operator++(int) { + self_type i = *this; + ptr_++; + return i; + } + self_type operator--() { + ptr_--; + return *this; + } + self_type operator--(int) { + self_type i = *this; + ptr_--; + return i; + } + self_type operator+(size_t i) { return self_type(ptr_ + i); } + self_type operator-(size_t i) { return self_type(ptr_ - i); } + reference operator*() { return *ptr_; } + pointer operator->() { return ptr_; } + bool operator==(const self_type &rhs) { return ptr_ == rhs.ptr_; } + bool operator!=(const self_type &rhs) { return ptr_ != rhs.ptr_; } + + private: + pointer ptr_; + }; + + class const_iterator { + public: + using self_type = const_iterator; + using value_type = T; + using reference = const T &; + using pointer = const T *; + using difference_type = int; + using iterator_category = std::bidirectional_iterator_tag; + 
const_iterator(pointer ptr) : ptr_(ptr) {} + self_type operator++() { + ptr_++; + return *this; + } + self_type operator++(int) { + self_type i = *this; + ptr_++; + return i; + } + self_type operator--() { + ptr_--; + return *this; + } + self_type operator--(int) { + self_type i = *this; + ptr_--; + return i; + } + self_type operator+(size_t i) { return self_type(ptr_ + i); } + self_type operator-(size_t i) { return self_type(ptr_ - i); } + const value_type &operator*() { return *ptr_; } + const pointer operator->() { return ptr_; } + bool operator==(const self_type &rhs) { return ptr_ == rhs.ptr_; } + bool operator!=(const self_type &rhs) { return ptr_ != rhs.ptr_; } + + private: + pointer ptr_; + }; + + iterator begin() { + if (size_ <= N) { + return iterator(stack_.data()); + } else { + return iterator(heap_.data()); + } + } + + iterator end() { + if (size_ <= N) { + return iterator(stack_.data() + size_); + } else { + return iterator(heap_.data() + size_); + } + } + + const_iterator cbegin() const { + if (size_ <= N) { + return const_iterator(stack_.data()); + } else { + return const_iterator(heap_.data()); + } + } + + const_iterator cend() const { + if (size_ <= N) { + return const_iterator(stack_.data() + size_); + } else { + return const_iterator(heap_.data() + size_); + } + } + + const_iterator begin() const { return cbegin(); } + + const_iterator end() const { return cend(); } + + void erase(iterator iter) { + size_t i = std::distance(begin(), iter); + if (size_ <= N) { + std::move_backward(stack_.begin() + i + 1, stack_.begin() + size_, + stack_.begin() + i); + } else { + heap_.erase(heap_.begin() + i); + } + size_ -= 1; + } + + void erase(const_iterator iter) { + erase(begin() + std::distance(cbegin(), iter)); + } + + void insert(iterator iter, const T &value) { + size_t i = std::distance(begin(), iter); + if (size_ < N) { + if (i + 1 < size_) + std::move_backward(stack_.begin() + i, stack_.begin() + size_, + stack_.begin() + i + 1); + stack_[i] = value; 
+ } else { + if (size_ == N) + std::move(stack_.begin(), stack_.end(), std::back_inserter(heap_)); + heap_.insert(heap_.begin() + i, value); + } + size_ += 1; + } + + void insert(const_iterator iter, const T &value) { + insert(cbegin() + std::distance(cbegin(), iter), value); + } +}; + +} // namespace manifold diff --git a/src/sdf/tape.h b/src/sdf/tape.h index 55274614b..f4c403f6a 100644 --- a/src/sdf/tape.h +++ b/src/sdf/tape.h @@ -358,6 +358,7 @@ inline std::string dumpOpCode(OpCode op) { case OpCode::CHOICE: return "CHOICE"; } + return ""; } } // namespace manifold::sdf diff --git a/src/sdf/value.cpp b/src/sdf/value.cpp index b4fded75b..2dcaa1809 100644 --- a/src/sdf/value.cpp +++ b/src/sdf/value.cpp @@ -184,17 +184,17 @@ Value Value::atan() const { Invalid())); } -std::pair, std::vector> Value::genTape() const { +std::pair, size_t> Value::genTape() const { using VO = std::shared_ptr; Context ctx; - std::unordered_map cache; - std::vector stack; - if (kind == ValueKind::OPERATION) stack.push_back(std::get(v)); + std::unordered_map cache; + std::vector stack; + if (kind == ValueKind::OPERATION) stack.push_back(std::get(v).get()); auto getOperand = [&](Value x, std::function f) { switch (x.kind) { case ValueKind::OPERATION: { - auto iter = cache.find(std::get(x.v)); + auto iter = cache.find(std::get(x.v).get()); if (iter != cache.end()) return iter->second; f(x); return Operand::none(); @@ -213,9 +213,9 @@ std::pair, std::vector> Value::genTape() const { }; while (!stack.empty()) { bool ready = true; - VO current = stack.back(); + auto current = stack.back(); auto f = [&](Value x) { - stack.push_back(std::get(x.v)); + stack.push_back(std::get(x.v).get()); ready = false; }; Operand a = getOperand(current->operands[0], f); diff --git a/src/sdf/value.h b/src/sdf/value.h index 220243dab..b7fa73d6d 100644 --- a/src/sdf/value.h +++ b/src/sdf/value.h @@ -62,7 +62,7 @@ class Value { Value atan() const; // internal use only - std::pair, std::vector> genTape() const; + 
std::pair, size_t> genTape() const; private: ValueKind kind = ValueKind::INVALID; diff --git a/test/sdf_tape_test.cpp b/test/sdf_tape_test.cpp index 1065c95d4..aef8c46b9 100644 --- a/test/sdf_tape_test.cpp +++ b/test/sdf_tape_test.cpp @@ -72,8 +72,9 @@ TEST(TAPE, Gyroid) { std::cos(p.z) * std::sin(p.x); }; - EvalContext ctxSimple{ - tape.first, VecView(tape.second.data(), tape.second.size())}; + std::vector buffer(tape.second, 0.0); + EvalContext ctxSimple{tape.first, + VecView(buffer.data(), buffer.size())}; for (double x = -period; x < period; x += period / n) { for (double y = -period; y < period; y += period / n) { for (double z = -period; z < period; z += period / n) { @@ -85,8 +86,8 @@ TEST(TAPE, Gyroid) { } } - std::vector> intervalBuffer; - for (auto d : tape.second) intervalBuffer.push_back(Interval(d)); + std::vector> intervalBuffer(tape.second, + Interval::constant(0.0)); EvalContext> ctx{ tape.first, VecView(intervalBuffer.data(), intervalBuffer.size())}; auto start = std::chrono::high_resolution_clock::now(); @@ -132,7 +133,13 @@ TEST(TAPE, Blobs) { else d = d - tmp; } + auto start = std::chrono::high_resolution_clock::now(); auto tape = d.genTape(); + auto end = std::chrono::high_resolution_clock::now(); + auto time = static_cast( + std::chrono::duration_cast(end - start) + .count()); + printf("codegen time: %dus, %ld\n", time, tape.first.size()); auto blobs = [&balls](vec3 p) { double d = 0; @@ -142,8 +149,9 @@ TEST(TAPE, Blobs) { } return d; }; - EvalContext ctxSimple{ - tape.first, VecView(tape.second.data(), tape.second.size())}; + std::vector buffer(tape.second, 0.0); + EvalContext ctxSimple{tape.first, + VecView(buffer.data(), buffer.size())}; for (double x = -5; x < 5; x += 0.05) { for (double y = -5; y < 5; y += 0.05) { for (double z = -5; z < 5; z += 0.05) { @@ -155,15 +163,15 @@ TEST(TAPE, Blobs) { } } - std::vector> intervalBuffer; - for (auto d : tape.second) intervalBuffer.push_back(Interval(d)); + std::vector> intervalBuffer(tape.second, 
+ Interval::constant(0.0)); EvalContext> ctx{ tape.first, VecView(intervalBuffer.data(), intervalBuffer.size())}; - auto start = std::chrono::high_resolution_clock::now(); + start = std::chrono::high_resolution_clock::now(); std::cout << recursive_interval(ctx, vec3(-5), vec3(10), 0.05, 0.5) << std::endl; - auto end = std::chrono::high_resolution_clock::now(); - auto time = static_cast( + end = std::chrono::high_resolution_clock::now(); + time = static_cast( std::chrono::duration_cast(end - start) .count()); printf("interval evaluation: %dus\n", time); From 2a864ed0d8e2b9348e11529a5b534bf1be43deff Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 25 Dec 2024 22:48:08 +0800 Subject: [PATCH 15/37] fix small_vector --- src/sdf/small_vector.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/sdf/small_vector.h b/src/sdf/small_vector.h index 492e56b0d..d749873aa 100644 --- a/src/sdf/small_vector.h +++ b/src/sdf/small_vector.h @@ -272,8 +272,8 @@ class small_vector { ptr_--; return i; } - self_type operator+(size_t i) { return self_type(ptr_ + i); } - self_type operator-(size_t i) { return self_type(ptr_ - i); } + self_type operator+(size_type i) { return self_type(ptr_ + i); } + self_type operator-(size_type i) { return self_type(ptr_ - i); } reference operator*() { return *ptr_; } pointer operator->() { return ptr_; } bool operator==(const self_type &rhs) { return ptr_ == rhs.ptr_; } @@ -310,8 +310,8 @@ class small_vector { ptr_--; return i; } - self_type operator+(size_t i) { return self_type(ptr_ + i); } - self_type operator-(size_t i) { return self_type(ptr_ - i); } + self_type operator+(size_type i) { return self_type(ptr_ + i); } + self_type operator-(size_type i) { return self_type(ptr_ - i); } const value_type &operator*() { return *ptr_; } const pointer operator->() { return ptr_; } bool operator==(const self_type &rhs) { return ptr_ == rhs.ptr_; } @@ -358,7 +358,7 @@ class small_vector { const_iterator end() const { return 
cend(); } void erase(iterator iter) { - size_t i = std::distance(begin(), iter); + size_type i = std::distance(begin(), iter); if (size_ <= N) { std::move_backward(stack_.begin() + i + 1, stack_.begin() + size_, stack_.begin() + i); @@ -373,7 +373,7 @@ class small_vector { } void insert(iterator iter, const T &value) { - size_t i = std::distance(begin(), iter); + size_type i = std::distance(begin(), iter); if (size_ < N) { if (i + 1 < size_) std::move_backward(stack_.begin() + i, stack_.begin() + size_, @@ -388,7 +388,7 @@ class small_vector { } void insert(const_iterator iter, const T &value) { - insert(cbegin() + std::distance(cbegin(), iter), value); + insert(begin() + std::distance(cbegin(), iter), value); } }; From 6bd7bfe8996bdf918767c0578f6a7c69aac2db15 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 25 Dec 2024 22:48:19 +0800 Subject: [PATCH 16/37] update register allocation --- src/sdf/context.cpp | 145 ++++++++++++++++++++++---------------------- src/sdf/context.h | 10 +++ 2 files changed, 81 insertions(+), 74 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 10dcf38d0..495bafced 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -120,14 +120,8 @@ Operand Context::addInstructionNoCache(OpCode op, Operand a, Operand b, opUses.emplace_back(); // update uses for (auto operand : {a, b, c}) { - small_vector *target; - if (operand.isResult()) { - target = &opUses[operand.toInstIndex()]; - } else if (operand.isConst()) { - target = &constantUses[operand.toConstIndex()]; - } else { - continue; - } + small_vector *target = getUses(operand); + if (target == nullptr) continue; // avoid duplicates if (target->empty() || target->back() != i) target->push_back(i); } @@ -151,14 +145,13 @@ void Context::optimizeFMA() { opUses[lhsInst].clear(); auto updateUses = [&](Operand x) { if (!x.isResult() && !x.isConst()) return; - auto &uses = x.isResult() ? 
opUses[x.toInstIndex()] - : constantUses[x.toConstIndex()]; - auto iter1 = std::lower_bound(uses.begin(), uses.end(), lhsInst); + auto uses = getUses(x); + auto iter1 = std::lower_bound(uses->cbegin(), uses->cend(), lhsInst); DEBUG_ASSERT(*iter1 == lhsInst, logicErr, "expected use"); - uses.erase(iter1); - auto iter2 = std::lower_bound(uses.begin(), uses.end(), i); + uses->erase(iter1); + auto iter2 = std::lower_bound(uses->cbegin(), uses->cend(), i); // make sure there is no duplicate - if (iter2 == uses.end() || *iter2 != i) uses.insert(iter2, i); + if (iter2 == uses->cend() || *iter2 != i) uses->insert(iter2, i); }; updateUses(a); if (a != b) updateUses(b); @@ -209,6 +202,10 @@ void Context::reschedule() { while (!stack.empty()) { int numResults = 0; auto back = stack.back(); + if (!computedInst[back].isNone()) { + stack.pop_back(); + continue; + } auto &curOperands = oldOperands[back]; for (auto operand : curOperands) if (requiresComputation(operand)) numResults += 1; @@ -298,28 +295,31 @@ void Context::reschedule() { computedInst[oldOperands.back()[0].toInstIndex()]); } -struct LruEntry { - size_t lastUse; +struct RegEntry { + size_t nextUse; Operand operand; uint8_t reg; - inline bool operator<(const LruEntry &other) const { - return lastUse < other.lastUse || - (lastUse == other.lastUse && operand.id < other.operand.id); + inline bool operator<(const RegEntry &other) const { + return nextUse > other.nextUse || + (nextUse == other.nextUse && operand.id < other.operand.id); } }; std::pair, size_t> Context::genTape() { std::vector tape; size_t bufferSize = 3; - std::vector availableReg; - // we may want to make this wrap around... 
- std::vector lru; std::unordered_map spills; std::vector spillSlots; - auto insertLru = [&](LruEntry entry) { - lru.insert(std::lower_bound(lru.begin(), lru.end(), entry), entry); + std::vector availableReg; + // register cache, ordered by next use of the variable + // when spilling is needed, we will evict the variable where the next use is + // the furthest + std::vector regCache; + auto insertRegCache = [&](RegEntry entry) { + regCache.insert(std::lower_bound(regCache.cbegin(), regCache.cend(), entry), + entry); }; auto allocateReg = [&]() { if (!availableReg.empty()) { @@ -328,7 +328,7 @@ std::pair, size_t> Context::genTape() { return reg; } // used too many registers, need to spill something - // note: tested with a limit of 10, spills correctly + // note: tested with a limit of 7, spills correctly if (bufferSize > 255) { uint32_t slot; if (spillSlots.empty()) { @@ -337,30 +337,31 @@ std::pair, size_t> Context::genTape() { slot = spillSlots.back(); spillSlots.pop_back(); } - spills.insert({lru.front().operand, slot}); + spills.insert({regCache.front().operand, slot}); tape.push_back(static_cast(OpCode::STORE)); std::array tmpBuffer; std::memcpy(tmpBuffer.data(), &slot, sizeof(uint32_t)); for (auto byte : tmpBuffer) tape.push_back(byte); - auto reg = lru.front().reg; + auto reg = regCache.front().reg; tape.push_back(reg); - lru.erase(lru.begin()); + regCache.erase(regCache.begin()); return reg; } auto reg = static_cast(bufferSize++); return reg; }; auto handleOperands = [&](std::array instOperands, size_t inst) { - auto getReg = [&](Operand operand, size_t inst) { + auto getOperandReg = [&](Operand operand, size_t inst) { + // will not be used, so we can return whatever we like if (operand.isNone()) return static_cast(0); - // special xyz + // special xyz variables with fixed register if (!operand.isConst() && !operand.isResult()) return static_cast(-(operand.id + 1)); - // Assume last use was updated, the operand, if present, must be at the - // end of the 
lru cache. Just do a linear scan from the back - for (auto it = lru.rbegin(); it != lru.rend(); ++it) { + // the operand, if present, must be at the end of the cache due to how the + // cache is ordered + for (auto it = regCache.rbegin(); it != regCache.rend(); ++it) { // no result - if (it->lastUse != inst) break; + if (it->nextUse != inst) break; if (it->operand == operand) { return it->reg; } @@ -385,52 +386,40 @@ std::pair, size_t> Context::genTape() { spillSlots.push_back(iter->second); spills.erase(iter); } - insertLru({inst, operand, reg}); + insertRegCache({inst, operand, reg}); return reg; }; - auto getUses = [&](Operand operand) { - if (operand.isResult()) { - return &opUses[operand.toInstIndex()]; - } else if (operand.isConst()) { - return &constantUses[operand.toConstIndex()]; - } else { - return static_cast *>(nullptr); - } - }; - auto updateLru = [&](Operand operand, size_t inst) { - const auto uses = getUses(operand); - if (uses == nullptr) return; - auto i = std::distance( - uses->begin(), std::lower_bound(uses->begin(), uses->end(), inst)); - if (i == 0 && !operand.isResult()) return; - size_t lastUse = i == 0 ? 
operand.toInstIndex() : uses->at(i - 1); - // when finding the entry, register field doesn't matter - auto iter = std::lower_bound(lru.begin(), lru.end(), - LruEntry{lastUse, operand, 0}); - if (iter != lru.end() && iter->operand == operand) { - auto entry = *iter; - entry.lastUse = inst; - lru.erase(iter); - insertLru(entry); - } - }; std::array regs; - for (size_t i : {0, 1, 2}) updateLru(instOperands[i], inst); - for (size_t i : {0, 1, 2}) regs[i] = getReg(instOperands[i], inst); - // after potential rematerialization, see if they are at the end of their - // lifetime + // note that we have to get the registers first, because we cannot spill the + // first register and reuse it in the second for example + for (size_t i : {0, 1, 2}) regs[i] = getOperandReg(instOperands[i], inst); + // update register cache for (size_t i : {0, 1, 2}) { if (!instOperands[i].isConst() && !instOperands[i].isResult()) continue; - if (getUses(instOperands[i])->back() != inst) continue; - // remove from lru, note that it is possible that it can be removed - // earlier from another operand - for (auto it = lru.rbegin(); it != lru.rend(); ++it) { - if (it->lastUse != inst) break; - if (it->reg == regs[i]) { - availableReg.push_back(regs[i]); - lru.erase(std::next(it).base()); + bool erased = false; + for (auto it = regCache.rbegin(); + it != regCache.rend() && it->nextUse == inst; ++it) { + if (it->operand == instOperands[i]) { + regCache.erase(std::next(it).base()); + erased = true; + break; } } + // if not found at the end of the cache, this means that it is handled by + // another operand + if (!erased) continue; + auto uses = getUses(instOperands[i]); + if (uses->back() == inst) { + // end of lifetime, free register + availableReg.push_back(regs[i]); + } else { + // insert it back with new next use + // because it is not at the end of its lifetime, the incremented + // iterator is guaranteed to be valid + insertRegCache( + {*(std::lower_bound(uses->cbegin(), uses->cend(), inst) + 1), 
+ instOperands[i], regs[i]}); + } } return regs; }; @@ -443,8 +432,16 @@ std::pair, size_t> Context::genTape() { tape.push_back(tmp[0]); break; } + // note that we may spill the operand register, but that is fine uint8_t reg = allocateReg(); - insertLru({i, Operand{static_cast(i) + 1}, reg}); + auto instOp = Operand{static_cast(i) + 1}; + auto uses = getUses(instOp); + if (uses->empty()) { + // immediately available + availableReg.push_back(reg); + } else { + insertRegCache({uses->front(), instOp, reg}); + } tape.push_back(static_cast(operations[i])); tape.push_back(reg); for (size_t j : {0, 1, 2}) { diff --git a/src/sdf/context.h b/src/sdf/context.h index 0f4d125b0..45b3bc68d 100644 --- a/src/sdf/context.h +++ b/src/sdf/context.h @@ -107,6 +107,16 @@ class Context { Operand addInstructionNoCache(OpCode op, Operand a = Operand::none(), Operand b = Operand::none(), Operand c = Operand::none()); + + small_vector* getUses(Operand operand) { + if (operand.isResult()) { + return &opUses[operand.toInstIndex()]; + } else if (operand.isConst()) { + return &constantUses[operand.toConstIndex()]; + } else { + return static_cast*>(nullptr); + } + }; }; } // namespace manifold::sdf From 1045db79082dca8b5f84c6428472b03e64b52daa Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 25 Dec 2024 22:58:55 +0800 Subject: [PATCH 17/37] further fixes --- src/sdf/small_vector.h | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/sdf/small_vector.h b/src/sdf/small_vector.h index d749873aa..407d7b1bf 100644 --- a/src/sdf/small_vector.h +++ b/src/sdf/small_vector.h @@ -258,7 +258,7 @@ class small_vector { ptr_++; return *this; } - self_type operator++(int) { + self_type operator++(int) const { self_type i = *this; ptr_++; return i; @@ -267,17 +267,19 @@ class small_vector { ptr_--; return *this; } - self_type operator--(int) { + self_type operator--(int) const { self_type i = *this; ptr_--; return i; } - self_type operator+(size_type i) { 
return self_type(ptr_ + i); } - self_type operator-(size_type i) { return self_type(ptr_ - i); } + self_type operator+(size_type i) const { return self_type(ptr_ + i); } + self_type operator-(size_type i) const { return self_type(ptr_ - i); } reference operator*() { return *ptr_; } + const value_type &operator*() const { return *ptr_; } pointer operator->() { return ptr_; } - bool operator==(const self_type &rhs) { return ptr_ == rhs.ptr_; } - bool operator!=(const self_type &rhs) { return ptr_ != rhs.ptr_; } + const pointer operator->() const { return ptr_; } + bool operator==(const self_type &rhs) const { return ptr_ == rhs.ptr_; } + bool operator!=(const self_type &rhs) const { return ptr_ != rhs.ptr_; } private: pointer ptr_; @@ -296,7 +298,7 @@ class small_vector { ptr_++; return *this; } - self_type operator++(int) { + self_type operator++(int) const { self_type i = *this; ptr_++; return i; @@ -305,17 +307,17 @@ class small_vector { ptr_--; return *this; } - self_type operator--(int) { + self_type operator--(int) const { self_type i = *this; ptr_--; return i; } - self_type operator+(size_type i) { return self_type(ptr_ + i); } - self_type operator-(size_type i) { return self_type(ptr_ - i); } - const value_type &operator*() { return *ptr_; } - const pointer operator->() { return ptr_; } - bool operator==(const self_type &rhs) { return ptr_ == rhs.ptr_; } - bool operator!=(const self_type &rhs) { return ptr_ != rhs.ptr_; } + self_type operator+(size_type i) const { return self_type(ptr_ + i); } + self_type operator-(size_type i) const { return self_type(ptr_ - i); } + reference operator*() const { return *ptr_; } + pointer operator->() const { return ptr_; } + bool operator==(const self_type &rhs) const { return ptr_ == rhs.ptr_; } + bool operator!=(const self_type &rhs) const { return ptr_ != rhs.ptr_; } private: pointer ptr_; From c11d1e6ec2de2cecdd73e3c65a31497095024d16 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 26 Dec 2024 00:22:22 +0800 Subject: 
[PATCH 18/37] fix small_vector --- src/sdf/small_vector.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sdf/small_vector.h b/src/sdf/small_vector.h index 407d7b1bf..88090f8cc 100644 --- a/src/sdf/small_vector.h +++ b/src/sdf/small_vector.h @@ -258,7 +258,7 @@ class small_vector { ptr_++; return *this; } - self_type operator++(int) const { + self_type operator++(int) { self_type i = *this; ptr_++; return i; @@ -267,7 +267,7 @@ class small_vector { ptr_--; return *this; } - self_type operator--(int) const { + self_type operator--(int) { self_type i = *this; ptr_--; return i; @@ -298,7 +298,7 @@ class small_vector { ptr_++; return *this; } - self_type operator++(int) const { + self_type operator++(int) { self_type i = *this; ptr_++; return i; @@ -307,7 +307,7 @@ class small_vector { ptr_--; return *this; } - self_type operator--(int) const { + self_type operator--(int) { self_type i = *this; ptr_--; return i; From 53523bbf9fe8469945009b922a2fd2ef7953db7e Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 26 Dec 2024 00:22:53 +0800 Subject: [PATCH 19/37] simplify --- src/sdf/context.cpp | 58 ++++++++++++++++++++++++++------------------- src/sdf/context.h | 6 +++-- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 495bafced..834b8a0ba 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -120,7 +120,7 @@ Operand Context::addInstructionNoCache(OpCode op, Operand a, Operand b, opUses.emplace_back(); // update uses for (auto operand : {a, b, c}) { - small_vector *target = getUses(operand); + auto target = getUses(operand); if (target == nullptr) continue; // avoid duplicates if (target->empty() || target->back() != i) target->push_back(i); @@ -128,6 +128,11 @@ Operand Context::addInstructionNoCache(OpCode op, Operand a, Operand b, return {static_cast(i) + 1}; } +Context::UsesVector::const_iterator findUse(const Context::UsesVector &uses, + size_t inst) { + return 
std::lower_bound(uses.cbegin(), uses.cend(), inst); +} + void Context::optimizeFMA() { auto tryApply = [&](size_t i, Operand lhs, Operand rhs) { if (!lhs.isResult()) return false; @@ -146,10 +151,10 @@ void Context::optimizeFMA() { auto updateUses = [&](Operand x) { if (!x.isResult() && !x.isConst()) return; auto uses = getUses(x); - auto iter1 = std::lower_bound(uses->cbegin(), uses->cend(), lhsInst); + auto iter1 = findUse(*uses, lhsInst); DEBUG_ASSERT(*iter1 == lhsInst, logicErr, "expected use"); uses->erase(iter1); - auto iter2 = std::lower_bound(uses->cbegin(), uses->cend(), i); + auto iter2 = findUse(*uses, i); // make sure there is no duplicate if (iter2 == uses->cend() || *iter2 != i) uses->insert(iter2, i); }; @@ -306,6 +311,13 @@ struct RegEntry { } }; +template +void addImmediate(std::vector &tape, T imm) { + std::array tmpBuffer; + std::memcpy(tmpBuffer.data(), &imm, sizeof(T)); + for (auto byte : tmpBuffer) tape.push_back(byte); +} + std::pair, size_t> Context::genTape() { std::vector tape; size_t bufferSize = 3; @@ -330,20 +342,21 @@ std::pair, size_t> Context::genTape() { // used too many registers, need to spill something // note: tested with a limit of 7, spills correctly if (bufferSize > 255) { - uint32_t slot; - if (spillSlots.empty()) { - slot = bufferSize++; - } else { - slot = spillSlots.back(); - spillSlots.pop_back(); - } - spills.insert({regCache.front().operand, slot}); - tape.push_back(static_cast(OpCode::STORE)); - std::array tmpBuffer; - std::memcpy(tmpBuffer.data(), &slot, sizeof(uint32_t)); - for (auto byte : tmpBuffer) tape.push_back(byte); auto reg = regCache.front().reg; - tape.push_back(reg); + // we can just discard constants, so only spill instruction results + if (regCache.front().operand.isResult()) { + uint32_t slot; + if (spillSlots.empty()) { + slot = bufferSize++; + } else { + slot = spillSlots.back(); + spillSlots.pop_back(); + } + spills.insert({regCache.front().operand, slot}); + 
tape.push_back(static_cast(OpCode::STORE)); + addImmediate(tape, slot); + tape.push_back(reg); + } regCache.erase(regCache.begin()); return reg; } @@ -359,9 +372,8 @@ std::pair, size_t> Context::genTape() { return static_cast(-(operand.id + 1)); // the operand, if present, must be at the end of the cache due to how the // cache is ordered - for (auto it = regCache.rbegin(); it != regCache.rend(); ++it) { - // no result - if (it->nextUse != inst) break; + for (auto it = regCache.rbegin(); + it != regCache.rend() && it->nextUse == inst; ++it) { if (it->operand == operand) { return it->reg; } @@ -380,9 +392,7 @@ std::pair, size_t> Context::genTape() { } else { tape.push_back(static_cast(OpCode::LOAD)); tape.push_back(reg); - std::array tmpBuffer; - std::memcpy(tmpBuffer.data(), &iter->second, sizeof(uint32_t)); - for (auto byte : tmpBuffer) tape.push_back(byte); + addImmediate(tape, iter->second); spillSlots.push_back(iter->second); spills.erase(iter); } @@ -416,9 +426,7 @@ std::pair, size_t> Context::genTape() { // insert it back with new next use // because it is not at the end of its lifetime, the incremented // iterator is guaranteed to be valid - insertRegCache( - {*(std::lower_bound(uses->cbegin(), uses->cend(), inst) + 1), - instOperands[i], regs[i]}); + insertRegCache({*(findUse(*uses, inst) + 1), instOperands[i], regs[i]}); } } return regs; diff --git a/src/sdf/context.h b/src/sdf/context.h index 45b3bc68d..f97fafcda 100644 --- a/src/sdf/context.h +++ b/src/sdf/context.h @@ -70,6 +70,8 @@ struct std::hash>> { namespace manifold::sdf { class Context { public: + using UsesVector = small_vector; + Operand addConstant(double d); Operand addInstruction(OpCode op, Operand a = Operand::none(), Operand b = Operand::none(), @@ -88,11 +90,11 @@ class Context { std::vector constants; // constant use vector, elements are instruction indices // constant with ID -4 is mapped to 0, etc. 
- std::vector> constantUses; + std::vector constantUses; // instructions, index 0 is mapped to ID 1, etc. std::vector operations; // instruction value use vector, elements are instruction indices - std::vector> opUses; + std::vector opUses; // operands, 0 is invalid (uses fewer operands) // +ve are instruction results // -ve are constants From f9602901df73ad2e0244697586964121e15392e5 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 27 Dec 2024 00:44:55 +0800 Subject: [PATCH 20/37] simplify code --- src/sdf/context.cpp | 106 +++++++++++++++++++------------------------- src/sdf/context.h | 20 +++------ src/sdf/value.cpp | 5 ++- 3 files changed, 55 insertions(+), 76 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 834b8a0ba..80c612b54 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -56,7 +56,7 @@ Operand Context::addConstant(double d) { return result.first->second; } -Operand Context::addInstruction(OpCode op, Operand a, Operand b, Operand c) { +Operand Context::addInstruction(OpCode op, std::array operands) { switch (op) { case OpCode::ADD: case OpCode::MUL: @@ -68,58 +68,53 @@ Operand Context::addInstruction(OpCode op, Operand a, Operand b, Operand c) { case OpCode::FMA: // first two operands commutative, sort them // makes it more likely to find common subexpressions - if (a.id > b.id) std::swap(a, b); + if (operands[0].id > operands[1].id) std::swap(operands[0], operands[1]); break; default: break; } // common subexpression elimination - auto key = std::make_pair(op, std::make_tuple(a, b, c)); + auto key = std::make_pair(op, operands); auto entry = cache.find(key); if (entry != cache.end()) return entry->second; - auto result = addInstructionNoCache(op, a, b, c); + auto result = addInstructionNoCache(op, operands); cache.insert({key, result}); return result; } // bypass the cache because we don't expect to have more common subexpressions // after optimizations -Operand Context::addInstructionNoCache(OpCode op, Operand a, 
Operand b, - Operand c) { +Operand Context::addInstructionNoCache(OpCode op, + std::array operands) { // constant choice - if (op == OpCode::CHOICE && a.isConst()) { - if (constants[a.toConstIndex()] == 1.0) return b; - return c; + if (op == OpCode::CHOICE && operands[0].isConst()) { + if (constants[operands[0].toConstIndex()] == 1.0) return operands[1]; + return operands[2]; } // constant propagation bool all_constants = true; - for (auto operand : {a, b, c}) { + for (auto operand : operands) { if (!operand.isConst() && !operand.isNone()) all_constants = false; } if (all_constants) { - tmpTape.clear(); - tmpBuffer.clear(); - tmpTape.push_back(static_cast(op)); - tmpTape.push_back(0); - tmpBuffer.push_back(0.0); - for (Operand x : {a, b, c}) { + tmpTape = {static_cast(op), 0}; + tmpBuffer = {0.0}; + for (Operand x : operands) { if (!x.isConst()) break; tmpTape.push_back(tmpBuffer.size()); tmpBuffer.push_back(constants[x.toConstIndex()]); } - tmpTape.push_back(static_cast(OpCode::RETURN)); - tmpTape.push_back(0); - return addConstant(EvalContext{ - tmpTape, VecView(tmpBuffer.data(), tmpBuffer.size())} - .eval()); + tmpTape.insert(tmpTape.end(), {static_cast(OpCode::RETURN), 0}); + auto bufferView = VecView(tmpBuffer.data(), tmpBuffer.size()); + return addConstant(EvalContext{tmpTape, bufferView}.eval()); } size_t i = operations.size(); operations.push_back(op); - operands.push_back({a, b, c}); + this->operands.push_back(operands); opUses.emplace_back(); // update uses - for (auto operand : {a, b, c}) { + for (auto operand : operands) { auto target = getUses(operand); if (target == nullptr) continue; // avoid duplicates @@ -290,14 +285,15 @@ void Context::reschedule() { stack.push_back(operand.toInstIndex()); } else { stack.pop_back(); - Operand result = addInstructionNoCache( - oldOperations[back], toNewOperand(curOperands[0]), - toNewOperand(curOperands[1]), toNewOperand(curOperands[2])); + std::array newOperands; + for (int i : {0, 1, 2}) newOperands[i] = 
toNewOperand(curOperands[i]); + Operand result = addInstructionNoCache(oldOperations[back], newOperands); computedInst[back] = result; } } addInstructionNoCache(OpCode::RETURN, - computedInst[oldOperands.back()[0].toInstIndex()]); + {computedInst[oldOperands.back()[0].toInstIndex()], + Operand::none(), Operand::none()}); } struct RegEntry { @@ -318,6 +314,13 @@ void addImmediate(std::vector &tape, T imm) { for (auto byte : tmpBuffer) tape.push_back(byte); } +template +typename V::value_type pop_back(V &v) { + auto x = v.back(); + v.pop_back(); + return x; +} + std::pair, size_t> Context::genTape() { std::vector tape; size_t bufferSize = 3; @@ -333,25 +336,17 @@ std::pair, size_t> Context::genTape() { regCache.insert(std::lower_bound(regCache.cbegin(), regCache.cend(), entry), entry); }; + auto allocateReg = [&]() { - if (!availableReg.empty()) { - auto reg = availableReg.back(); - availableReg.pop_back(); - return reg; - } + if (!availableReg.empty()) return pop_back(availableReg); // used too many registers, need to spill something // note: tested with a limit of 7, spills correctly if (bufferSize > 255) { auto reg = regCache.front().reg; // we can just discard constants, so only spill instruction results if (regCache.front().operand.isResult()) { - uint32_t slot; - if (spillSlots.empty()) { - slot = bufferSize++; - } else { - slot = spillSlots.back(); - spillSlots.pop_back(); - } + uint32_t slot = + spillSlots.empty() ? 
bufferSize++ : pop_back(spillSlots); spills.insert({regCache.front().operand, slot}); tape.push_back(static_cast(OpCode::STORE)); addImmediate(tape, slot); @@ -360,8 +355,7 @@ std::pair, size_t> Context::genTape() { regCache.erase(regCache.begin()); return reg; } - auto reg = static_cast(bufferSize++); - return reg; + return static_cast(bufferSize++); }; auto handleOperands = [&](std::array instOperands, size_t inst) { auto getOperandReg = [&](Operand operand, size_t inst) { @@ -370,28 +364,22 @@ std::pair, size_t> Context::genTape() { // special xyz variables with fixed register if (!operand.isConst() && !operand.isResult()) return static_cast(-(operand.id + 1)); - // the operand, if present, must be at the end of the cache due to how the + // the operand, if present, must be at the end of the cache, due to how the // cache is ordered for (auto it = regCache.rbegin(); - it != regCache.rend() && it->nextUse == inst; ++it) { - if (it->operand == operand) { - return it->reg; - } - } + it != regCache.rend() && it->nextUse == inst; ++it) + if (it->operand == operand) return it->reg; + // if not found, either a spill or a constant auto reg = allocateReg(); - auto iter = spills.find(operand); + // we will never spill constants + auto iter = operand.isResult() ? 
spills.find(operand) : spills.end(); if (iter == spills.end()) { DEBUG_ASSERT(operand.isConst(), logicErr, "can only materialize constants"); - tape.push_back(static_cast(OpCode::CONST)); - tape.push_back(reg); - std::array tmpBuffer; - std::memcpy(tmpBuffer.data(), &constants[operand.toConstIndex()], - sizeof(double)); - for (auto byte : tmpBuffer) tape.push_back(byte); + tape.insert(tape.end(), {static_cast(OpCode::CONST), reg}); + addImmediate(tape, constants[operand.toConstIndex()]); } else { - tape.push_back(static_cast(OpCode::LOAD)); - tape.push_back(reg); + tape.insert(tape.end(), {static_cast(OpCode::LOAD), reg}); addImmediate(tape, iter->second); spillSlots.push_back(iter->second); spills.erase(iter); @@ -436,8 +424,7 @@ std::pair, size_t> Context::genTape() { if (operations[i] == OpCode::NOP) continue; auto tmp = handleOperands(operands[i], i); if (operations[i] == OpCode::RETURN) { - tape.push_back(static_cast(operations[i])); - tape.push_back(tmp[0]); + tape.insert(tape.end(), {static_cast(operations[i]), tmp[0]}); break; } // note that we may spill the operand register, but that is fine @@ -450,8 +437,7 @@ std::pair, size_t> Context::genTape() { } else { insertRegCache({uses->front(), instOp, reg}); } - tape.push_back(static_cast(operations[i])); - tape.push_back(reg); + tape.insert(tape.end(), {static_cast(operations[i]), reg}); for (size_t j : {0, 1, 2}) { if (operands[i][j].isNone()) break; tape.push_back(tmp[j]); diff --git a/src/sdf/context.h b/src/sdf/context.h index f97fafcda..b6067e5d6 100644 --- a/src/sdf/context.h +++ b/src/sdf/context.h @@ -56,13 +56,11 @@ struct std::hash { }; template <> -struct std::hash>> { +struct std::hash>> { size_t operator()( - const std::pair>& pair) - const { + const std::pair>& pair) const { size_t h = std::hash()(static_cast(pair.first)); - hash_combine(h, std::get<0>(pair.second), std::get<1>(pair.second), - std::get<2>(pair.second)); + hash_combine(h, pair.second[0], pair.second[1], pair.second[2]); return h; 
} }; @@ -73,9 +71,7 @@ class Context { using UsesVector = small_vector; Operand addConstant(double d); - Operand addInstruction(OpCode op, Operand a = Operand::none(), - Operand b = Operand::none(), - Operand c = Operand::none()); + Operand addInstruction(OpCode op, std::array operands); void optimizeFMA(); void reschedule(); @@ -102,13 +98,9 @@ class Context { std::vector tmpTape; std::vector tmpBuffer; - std::unordered_map>, - Operand> - cache; + std::unordered_map>, Operand> cache; - Operand addInstructionNoCache(OpCode op, Operand a = Operand::none(), - Operand b = Operand::none(), - Operand c = Operand::none()); + Operand addInstructionNoCache(OpCode op, std::array operands); small_vector* getUses(Operand operand) { if (operand.isResult()) { diff --git a/src/sdf/value.cpp b/src/sdf/value.cpp index 2dcaa1809..e08bdf0bd 100644 --- a/src/sdf/value.cpp +++ b/src/sdf/value.cpp @@ -225,12 +225,13 @@ std::pair, size_t> Value::genTape() const { stack.pop_back(); // check if inserted... can happen when evaluating with a DAG if (cache.find(current) != cache.end()) continue; - cache.insert({current, ctx.addInstruction(current->op, a, b, c)}); + cache.insert({current, ctx.addInstruction(current->op, {a, b, c})}); } } Operand result = getOperand(*this, [](Value _) {}); - ctx.addInstruction(OpCode::RETURN, result, Operand::none(), Operand::none()); + ctx.addInstruction(OpCode::RETURN, + {result, Operand::none(), Operand::none()}); ctx.optimizeFMA(); ctx.reschedule(); From 181bf170f3662d852afc9b1c9e2a045b1b21f729 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 27 Dec 2024 00:45:10 +0800 Subject: [PATCH 21/37] idea about optimizing tape --- src/sdf/optimizing_tape.h | 99 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 src/sdf/optimizing_tape.h diff --git a/src/sdf/optimizing_tape.h b/src/sdf/optimizing_tape.h new file mode 100644 index 000000000..d7e0a4ca1 --- /dev/null +++ b/src/sdf/optimizing_tape.h @@ -0,0 +1,99 @@ +// 
Copyright 2024 The Manifold Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include + +#include "interval.h" +#include "manifold/vec_view.h" + +namespace manifold::sdf { +class OptimizerContext { + public: + private: + struct TapeMetadata { + size_t tapeLength; + size_t instructionCount; + size_t farDependencyCount; + }; + struct FarDependency { + uint32_t instructionIndex; + // dependency indices for the three operands + // value std::numeric_limits::max() means there is no such an + // operand + std::array dependencyIndices; + bool operator<(const FarDependency &other) const { + return instructionIndex < other.instructionIndex; + } + }; + + /* -------------------------------------------------------------------------- + * Multiple tape storage, but each tape is represented with 4 arrays, + * and we join the individual arrays. + * Ends are marked with tape metadata. + * + * Each tape consists of the underlying opcode, use counts for each + * instruction result, dependencies for each instruction operand. While + * dependencies can be reconstructed while executing the code, that adds a + * considerable overhead *regardless* of whether we can optimize later. + * Instead, we move the overhead to initialization and in the optimizer (it + * tracks the use count), with the cost of using more memory. 
+ * + * Note that we only track direct dependencies: Instructions can depend on + * load instructions instead of the actual instruction computing the value of + * the spill, and the load instruction depends on the corresponding store + * instruction. This allows us to remove register spills if possible. + * + * For dependencies, we use relative index to track them. + * ID = current ID - value + * - If value is 0, this means the operand is not being used, i.e. the + * instruction does not have 3 operands. + * - If value is 255, this means the dependency is too far away, and we + * should look it up in far dependencies. + * Ideally, we should not have too many far dependencies. + * + * Due to the variable length encoding used in the instruction tape, we + * cannot find the opcode in O(1) time given instruction ID, so things we do + * during optimization should not involve the opcode until we actually need + * to generate a new tape. + */ + std::vector tapes; + std::vector useCounts; + std::vector> dependencies; + std::vector farDependencies; + std::vector tapeMetadata; + + /* -------------------------------------------------------------------------- + * Per evaluation data structures. + * In principle, these can be constructed per evaluation, but to minimize + * memory operations, we reuse them. + * + * - `buffer` is the regular register buffer for tape evaluation. + * - `constantOffset` is a constant that adds to a corresponding register. + * This can be constant folded. + * - `results` contains instruction id + register id, indicating the + * predetermined branch result for choice/min/max function. + * Note that this must be sorted according to instruction id. + * - `uses` is the temporary use count vector that is mutable in each + * evaluation. It is reset before each evaluation. + * - `dead` contains instruction IDs that are dead, for later dead code + * elimination. 
+ */ + VecView> buffer; + VecView constantOffset; + std::vector> results; + std::vector uses; + std::vector dead; +}; +} // namespace manifold::sdf From 3e71d27b8dc44582ae55d78b45a5bbf880f42e2f Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 27 Dec 2024 00:47:31 +0800 Subject: [PATCH 22/37] format --- src/sdf/context.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 80c612b54..f657b45fc 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -364,8 +364,8 @@ std::pair, size_t> Context::genTape() { // special xyz variables with fixed register if (!operand.isConst() && !operand.isResult()) return static_cast(-(operand.id + 1)); - // the operand, if present, must be at the end of the cache, due to how the - // cache is ordered + // the operand, if present, must be at the end of the cache, due to how + // the cache is ordered for (auto it = regCache.rbegin(); it != regCache.rend() && it->nextUse == inst; ++it) if (it->operand == operand) return it->reg; From 7eeb12a1dce8be3255597a876a0b0be652136a33 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 27 Dec 2024 11:39:36 +0800 Subject: [PATCH 23/37] remove incorrect constexpr --- src/sdf/interval.h | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/sdf/interval.h b/src/sdf/interval.h index c9ec0d46c..bfac6f889 100644 --- a/src/sdf/interval.h +++ b/src/sdf/interval.h @@ -14,6 +14,7 @@ #pragma once #include +#include #include #include "manifold/common.h" @@ -33,15 +34,13 @@ struct Interval { Interval(Domain lower, Domain upper) : lower(lower), upper(upper) {} static Interval constant(Domain v) { return {v, v}; } - constexpr Interval operator+(const Interval &other) const { + Interval operator+(const Interval &other) const { return {lower + other.lower, upper + other.upper}; } - constexpr Interval operator-() const { return {-upper, -lower}; } + Interval operator-() const { return 
{-upper, -lower}; } - constexpr Interval operator-(const Interval &other) const { - return *this + (-other); - } + Interval operator-(const Interval &other) const { return *this + (-other); } Interval operator*(const Interval &other) const { Domain a1b1 = lower * other.lower; @@ -86,7 +85,7 @@ struct Interval { constexpr bool is_const() const { return lower == upper; } - constexpr Interval operator==(const Interval &other) const { + Interval operator==(const Interval &other) const { if (is_const() && other.is_const() && lower == other.lower) return constant(1); // must be equal if (lower > other.upper || upper < other.lower) @@ -96,49 +95,49 @@ struct Interval { constexpr bool operator==(double d) const { return is_const() && lower == d; } - constexpr Interval operator>(const Interval &other) const { + Interval operator>(const Interval &other) const { if (lower > other.upper) return constant(1); if (upper < other.lower) return constant(0); return {0, 1}; } - constexpr Interval operator<(const Interval &other) const { + Interval operator<(const Interval &other) const { if (upper < other.lower) return constant(1); if (lower > other.upper) return constant(0); return {0, 1}; } - constexpr Interval min(const Interval &other) const { + Interval min(const Interval &other) const { return {std::min(lower, other.lower), std::min(upper, other.upper)}; } - constexpr Interval max(const Interval &other) const { + Interval max(const Interval &other) const { return {std::max(lower, other.lower), std::max(upper, other.upper)}; } - constexpr Interval merge(const Interval &other) const { + Interval merge(const Interval &other) const { return {std::min(lower, other.lower), std::max(upper, other.upper)}; } template - constexpr Interval monotone_map(F f) const { + Interval monotone_map(F f) const { if (is_const()) return constant(f(lower)); return {f(lower), f(upper)}; } template - constexpr Interval antimonotone_map(F f) const { + Interval antimonotone_map(F f) const { if (is_const()) 
return constant(f(lower)); return {f(upper), f(lower)}; } - constexpr Interval abs() const { + Interval abs() const { if (lower >= 0) return *this; if (upper <= 0) return {-upper, -lower}; return {0.0, std::max(-lower, upper)}; } - constexpr Interval mod(double m) const { + Interval mod(double m) const { // FIXME: cannot deal with negative m right now... Domain diff = std::fmod(lower, m); if (diff < 0) diff += m; @@ -148,17 +147,17 @@ struct Interval { return {diff, upper - cycle_min}; } - constexpr Interval logical_and(const Interval &other) const { + Interval logical_and(const Interval &other) const { return {lower == 0.0 || other.lower == 0.0 ? 0.0 : 1.0, upper == 1.0 && other.upper == 1.0 ? 1.0 : 0.0}; } - constexpr Interval logical_or(const Interval &other) const { + Interval logical_or(const Interval &other) const { return {lower == 0.0 && other.lower == 0.0 ? 0.0 : 1.0, upper == 1.0 || other.upper == 1.0 ? 1.0 : 0.0}; } - constexpr Interval sin() const { + Interval sin() const { if (is_const()) return constant(std::sin(lower)); // largely similar to cos int64_t min_pis = static_cast(std::floor((lower - kHalfPi) / kPi)); @@ -176,7 +175,7 @@ struct Interval { return {new_min, new_max}; } - constexpr Interval cos() const { + Interval cos() const { if (is_const()) return constant(std::cos(lower)); int64_t min_pis = static_cast(std::floor(lower / kPi)); int64_t max_pis = static_cast(std::floor(upper / kPi)); @@ -193,7 +192,7 @@ struct Interval { return {new_min, new_max}; } - constexpr Interval tan() const { + Interval tan() const { if (is_const()) return constant(std::tan(lower)); int64_t min_pis = static_cast(std::floor((lower + kHalfPi) / kPi)); int64_t max_pis = static_cast(std::floor((upper + kHalfPi) / kPi)); From 82627c9b1f857f2ccdc249df0d657e0d4e21a536 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 27 Dec 2024 16:38:10 +0800 Subject: [PATCH 24/37] speedup codegen --- src/sdf/context.cpp | 284 ++++++++++++++++---------------------------- 
src/sdf/context.h | 52 ++++---- src/sdf/value.cpp | 45 +++---- src/utils.h | 9 ++ 4 files changed, 162 insertions(+), 228 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index f657b45fc..f56be48c5 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -25,10 +25,10 @@ namespace manifold::sdf { void Context::dump() const { #ifdef MANIFOLD_DEBUG - for (size_t i = 0; i < operations.size(); i++) { + for (size_t i = 0; i < instructions.size(); i++) { std::cout << i << " "; - std::cout << " " << dumpOpCode(operations[i]) << " "; - for (Operand operand : operands[i]) { + std::cout << " " << dumpOpCode(instructions[i].op) << " "; + for (Operand operand : instructions[i].operands) { if (operand.isNone()) break; if (operand.isResult()) std::cout << "r" << operand.toInstIndex(); @@ -56,8 +56,8 @@ Operand Context::addConstant(double d) { return result.first->second; } -Operand Context::addInstruction(OpCode op, std::array operands) { - switch (op) { +Operand Context::addInstruction(Instruction inst) { + switch (inst.op) { case OpCode::ADD: case OpCode::MUL: case OpCode::MIN: @@ -68,25 +68,26 @@ Operand Context::addInstruction(OpCode op, std::array operands) { case OpCode::FMA: // first two operands commutative, sort them // makes it more likely to find common subexpressions - if (operands[0].id > operands[1].id) std::swap(operands[0], operands[1]); + if (inst.operands[0].id > inst.operands[1].id) + std::swap(inst.operands[0], inst.operands[1]); break; default: break; } // common subexpression elimination - auto key = std::make_pair(op, operands); - auto entry = cache.find(key); + auto entry = cache.find(inst); if (entry != cache.end()) return entry->second; - auto result = addInstructionNoCache(op, operands); - cache.insert({key, result}); + auto result = addInstructionNoCache(inst); + cache.insert({inst, result}); return result; } // bypass the cache because we don't expect to have more common subexpressions // after optimizations -Operand 
Context::addInstructionNoCache(OpCode op, - std::array operands) { +Operand Context::addInstructionNoCache(Instruction inst) { // constant choice + auto op = inst.op; + auto &operands = inst.operands; if (op == OpCode::CHOICE && operands[0].isConst()) { if (constants[operands[0].toConstIndex()] == 1.0) return operands[1]; return operands[2]; @@ -96,22 +97,72 @@ Operand Context::addInstructionNoCache(OpCode op, for (auto operand : operands) { if (!operand.isConst() && !operand.isNone()) all_constants = false; } - if (all_constants) { - tmpTape = {static_cast(op), 0}; - tmpBuffer = {0.0}; - for (Operand x : operands) { - if (!x.isConst()) break; - tmpTape.push_back(tmpBuffer.size()); - tmpBuffer.push_back(constants[x.toConstIndex()]); + // we should not do anything about returning a constant... + if (all_constants && op != OpCode::RETURN) { + double result = 0.0; + switch (op) { + case OpCode::NOP: + case OpCode::RETURN: + case OpCode::CONST: + case OpCode::STORE: + case OpCode::LOAD: + break; + case OpCode::ABS: + case OpCode::NEG: + case OpCode::EXP: + case OpCode::LOG: + case OpCode::SQRT: + case OpCode::FLOOR: + case OpCode::CEIL: + case OpCode::ROUND: + case OpCode::SIN: + case OpCode::COS: + case OpCode::TAN: + case OpCode::ASIN: + case OpCode::ACOS: + case OpCode::ATAN: + result = EvalContext::handle_unary( + op, constants[operands[0].toConstIndex()]); + break; + case OpCode::DIV: + case OpCode::MOD: + case OpCode::MIN: + case OpCode::MAX: + case OpCode::EQ: + case OpCode::GT: + case OpCode::AND: + case OpCode::OR: + result = EvalContext::handle_binary( + op, constants[operands[0].toConstIndex()], + constants[operands[1].toConstIndex()]); + break; + case OpCode::ADD: + result = constants[operands[0].toConstIndex()] + + constants[operands[1].toConstIndex()]; + break; + case OpCode::SUB: + result = constants[operands[0].toConstIndex()] - + constants[operands[1].toConstIndex()]; + break; + case OpCode::MUL: + result = constants[operands[0].toConstIndex()] * + 
constants[operands[1].toConstIndex()]; + break; + case OpCode::FMA: + result = constants[operands[0].toConstIndex()] * + constants[operands[1].toConstIndex()] + + constants[operands[2].toConstIndex()]; + break; + case OpCode::CHOICE: + // should be unreachable + DEBUG_ASSERT(false, logicErr, "unreachable"); + break; } - tmpTape.insert(tmpTape.end(), {static_cast(OpCode::RETURN), 0}); - auto bufferView = VecView(tmpBuffer.data(), tmpBuffer.size()); - return addConstant(EvalContext{tmpTape, bufferView}.eval()); + return addConstant(result); } - size_t i = operations.size(); - operations.push_back(op); - this->operands.push_back(operands); + size_t i = instructions.size(); + instructions.push_back({op, operands}); opUses.emplace_back(); // update uses for (auto operand : operands) { @@ -128,19 +179,18 @@ Context::UsesVector::const_iterator findUse(const Context::UsesVector &uses, return std::lower_bound(uses.cbegin(), uses.cend(), inst); } -void Context::optimizeFMA() { +void Context::peephole() { auto tryApply = [&](size_t i, Operand lhs, Operand rhs) { if (!lhs.isResult()) return false; auto lhsInst = lhs.toInstIndex(); - if (operations[lhsInst] != OpCode::MUL || opUses[lhsInst].size() != 1) + if (instructions[lhsInst].op != OpCode::MUL || opUses[lhsInst].size() != 1) return false; - operations[i] = OpCode::FMA; - Operand a = operands[lhsInst][0]; - Operand b = operands[lhsInst][1]; - operands[i] = {a, b, rhs}; + Operand a = instructions[lhsInst].operands[0]; + Operand b = instructions[lhsInst].operands[1]; + instructions[i] = {OpCode::FMA, {a, b, rhs}}; // remove instruction - operations[lhsInst] = OpCode::NOP; - operands[lhsInst] = {Operand::none(), Operand::none(), Operand::none()}; + auto none = Operand::none(); + instructions[lhsInst] = {OpCode::NOP, {none, none, none}}; // update uses, note that we need to maintain the order of the indices opUses[lhsInst].clear(); auto updateUses = [&](Operand x) { @@ -157,145 +207,17 @@ void Context::optimizeFMA() { if (a != 
b) updateUses(b); return true; }; - for (size_t i = 0; i < operations.size(); i++) { - if (operations[i] == OpCode::ADD) { + for (size_t i = 0; i < instructions.size(); i++) { + auto &inst = instructions[i]; + if (inst.op == OpCode::ADD) { // check if lhs/rhs comes from MUL with no other uses - auto lhs = operands[i][0]; - auto rhs = operands[i][1]; + auto lhs = inst.operands[0]; + auto rhs = inst.operands[1]; if (!tryApply(i, lhs, rhs)) tryApply(i, rhs, lhs); } } } -// this does dead code elimination as well -// assumes the last instruction is return -// and note that this is not optimal, and cannot be optimal without dealing with -// NP-hard stuff... -void Context::reschedule() { - DEBUG_ASSERT(!operations.empty() && operations.back() == OpCode::RETURN, - logicErr, "return expected"); - cache.clear(); - auto oldOperations = std::move(operations); - auto oldOperands = std::move(operands); - opUses.clear(); - for (auto &uses : constantUses) uses.clear(); - - std::vector computedInst(oldOperands.size(), Operand::none()); - std::vector stack; - stack.reserve(64); - if (oldOperands.back()[0].isResult()) - stack.push_back(oldOperands.back()[0].toInstIndex()); - - std::vector bitset(oldOperands.size(), 0); - std::vector distances(oldOperands.size(), 0); - std::vector tmpStack; - tmpStack.reserve(64); - - auto requiresComputation = [&computedInst](Operand operand) { - return operand.isResult() && computedInst[operand.toInstIndex()].isNone(); - }; - auto toNewOperand = [&computedInst](Operand old) { - if (old.isResult()) return computedInst[old.toInstIndex()]; - return old; - }; - - while (!stack.empty()) { - int numResults = 0; - auto back = stack.back(); - if (!computedInst[back].isNone()) { - stack.pop_back(); - continue; - } - auto &curOperands = oldOperands[back]; - for (auto operand : curOperands) - if (requiresComputation(operand)) numResults += 1; - if (numResults > 1) { - // find common results first - // does this by recursively marking instructions to be the 
transitive - // dependency of operands - // we use a bitset, so if the bitset & (1 << (numResults + 1)) - 1, - // it means that the instruction is the common dependency for all operands - uint8_t mask = (1 << (numResults + 1)) - 1; - numResults = 0; - for (auto operand : curOperands) { - if (!requiresComputation(operand)) continue; - tmpStack.push_back(operand.toInstIndex()); - while (!tmpStack.empty()) { - auto current = tmpStack.back(); - tmpStack.pop_back(); - // already computed - if (!computedInst[current].isNone()) continue; - bitset[current] |= 1 << numResults; - for (auto x : oldOperands[current]) { - if (!x.isResult()) continue; - tmpStack.push_back(x.toInstIndex()); - } - } - numResults += 1; - } - // compute operand costs as distance in the dependency graph - std::array costs = {0, 0, 0}; - std::array ids = {0, 1, 2}; - for (size_t i = 0; i < curOperands.size(); i++) { - auto operand = curOperands[i]; - if (!requiresComputation(operand)) continue; - tmpStack.push_back(operand.toInstIndex()); - while (!tmpStack.empty()) { - auto current = tmpStack.back(); - size_t maxDistance = 0; - for (auto x : oldOperands[current]) { - if (!x.isResult()) continue; - auto inst = x.toInstIndex(); - - // computed, doesn't affect distance - if (!computedInst[inst].isNone()) continue; - - // shared dependency between operands, also doesn't affect distance - if ((bitset[inst] & mask) == mask) continue; - - auto d = distances[inst]; - if (d == 0) { - // not computed - tmpStack.push_back(x.toInstIndex()); - maxDistance = std::numeric_limits::max(); - } else { - maxDistance = std::max(maxDistance, d); - } - } - if (maxDistance != std::numeric_limits::max()) { - tmpStack.pop_back(); - distances[current] = maxDistance + 1; - } - } - costs[i] = distances[operand.toInstIndex()]; - std::fill(distances.begin(), distances.end(), 0); - } - std::sort(ids.begin(), ids.end(), - [&costs](size_t x, size_t y) { return costs[x] < costs[y]; }); - // expensive operands are placed at the top of 
the stack, i.e. scheduled - // earlier - for (size_t x : ids) - if (requiresComputation(curOperands[x])) - stack.push_back(curOperands[x].toInstIndex()); - - std::fill(bitset.begin(), bitset.end(), 0); - } else if (numResults == 1) { - for (auto operand : curOperands) - if (requiresComputation(operand)) - stack.push_back(operand.toInstIndex()); - } else { - stack.pop_back(); - std::array newOperands; - for (int i : {0, 1, 2}) newOperands[i] = toNewOperand(curOperands[i]); - Operand result = addInstructionNoCache(oldOperations[back], newOperands); - computedInst[back] = result; - } - } - addInstructionNoCache(OpCode::RETURN, - {computedInst[oldOperands.back()[0].toInstIndex()], - Operand::none(), Operand::none()}); -} - struct RegEntry { size_t nextUse; Operand operand; @@ -324,7 +246,7 @@ typename V::value_type pop_back(V &v) { std::pair, size_t> Context::genTape() { std::vector tape; size_t bufferSize = 3; - std::unordered_map spills; + unordered_map spills; std::vector spillSlots; std::vector availableReg; @@ -420,26 +342,24 @@ std::pair, size_t> Context::genTape() { return regs; }; - for (size_t i = 0; i < operations.size(); i++) { - if (operations[i] == OpCode::NOP) continue; - auto tmp = handleOperands(operands[i], i); - if (operations[i] == OpCode::RETURN) { - tape.insert(tape.end(), {static_cast(operations[i]), tmp[0]}); + for (size_t i = 0; i < instructions.size(); i++) { + auto &inst = instructions[i]; + if (inst.op == OpCode::NOP) continue; + auto instOp = Operand{static_cast(i) + 1}; + auto uses = getUses(instOp); + // avoid useless ops + if (inst.op != OpCode::RETURN && uses->empty()) continue; + auto tmp = handleOperands(inst.operands, i); + if (inst.op == OpCode::RETURN) { + tape.insert(tape.end(), {static_cast(inst.op), tmp[0]}); break; } // note that we may spill the operand register, but that is fine uint8_t reg = allocateReg(); - auto instOp = Operand{static_cast(i) + 1}; - auto uses = getUses(instOp); - if (uses->empty()) { - // immediately 
available - availableReg.push_back(reg); - } else { - insertRegCache({uses->front(), instOp, reg}); - } - tape.insert(tape.end(), {static_cast(operations[i]), reg}); + insertRegCache({uses->front(), instOp, reg}); + tape.insert(tape.end(), {static_cast(inst.op), reg}); for (size_t j : {0, 1, 2}) { - if (operands[i][j].isNone()) break; + if (inst.operands[j].isNone()) break; tape.push_back(tmp[j]); } } diff --git a/src/sdf/context.h b/src/sdf/context.h index b6067e5d6..5c860d3a9 100644 --- a/src/sdf/context.h +++ b/src/sdf/context.h @@ -13,15 +13,18 @@ // limitations under the License. #pragma once -#include #include #include +#include "../utils.h" #include "small_vector.h" #include "tape.h" namespace manifold::sdf { +// operands, 0 is invalid (uses fewer operands) +// +ve are instruction results +// -ve are constants struct Operand { int id; @@ -35,12 +38,23 @@ struct Operand { bool operator!=(const Operand& other) const { return id != other.id; } bool operator<(const Operand& other) const { return id < other.id; } }; + +struct Instruction { + OpCode op; + std::array operands; + bool operator==(const Instruction& other) const { + if (op != other.op) return false; + return operands[0] == other.operands[0] && + operands[1] == other.operands[1] && operands[2] == other.operands[2]; + } +}; } // namespace manifold::sdf using namespace manifold::sdf; inline void hash_combine(std::size_t& seed) {} +// note: ankerl hash combine function is too costly template inline void hash_combine(std::size_t& seed, const T& v, Rest... rest) { std::hash hasher; @@ -50,17 +64,14 @@ inline void hash_combine(std::size_t& seed, const T& v, Rest... 
rest) { template <> struct std::hash { - size_t operator()(const Operand& operand) const { - return std::hash()(operand.id); - } + size_t operator()(const Operand& operand) const { return operand.id; } }; template <> -struct std::hash>> { - size_t operator()( - const std::pair>& pair) const { - size_t h = std::hash()(static_cast(pair.first)); - hash_combine(h, pair.second[0], pair.second[1], pair.second[2]); +struct std::hash { + size_t operator()(const Instruction& inst) const { + size_t h = static_cast(inst.op); + hash_combine(h, inst.operands[0], inst.operands[1], inst.operands[2]); return h; } }; @@ -71,8 +82,8 @@ class Context { using UsesVector = small_vector; Operand addConstant(double d); - Operand addInstruction(OpCode op, std::array operands); - void optimizeFMA(); + Operand addInstruction(Instruction); + void peephole(); void reschedule(); std::pair, size_t> genTape(); @@ -82,33 +93,26 @@ class Context { private: // constants have negative IDs, starting from -4 // -1, -2 and -3 are reserved for x y z - std::unordered_map constantsIds; + unordered_map constantsIds; std::vector constants; // constant use vector, elements are instruction indices // constant with ID -4 is mapped to 0, etc. std::vector constantUses; // instructions, index 0 is mapped to ID 1, etc. 
- std::vector operations; + std::vector instructions; // instruction value use vector, elements are instruction indices std::vector opUses; - // operands, 0 is invalid (uses fewer operands) - // +ve are instruction results - // -ve are constants - std::vector> operands; - - std::vector tmpTape; - std::vector tmpBuffer; - std::unordered_map>, Operand> cache; + unordered_map cache; - Operand addInstructionNoCache(OpCode op, std::array operands); + Operand addInstructionNoCache(Instruction); - small_vector* getUses(Operand operand) { + UsesVector* getUses(Operand operand) { if (operand.isResult()) { return &opUses[operand.toInstIndex()]; } else if (operand.isConst()) { return &constantUses[operand.toConstIndex()]; } else { - return static_cast*>(nullptr); + return static_cast(nullptr); } }; }; diff --git a/src/sdf/value.cpp b/src/sdf/value.cpp index e08bdf0bd..d49679847 100644 --- a/src/sdf/value.cpp +++ b/src/sdf/value.cpp @@ -14,8 +14,7 @@ #include "value.h" -#include - +#include "../utils.h" #include "context.h" #include "tape.h" @@ -187,17 +186,26 @@ Value Value::atan() const { std::pair, size_t> Value::genTape() const { using VO = std::shared_ptr; Context ctx; - std::unordered_map cache; + unordered_map cache; std::vector stack; + cache.reserve(128); + stack.reserve(128); + if (kind == ValueKind::OPERATION) stack.push_back(std::get(v).get()); - auto getOperand = [&](Value x, std::function f) { + auto none = Operand::none(); + + bool ready = true; + auto getOperand = [&](const Value& x, bool pushStack) { switch (x.kind) { case ValueKind::OPERATION: { auto iter = cache.find(std::get(x.v).get()); if (iter != cache.end()) return iter->second; - f(x); - return Operand::none(); + if (pushStack) { + ready = false; + stack.push_back(std::get(x.v).get()); + } + return none; } case ValueKind::CONSTANT: return ctx.addConstant(std::get(x.v)); @@ -208,33 +216,26 @@ std::pair, size_t> Value::genTape() const { case ValueKind::Z: return Operand{-3}; default: - return 
Operand::none(); + return none; } }; while (!stack.empty()) { - bool ready = true; + ready = true; auto current = stack.back(); - auto f = [&](Value x) { - stack.push_back(std::get(x.v).get()); - ready = false; - }; - Operand a = getOperand(current->operands[0], f); - Operand b = getOperand(current->operands[1], f); - Operand c = getOperand(current->operands[2], f); + Operand a = getOperand(current->operands[0], true); + Operand b = getOperand(current->operands[1], true); + Operand c = getOperand(current->operands[2], true); if (ready) { stack.pop_back(); // check if inserted... can happen when evaluating with a DAG if (cache.find(current) != cache.end()) continue; - cache.insert({current, ctx.addInstruction(current->op, {a, b, c})}); + cache.insert({current, ctx.addInstruction({current->op, {a, b, c}})}); } } - Operand result = getOperand(*this, [](Value _) {}); - ctx.addInstruction(OpCode::RETURN, - {result, Operand::none(), Operand::none()}); - - ctx.optimizeFMA(); - ctx.reschedule(); + Operand result = getOperand(*this, false); + ctx.addInstruction({OpCode::RETURN, {result, none, none}}); + ctx.peephole(); return ctx.genTape(); } diff --git a/src/utils.h b/src/utils.h index 12d6a5847..453a40c5e 100644 --- a/src/utils.h +++ b/src/utils.h @@ -35,6 +35,15 @@ #include "./parallel.h" +#if __has_include() +#include +template +using unordered_map = phmap::flat_hash_map; +#else +template +using unordered_map = std::unordered_map; +#endif + #if __has_include() #include #else From 36321cd40dfc5854e32a52b09030bb7efabcff6c Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 27 Dec 2024 16:50:37 +0800 Subject: [PATCH 25/37] avoid recursive value drop --- src/sdf/value.cpp | 17 +++++++++++++++++ src/sdf/value.h | 1 + 2 files changed, 18 insertions(+) diff --git a/src/sdf/value.cpp b/src/sdf/value.cpp index d49679847..cc9a7197e 100644 --- a/src/sdf/value.cpp +++ b/src/sdf/value.cpp @@ -183,6 +183,23 @@ Value Value::atan() const { Invalid())); } +Value::~Value() { + using VO = 
std::shared_ptr; + std::vector stack; + auto push = [&stack](VO&& vo) { + if (vo.use_count() == 1) stack.emplace_back(vo); + }; + if (kind == ValueKind::OPERATION) push(std::get(std::move(v))); + while (!stack.empty()) { + auto back = std::move(stack.back()); + stack.pop_back(); + for (auto& value : back->operands) { + if (value.kind == ValueKind::OPERATION) + push(std::get(std::move(value.v))); + } + } +} + std::pair, size_t> Value::genTape() const { using VO = std::shared_ptr; Context ctx; diff --git a/src/sdf/value.h b/src/sdf/value.h index b7fa73d6d..181d6c0c2 100644 --- a/src/sdf/value.h +++ b/src/sdf/value.h @@ -63,6 +63,7 @@ class Value { // internal use only std::pair, size_t> genTape() const; + ~Value(); private: ValueKind kind = ValueKind::INVALID; From 7cbc4de07420d5d899611106215d2e0017819f0c Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 27 Dec 2024 23:17:40 +0800 Subject: [PATCH 26/37] do some optimizations --- src/sdf/context.cpp | 65 ++++++++++++++++++++++++++++--- src/sdf/context.h | 3 ++ src/sdf/interval.h | 95 +++++++++++++++++++++++++-------------------- 3 files changed, 116 insertions(+), 47 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index f56be48c5..1711318eb 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -82,9 +82,7 @@ Operand Context::addInstruction(Instruction inst) { return result; } -// bypass the cache because we don't expect to have more common subexpressions -// after optimizations -Operand Context::addInstructionNoCache(Instruction inst) { +std::optional Context::trySimplify(Instruction inst) { // constant choice auto op = inst.op; auto &operands = inst.operands; @@ -161,11 +159,68 @@ Operand Context::addInstructionNoCache(Instruction inst) { return addConstant(result); } + // simple simplifications + if (op == OpCode::ADD) { + // add is commutative, so if there is a constant, it must be on the left + // 0 + x => x + if (operands[0].isConst() && constants[operands[0].toConstIndex()] == 0.0) 
+ return operands[1]; + } + if (op == OpCode::SUB) { + // x - 0 => x + if (operands[1].isConst() && constants[operands[1].toConstIndex()] == 0.0) + return operands[0]; + } + if (op == OpCode::MUL) { + // mul is commutative, so if there is a constant, it must be on the left + // 0 * x => 0 + if (operands[0].isConst() && constants[operands[0].toConstIndex()] == 0.0) + return operands[0]; + // 1 * x => x + if (operands[0].isConst() && constants[operands[0].toConstIndex()] == 1.0) + return operands[1]; + } + if (op == OpCode::DIV) { + if (operands[1].isConst() && constants[operands[1].toConstIndex()] == 1.0) + return operands[0]; + } + + return {}; +} + +Instruction Context::strengthReduction(Instruction inst) { + // strength reduction: reduce instructions to simpler variants + // not very helpful for point evaluation in a vm because instruction decoding + // is the most time consuming part. + // This can be useful if we want to do JIT, interval evaluation or bulk + // evaluation. + if (inst.op == OpCode::MUL && inst.operands[1].isConst() && + constants[inst.operands[1].toConstIndex()] == 2.0) { + // x * 2 => x + x + return {OpCode::ADD, {inst.operands[0], inst.operands[1], Operand::none()}}; + } + if (inst.op == OpCode::DIV && inst.operands[1].isConst()) { + // x / c => x * (1/c) + return {OpCode::MUL, + {inst.operands[0], + addConstant(1.0 / constants[inst.operands[1].toConstIndex()]), + Operand::none()}}; + } + return inst; +} + +// bypass the cache because we don't expect to have more common subexpressions +// after optimizations +Operand Context::addInstructionNoCache(Instruction inst) { + auto simplified = trySimplify(inst); + if (simplified.has_value()) return simplified.value(); + inst = strengthReduction(inst); + size_t i = instructions.size(); - instructions.push_back({op, operands}); + instructions.push_back(inst); opUses.emplace_back(); // update uses - for (auto operand : operands) { + for (auto operand : inst.operands) { auto target = getUses(operand); if 
(target == nullptr) continue; // avoid duplicates diff --git a/src/sdf/context.h b/src/sdf/context.h index 5c860d3a9..34d6187d9 100644 --- a/src/sdf/context.h +++ b/src/sdf/context.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once +#include #include #include @@ -104,6 +105,8 @@ class Context { std::vector opUses; unordered_map cache; + std::optional trySimplify(Instruction); + Instruction strengthReduction(Instruction); Operand addInstructionNoCache(Instruction); UsesVector* getUses(Operand operand) { diff --git a/src/sdf/interval.h b/src/sdf/interval.h index bfac6f889..aa991589e 100644 --- a/src/sdf/interval.h +++ b/src/sdf/interval.h @@ -27,6 +27,9 @@ struct Interval { Domain lower; Domain upper; + static constexpr Domain zero = static_cast(0); + static constexpr Domain one = static_cast(1); + Interval() : lower(-std::numeric_limits::infinity()), upper(std::numeric_limits::infinity()) {} @@ -42,14 +45,22 @@ struct Interval { Interval operator-(const Interval &other) const { return *this + (-other); } + Interval operator*(Domain d) const { + if (d > zero) return {lower * d, upper * d}; + return {upper * d, lower * d}; + } + Interval operator*(const Interval &other) const { + if (is_const()) return other * lower; + if (other.is_const()) return *this * other.lower; + Domain a1b1 = lower * other.lower; Domain a2b2 = upper * other.upper; // we can write more "fast paths", but at some point it will become slower // than just going the general path... 
- if (lower >= 0.0 && other.lower >= 0.0) + if (lower >= zero && other.lower >= zero) return {a1b1, a2b2}; - else if (upper <= 0.0 && other.upper <= 0.0) + else if (upper <= zero && other.upper <= zero) return {a2b2, a1b1}; Domain a1b2 = lower * other.upper; @@ -58,19 +69,13 @@ struct Interval { std::max(std::max(a1b1, a1b2), std::max(a2b1, a2b2))}; } - Interval operator*(double d) const { - if (d > 0) return {lower * d, upper * d}; - return {upper * d, lower * d}; - } - Interval operator/(const Interval &other) const { if (other.is_const()) return *this / other.lower; - constexpr Domain zero = static_cast(0); constexpr Domain infty = std::numeric_limits::infinity(); Interval reci; if (other.lower >= zero || other.upper <= zero) { - reci.lower = other.upper == zero ? -infty : (1 / other.upper); - reci.upper = other.lower == zero ? infty : (1 / other.lower); + reci.lower = other.upper == zero ? -infty : (one / other.upper); + reci.upper = other.lower == zero ? infty : (one / other.lower); } else { reci.lower = -infty; reci.upper = infty; @@ -78,8 +83,8 @@ struct Interval { return *this * reci; } - Interval operator/(double d) const { - if (d > 0) return {lower / d, upper / d}; + Interval operator/(Domain d) const { + if (d > zero) return {lower / d, upper / d}; return {upper / d, lower / d}; } @@ -87,24 +92,24 @@ struct Interval { Interval operator==(const Interval &other) const { if (is_const() && other.is_const() && lower == other.lower) - return constant(1); // must be equal + return constant(one); // must be equal if (lower > other.upper || upper < other.lower) - return constant(0); // disjoint, cannot possibly be equal - return {0, 1}; + return constant(zero); // disjoint, cannot possibly be equal + return {zero, one}; } - constexpr bool operator==(double d) const { return is_const() && lower == d; } + constexpr bool operator==(Domain d) const { return is_const() && lower == d; } Interval operator>(const Interval &other) const { - if (lower > other.upper) 
return constant(1); - if (upper < other.lower) return constant(0); - return {0, 1}; + if (lower > other.upper) return constant(one); + if (upper < other.lower) return constant(zero); + return {zero, one}; } Interval operator<(const Interval &other) const { - if (upper < other.lower) return constant(1); - if (lower > other.upper) return constant(0); - return {0, 1}; + if (upper < other.lower) return constant(one); + if (lower > other.upper) return constant(zero); + return {zero, one}; } Interval min(const Interval &other) const { @@ -132,36 +137,38 @@ struct Interval { } Interval abs() const { - if (lower >= 0) return *this; - if (upper <= 0) return {-upper, -lower}; - return {0.0, std::max(-lower, upper)}; + if (lower >= zero) return *this; + if (upper <= zero) return {-upper, -lower}; + return {zero, std::max(-lower, upper)}; } - Interval mod(double m) const { + Interval mod(Domain m) const { // FIXME: cannot deal with negative m right now... Domain diff = std::fmod(lower, m); - if (diff < 0) diff += m; + if (diff < zero) diff += m; Domain cycle_min = lower - diff; // may be disjoint intervals, but we don't deal with that... - if (upper - cycle_min >= m) return {0.0, m}; + if (upper - cycle_min >= m) return {zero, m}; return {diff, upper - cycle_min}; } Interval logical_and(const Interval &other) const { - return {lower == 0.0 || other.lower == 0.0 ? 0.0 : 1.0, - upper == 1.0 && other.upper == 1.0 ? 1.0 : 0.0}; + return {lower == zero || other.lower == zero ? zero : one, + upper == one && other.upper == one ? one : zero}; } Interval logical_or(const Interval &other) const { - return {lower == 0.0 && other.lower == 0.0 ? 0.0 : 1.0, - upper == 1.0 || other.upper == 1.0 ? 1.0 : 0.0}; + return {lower == zero && other.lower == zero ? zero : one, + upper == one || other.upper == one ? 
one : zero}; } Interval sin() const { if (is_const()) return constant(std::sin(lower)); // largely similar to cos - int64_t min_pis = static_cast(std::floor((lower - kHalfPi) / kPi)); - int64_t max_pis = static_cast(std::floor((upper - kHalfPi) / kPi)); + int64_t min_pis = static_cast(std::floor( + (lower - static_cast(kHalfPi)) / static_cast(kPi))); + int64_t max_pis = static_cast(std::floor( + (upper - static_cast(kHalfPi)) / static_cast(kPi))); bool not_cross_pos_1 = (min_pis % 2 == 0) ? max_pis - min_pis <= 1 : max_pis == min_pis; @@ -169,16 +176,18 @@ struct Interval { (min_pis % 2 == 0) ? max_pis == min_pis : max_pis - min_pis <= 1; Domain new_min = - not_cross_neg_1 ? std::min(std::sin(lower), std::sin(upper)) : -1.0; + not_cross_neg_1 ? std::min(std::sin(lower), std::sin(upper)) : -one; Domain new_max = - not_cross_pos_1 ? std::max(std::sin(lower), std::sin(upper)) : 1.0; + not_cross_pos_1 ? std::max(std::sin(lower), std::sin(upper)) : one; return {new_min, new_max}; } Interval cos() const { if (is_const()) return constant(std::cos(lower)); - int64_t min_pis = static_cast(std::floor(lower / kPi)); - int64_t max_pis = static_cast(std::floor(upper / kPi)); + int64_t min_pis = + static_cast(std::floor(lower / static_cast(kPi))); + int64_t max_pis = + static_cast(std::floor(upper / static_cast(kPi))); bool not_cross_pos_1 = (min_pis % 2 == 0) ? max_pis - min_pis <= 1 : max_pis == min_pis; @@ -186,16 +195,18 @@ struct Interval { (min_pis % 2 == 0) ? max_pis == min_pis : max_pis - min_pis <= 1; Domain new_min = - not_cross_neg_1 ? std::min(std::cos(lower), std::cos(upper)) : -1.0; + not_cross_neg_1 ? std::min(std::cos(lower), std::cos(upper)) : -one; Domain new_max = - not_cross_pos_1 ? std::max(std::cos(lower), std::cos(upper)) : 1.0; + not_cross_pos_1 ? 
std::max(std::cos(lower), std::cos(upper)) : one; return {new_min, new_max}; } Interval tan() const { if (is_const()) return constant(std::tan(lower)); - int64_t min_pis = static_cast(std::floor((lower + kHalfPi) / kPi)); - int64_t max_pis = static_cast(std::floor((upper + kHalfPi) / kPi)); + int64_t min_pis = static_cast(std::floor( + (lower + static_cast(kHalfPi)) / static_cast(kPi))); + int64_t max_pis = static_cast(std::floor( + (upper + static_cast(kHalfPi)) / static_cast(kPi))); if (min_pis != max_pis) return {-std::numeric_limits::infinity(), std::numeric_limits::infinity()}; From 2da5ffb432be0256e0873c6d60b2f3a0685530ba Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sat, 28 Dec 2024 23:06:51 +0800 Subject: [PATCH 27/37] fix small_vector, really dumb --- src/sdf/small_vector.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/sdf/small_vector.h b/src/sdf/small_vector.h index 88090f8cc..73e81742b 100644 --- a/src/sdf/small_vector.h +++ b/src/sdf/small_vector.h @@ -216,6 +216,7 @@ class small_vector { // currently, all data on heap // move back to stack std::move(heap_.begin(), heap_.end(), stack_.begin()); + heap_.clear(); } else { // all data already on stack // just update size @@ -233,9 +234,7 @@ class small_vector { } void clear() { - if (size_ > N) { - heap_.clear(); - } + if (size_ > N) heap_.clear(); size_ = 0; } @@ -360,12 +359,18 @@ class small_vector { const_iterator end() const { return cend(); } void erase(iterator iter) { + if (size_ == 0) return; size_type i = std::distance(begin(), iter); if (size_ <= N) { - std::move_backward(stack_.begin() + i + 1, stack_.begin() + size_, - stack_.begin() + i); + if (i < size_ - 1) + std::move(stack_.begin() + i + 1, stack_.begin() + size_, + stack_.begin() + i); } else { heap_.erase(heap_.begin() + i); + if (size_ == N + 1) { + std::copy(heap_.begin(), heap_.end(), stack_.begin()); + heap_.clear(); + } } size_ -= 1; } @@ -377,13 +382,14 @@ class small_vector { 
void insert(iterator iter, const T &value) { size_type i = std::distance(begin(), iter); if (size_ < N) { - if (i + 1 < size_) + if (i < size_) std::move_backward(stack_.begin() + i, stack_.begin() + size_, - stack_.begin() + i + 1); + stack_.begin() + size_ + 1); stack_[i] = value; } else { - if (size_ == N) - std::move(stack_.begin(), stack_.end(), std::back_inserter(heap_)); + if (size_ == N) { + std::copy(stack_.begin(), stack_.end(), std::back_inserter(heap_)); + } heap_.insert(heap_.begin() + i, value); } size_ += 1; From d2614229b28661b144ff18e470eeb80fbbd1ab24 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sat, 28 Dec 2024 23:34:00 +0800 Subject: [PATCH 28/37] more optimization --- src/sdf/context.cpp | 373 ++++++++++++++++++++++++++++++++++++----- src/sdf/context.h | 10 +- src/sdf/tape.h | 24 +-- src/sdf/value.cpp | 43 +++-- test/sdf_tape_test.cpp | 25 ++- 5 files changed, 398 insertions(+), 77 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 1711318eb..6182c9934 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -22,6 +22,29 @@ #include "manifold/optional_assert.h" +struct AffineValue { + // value = var * a + b + int var; + double a; + double b; + + AffineValue(int var, double a, double b) : var(var), a(a), b(b) {} + AffineValue(double constant) + : var(std::numeric_limits::max()), a(0.0), b(constant) {} + bool operator==(const AffineValue &other) const { + return var == other.var && a == other.a && b == other.b; + } +}; + +template <> +struct std::hash { + size_t operator()(const AffineValue &value) const { + size_t h = std::hash()(value.var); + hash_combine(h, value.a, value.b); + return h; + } +}; + namespace manifold::sdf { void Context::dump() const { #ifdef MANIFOLD_DEBUG @@ -33,7 +56,8 @@ void Context::dump() const { if (operand.isResult()) std::cout << "r" << operand.toInstIndex(); else if (operand.isConst()) - std::cout << constants[operand.toConstIndex()]; + std::cout << constants[operand.toConstIndex()] << 
"(" << operand.id + << ")"; else std::cout << static_cast('X' - operand.id - 1); std::cout << " "; @@ -63,8 +87,6 @@ Operand Context::addInstruction(Instruction inst) { case OpCode::MIN: case OpCode::MAX: case OpCode::EQ: - case OpCode::AND: - case OpCode::OR: case OpCode::FMA: // first two operands commutative, sort them // makes it more likely to find common subexpressions @@ -101,7 +123,7 @@ std::optional Context::trySimplify(Instruction inst) { switch (op) { case OpCode::NOP: case OpCode::RETURN: - case OpCode::CONST: + case OpCode::CONSTANT: case OpCode::STORE: case OpCode::LOAD: break; @@ -128,8 +150,6 @@ std::optional Context::trySimplify(Instruction inst) { case OpCode::MAX: case OpCode::EQ: case OpCode::GT: - case OpCode::AND: - case OpCode::OR: result = EvalContext::handle_binary( op, constants[operands[0].toConstIndex()], constants[operands[1].toConstIndex()]); @@ -188,33 +208,11 @@ std::optional Context::trySimplify(Instruction inst) { return {}; } -Instruction Context::strengthReduction(Instruction inst) { - // strength reduction: reduce instructions to simpler variants - // not very helpful for point evaluation in a vm because instruction decoding - // is the most time consuming part. - // This can be useful if we want to do JIT, interval evaluation or bulk - // evaluation. 
- if (inst.op == OpCode::MUL && inst.operands[1].isConst() && - constants[inst.operands[1].toConstIndex()] == 2.0) { - // x * 2 => x + x - return {OpCode::ADD, {inst.operands[0], inst.operands[1], Operand::none()}}; - } - if (inst.op == OpCode::DIV && inst.operands[1].isConst()) { - // x / c => x * (1/c) - return {OpCode::MUL, - {inst.operands[0], - addConstant(1.0 / constants[inst.operands[1].toConstIndex()]), - Operand::none()}}; - } - return inst; -} - // bypass the cache because we don't expect to have more common subexpressions // after optimizations Operand Context::addInstructionNoCache(Instruction inst) { auto simplified = trySimplify(inst); if (simplified.has_value()) return simplified.value(); - inst = strengthReduction(inst); size_t i = instructions.size(); instructions.push_back(inst); @@ -234,7 +232,22 @@ Context::UsesVector::const_iterator findUse(const Context::UsesVector &uses, return std::lower_bound(uses.cbegin(), uses.cend(), inst); } -void Context::peephole() { +void Context::addUse(Operand operand, size_t inst) { + if (!operand.isResult() && !operand.isConst()) return; + auto uses = getUses(operand); + auto iter = findUse(*uses, inst); + if (iter == uses->cend() || *iter != inst) uses->insert(iter, inst); +} + +void Context::removeUse(Operand operand, size_t inst) { + if (!operand.isResult() && !operand.isConst()) return; + auto uses = getUses(operand); + auto iter = findUse(*uses, inst); + if (*iter == inst) uses->erase(iter); +} + +void Context::combineFMA() { + const auto none = Operand::none(); auto tryApply = [&](size_t i, Operand lhs, Operand rhs) { if (!lhs.isResult()) return false; auto lhsInst = lhs.toInstIndex(); @@ -244,19 +257,12 @@ void Context::peephole() { Operand b = instructions[lhsInst].operands[1]; instructions[i] = {OpCode::FMA, {a, b, rhs}}; // remove instruction - auto none = Operand::none(); instructions[lhsInst] = {OpCode::NOP, {none, none, none}}; // update uses, note that we need to maintain the order of the indices 
opUses[lhsInst].clear(); auto updateUses = [&](Operand x) { - if (!x.isResult() && !x.isConst()) return; - auto uses = getUses(x); - auto iter1 = findUse(*uses, lhsInst); - DEBUG_ASSERT(*iter1 == lhsInst, logicErr, "expected use"); - uses->erase(iter1); - auto iter2 = findUse(*uses, i); - // make sure there is no duplicate - if (iter2 == uses->cend() || *iter2 != i) uses->insert(iter2, i); + removeUse(x, lhsInst); + addUse(x, i); }; updateUses(a); if (a != b) updateUses(b); @@ -273,6 +279,281 @@ void Context::peephole() { } } +void Context::optimizeAffine() { + const auto none = Operand::none(); + std::vector affineValues; + affineValues.reserve(instructions.size()); + unordered_map avcache; + + auto getConstant = [&](Operand operand) -> std::optional { + if (operand.isConst()) return constants[operand.toConstIndex()]; + if (operand.isResult() && affineValues[operand.toInstIndex()].a == 0.0) + return affineValues[operand.toInstIndex()].b; + return {}; + }; + + auto replaceInst = [&](int from, int to) { + auto fromInst = Operand{from + 1}; + auto toInst = Operand{to + 1}; + for (auto use : opUses[from]) { + for (auto &operand : instructions[use].operands) + if (operand == fromInst) operand = toInst; + } + opUses[from].clear(); + instructions[from] = {OpCode::NOP, {none, none, none}}; + }; + + // abstract interpretation to figure out affine values for each instruction, + // and replace them as appropriate + // note that we still need constant propagation because this abstract + // interpretation can generate constants + for (size_t i = 0; i < instructions.size(); i++) { + auto &inst = instructions[i]; + AffineValue result = AffineValue(static_cast(i), 1, 0); + switch (inst.op) { + // notably, neg is special among these unary opcode + case OpCode::ABS: + case OpCode::EXP: + case OpCode::LOG: + case OpCode::SQRT: + case OpCode::FLOOR: + case OpCode::CEIL: + case OpCode::ROUND: + case OpCode::SIN: + case OpCode::COS: + case OpCode::TAN: + case OpCode::ASIN: + case 
OpCode::ACOS: + case OpCode::ATAN: { + auto x = getConstant(inst.operands[0]); + if (x.has_value()) + result = AffineValue( + EvalContext::handle_unary(inst.op, x.value())); + break; + } + case OpCode::NEG: + if (inst.operands[0].isConst()) + result = AffineValue(-constants[inst.operands[0].toConstIndex()]); + else if (inst.operands[0].isResult()) { + auto av = affineValues[inst.operands[0].toInstIndex()]; + result = AffineValue(av.var, -av.a, -av.b); + } + break; + case OpCode::DIV: { + // TODO: handle the case where lhs is divisible by rhs despite rhs is + // not a constant + auto rhs = getConstant(inst.operands[1]); + if (rhs.has_value()) { + if (inst.operands[0].isConst()) { + result = AffineValue(constants[inst.operands[0].toConstIndex()] / + rhs.value()); + } else if (inst.operands[0].isResult()) { + auto av = affineValues[inst.operands[0].toInstIndex()]; + result = + AffineValue(av.var, av.a / rhs.value(), av.b / rhs.value()); + } + } + break; + } + case OpCode::MOD: + case OpCode::MIN: + case OpCode::MAX: + case OpCode::EQ: + case OpCode::GT: { + // TODO: we can do better than just constant propagation... 
+ auto lhs = getConstant(inst.operands[0]); + auto rhs = getConstant(inst.operands[1]); + if (lhs.has_value() && rhs.has_value()) + result = AffineValue(EvalContext::handle_binary( + inst.op, lhs.value(), rhs.value())); + break; + } + case OpCode::ADD: { + auto x = inst.operands[0]; + auto y = inst.operands[1]; + auto lhs = getConstant(x); + auto rhs = getConstant(y); + if (lhs.has_value() && rhs.has_value()) { + result = AffineValue(lhs.value() + rhs.value()); + } else if (lhs.has_value() && y.isResult()) { + result = affineValues[y.toInstIndex()]; + result.b += lhs.value(); + } else if (rhs.has_value() && x.isResult()) { + result = affineValues[x.toInstIndex()]; + result.b += rhs.value(); + } else if (x.isResult() && y.isResult()) { + if (affineValues[x.toInstIndex()].var == + affineValues[y.toInstIndex()].var) { + auto other = affineValues[y.toInstIndex()]; + result = affineValues[x.toInstIndex()]; + result.a += other.a; + result.b += other.b; + } + } + } + case OpCode::SUB: { + auto x = inst.operands[0]; + auto y = inst.operands[1]; + auto lhs = getConstant(x); + auto rhs = getConstant(y); + if (lhs.has_value() && rhs.has_value()) { + result = AffineValue(lhs.value() - rhs.value()); + } else if (lhs.has_value() && y.isResult()) { + result = affineValues[y.toInstIndex()]; + result.a = -result.a; + result.b = lhs.value() - result.b; + } else if (rhs.has_value() && x.isResult()) { + result = affineValues[x.toInstIndex()]; + result.b -= rhs.value(); + } else if (x.isResult() && y.isResult()) { + if (affineValues[x.toInstIndex()].var == + affineValues[y.toInstIndex()].var) { + auto other = affineValues[y.toInstIndex()]; + result = affineValues[x.toInstIndex()]; + result.a -= other.a; + result.b -= other.b; + } + } + break; + } + case OpCode::MUL: { + auto x = inst.operands[0]; + auto y = inst.operands[1]; + auto lhs = getConstant(x); + auto rhs = getConstant(y); + if (lhs.has_value() && rhs.has_value()) { + result = AffineValue(lhs.value() * rhs.value()); + } else 
if (lhs.has_value() && y.isResult()) { + result = affineValues[y.toInstIndex()]; + result.a *= lhs.value(); + result.b *= lhs.value(); + } else if (rhs.has_value() && x.isResult()) { + result = affineValues[x.toInstIndex()]; + result.a *= rhs.value(); + result.b *= rhs.value(); + } + break; + } + default: + // TODO: handle FMA as well? + break; + } + affineValues.push_back(result); + if (result.var != static_cast(i)) { + // we did evaluate something + auto pair = avcache.insert({result, static_cast(i)}); + if (!pair.second) { + // this result is being optimized away, replace uses with the value + replaceInst(static_cast(i), pair.first->second); + } else { + for (auto operand : inst.operands) removeUse(operand, i); + addUse(Operand{result.var + 1}, i); + // modify instruction + // FIXME: handle constant uses... + if (result.a == 1.0 && result.b == 0.0) { + // this result is being optimized away, replace uses with the value + pair.first->second = result.var; + replaceInst(static_cast(i), result.var); + } else if (result.a == 1.0) { + auto constant = addConstant(result.b); + addUse(constant, i); + instructions[i] = {OpCode::ADD, + {constant, Operand{result.var + 1}, none}}; + } else if (result.a == -1.0) { + auto constant = addConstant(result.b); + addUse(constant, i); + instructions[i] = {OpCode::SUB, + {constant, Operand{result.var + 1}, none}}; + } else if (result.b == 0.0) { + auto constant = addConstant(result.a); + addUse(constant, i); + instructions[i] = {OpCode::MUL, + {constant, Operand{result.var + 1}, none}}; + } else { + auto a = addConstant(result.a); + auto b = addConstant(result.b); + addUse(a, i); + addUse(b, i); + instructions[i] = {OpCode::FMA, {a, Operand{result.var + 1}, b}}; + } + } + } + } +} + +void Context::schedule() { + cache.clear(); + opUses.clear(); + for (auto &uses : constantUses) uses.clear(); + auto oldInstructions = std::move(this->instructions); + // compute depth in DG + std::vector levelMap; + 
levelMap.reserve(oldInstructions.size()); + for (size_t i = 0; i < oldInstructions.size(); i++) { + const auto &inst = oldInstructions[i]; + size_t maxLevel = 0; + for (auto operand : inst.operands) { + if (!operand.isResult()) continue; + maxLevel = std::max(maxLevel, levelMap[operand.toInstIndex()]); + } + levelMap.push_back(maxLevel + 1); + } + + std::vector computedInst(oldInstructions.size(), Operand::none()); + std::vector stack; + if (oldInstructions.back().operands[0].isResult()) + stack.push_back(oldInstructions.back().operands[0].toInstIndex()); + + auto requiresComputation = [&computedInst](Operand operand) { + return operand.isResult() && computedInst[operand.toInstIndex()].isNone(); + }; + auto toNewOperand = [&computedInst](Operand old) { + if (old.isResult()) return computedInst[old.toInstIndex()]; + return old; + }; + + while (!stack.empty()) { + int numResults = 0; + auto back = stack.back(); + if (!computedInst[back].isNone()) { + stack.pop_back(); + continue; + } + auto &inst = oldInstructions[back]; + std::array costs = {0, 0, 0}; + std::array ids = {0, 1, 2}; + for (auto i : ids) + if (requiresComputation(inst.operands[i])) { + numResults += 1; + costs[i] = levelMap[inst.operands[i].toInstIndex()]; + } + if (numResults > 0) { + std::sort(ids.begin(), ids.end(), + [&costs](size_t x, size_t y) { return costs[x] < costs[y]; }); + for (size_t x : ids) + if (requiresComputation(inst.operands[x])) + stack.push_back(inst.operands[x].toInstIndex()); + } else { + stack.pop_back(); + std::array newOperands; + for (int i : ids) newOperands[i] = toNewOperand(inst.operands[i]); + Operand result = addInstructionNoCache({inst.op, newOperands}); + computedInst[back] = result; + } + } + addInstructionNoCache( + {OpCode::RETURN, + {computedInst[oldInstructions.back().operands[0].toInstIndex()], + Operand::none(), Operand::none()}}); +} + +void Context::optimize() { + optimizeAffine(); + combineFMA(); + schedule(); + dump(); +} + struct RegEntry { size_t nextUse; 
Operand operand; @@ -353,7 +634,7 @@ std::pair, size_t> Context::genTape() { if (iter == spills.end()) { DEBUG_ASSERT(operand.isConst(), logicErr, "can only materialize constants"); - tape.insert(tape.end(), {static_cast(OpCode::CONST), reg}); + tape.insert(tape.end(), {static_cast(OpCode::CONSTANT), reg}); addImmediate(tape, constants[operand.toConstIndex()]); } else { tape.insert(tape.end(), {static_cast(OpCode::LOAD), reg}); @@ -391,7 +672,8 @@ std::pair, size_t> Context::genTape() { // insert it back with new next use // because it is not at the end of its lifetime, the incremented // iterator is guaranteed to be valid - insertRegCache({*(findUse(*uses, inst) + 1), instOperands[i], regs[i]}); + auto nextUse = *(findUse(*uses, inst) + 1); + insertRegCache({nextUse, instOperands[i], regs[i]}); } } return regs; @@ -399,11 +681,10 @@ std::pair, size_t> Context::genTape() { for (size_t i = 0; i < instructions.size(); i++) { auto &inst = instructions[i]; - if (inst.op == OpCode::NOP) continue; auto instOp = Operand{static_cast(i) + 1}; auto uses = getUses(instOp); - // avoid useless ops - if (inst.op != OpCode::RETURN && uses->empty()) continue; + if (inst.op == OpCode::NOP) continue; + // if (inst.op != OpCode::RETURN && uses->empty()) continue; auto tmp = handleOperands(inst.operands, i); if (inst.op == OpCode::RETURN) { tape.insert(tape.end(), {static_cast(inst.op), tmp[0]}); @@ -411,7 +692,11 @@ std::pair, size_t> Context::genTape() { } // note that we may spill the operand register, but that is fine uint8_t reg = allocateReg(); - insertRegCache({uses->front(), instOp, reg}); + if (uses->empty()) { + availableReg.push_back(reg); + } else { + insertRegCache({uses->front(), instOp, reg}); + } tape.insert(tape.end(), {static_cast(inst.op), reg}); for (size_t j : {0, 1, 2}) { if (inst.operands[j].isNone()) break; diff --git a/src/sdf/context.h b/src/sdf/context.h index 34d6187d9..f9c95870b 100644 --- a/src/sdf/context.h +++ b/src/sdf/context.h @@ -80,11 +80,11 @@ 
struct std::hash { namespace manifold::sdf { class Context { public: - using UsesVector = small_vector; + using UsesVector = std::vector; Operand addConstant(double d); Operand addInstruction(Instruction); - void peephole(); + void optimize(); void reschedule(); std::pair, size_t> genTape(); @@ -106,8 +106,12 @@ class Context { unordered_map cache; std::optional trySimplify(Instruction); - Instruction strengthReduction(Instruction); Operand addInstructionNoCache(Instruction); + void combineFMA(); + void optimizeAffine(); + void addUse(Operand operand, size_t inst); + void removeUse(Operand operand, size_t inst); + void schedule(); UsesVector* getUses(Operand operand) { if (operand.isResult()) { diff --git a/src/sdf/tape.h b/src/sdf/tape.h index f4c403f6a..77cba4611 100644 --- a/src/sdf/tape.h +++ b/src/sdf/tape.h @@ -27,7 +27,7 @@ namespace manifold::sdf { enum class OpCode : uint8_t { NOP, RETURN, - CONST, + CONSTANT, STORE, LOAD, @@ -54,8 +54,6 @@ enum class OpCode : uint8_t { MAX, EQ, GT, - AND, - OR, // fast binary operations ADD, @@ -115,7 +113,7 @@ struct EvalContext { Domain x = buffer[tape[i + 2]]; buffer[tape[i + 1]] = handle_unary(current, x); i += 3; - } else if (current == OpCode::CONST) { + } else if (current == OpCode::CONSTANT) { double x; std::memcpy(&x, tape.data() + i + 2, sizeof(x)); buffer[tape[i + 1]] = Domain(x); @@ -192,10 +190,6 @@ inline double EvalContext::handle_binary(OpCode op, double lhs, return lhs == rhs ? 1.0 : 0.0; case OpCode::GT: return lhs > rhs ? 1.0 : 0.0; - case OpCode::AND: - return (lhs == 1.0 && rhs == 1.0) ? 1.0 : 0.0; - case OpCode::OR: - return (lhs == 1.0 || rhs == 1.0) ? 
1.0 : 0.0; default: return 0; } @@ -204,7 +198,7 @@ inline double EvalContext::handle_binary(OpCode op, double lhs, template <> inline double EvalContext::handle_choice(double cond, double lhs, double rhs) { - if (cond == 1.0) return lhs; + if (cond != 0.0) return lhs; return rhs; } @@ -272,10 +266,6 @@ inline Interval EvalContext>::handle_binary( return lhs == rhs; case OpCode::GT: return lhs > rhs; - case OpCode::AND: - return lhs.logical_and(rhs); - case OpCode::OR: - return lhs.logical_or(rhs); default: return {0.0, 0.0}; } @@ -285,7 +275,7 @@ template <> inline Interval EvalContext>::handle_choice( Interval cond, Interval lhs, Interval rhs) { if (cond.is_const()) { - if (cond.lower == 1.0) return lhs; + if (cond.lower != 0.0) return lhs; return rhs; } return lhs.merge(rhs); @@ -297,7 +287,7 @@ inline std::string dumpOpCode(OpCode op) { return "NOP"; case OpCode::RETURN: return "RETURN"; - case OpCode::CONST: + case OpCode::CONSTANT: return "CONST"; case OpCode::LOAD: return "LOAD"; @@ -343,10 +333,6 @@ inline std::string dumpOpCode(OpCode op) { return "EQ"; case OpCode::GT: return "GT"; - case OpCode::AND: - return "AND"; - case OpCode::OR: - return "OR"; case OpCode::ADD: return "ADD"; case OpCode::SUB: diff --git a/src/sdf/value.cpp b/src/sdf/value.cpp index cc9a7197e..b9f0606b8 100644 --- a/src/sdf/value.cpp +++ b/src/sdf/value.cpp @@ -14,6 +14,8 @@ #include "value.h" +#include + #include "../utils.h" #include "context.h" #include "tape.h" @@ -91,12 +93,12 @@ Value Value::operator>(const Value& other) const { Value Value::operator&&(const Value& other) const { return Value(ValueKind::OPERATION, std::make_shared( - OpCode::AND, *this, other, Invalid())); + OpCode::MUL, *this, other, Invalid())); } Value Value::operator||(const Value& other) const { return Value(ValueKind::OPERATION, std::make_shared( - OpCode::OR, *this, other, Invalid())); + OpCode::ADD, *this, other, Invalid())); } Value Value::abs() const { @@ -205,13 +207,8 @@ std::pair, size_t> 
Value::genTape() const { Context ctx; unordered_map cache; std::vector stack; - cache.reserve(128); - stack.reserve(128); - - if (kind == ValueKind::OPERATION) stack.push_back(std::get(v).get()); - - auto none = Operand::none(); + const auto none = Operand::none(); bool ready = true; auto getOperand = [&](const Value& x, bool pushStack) { switch (x.kind) { @@ -236,7 +233,13 @@ std::pair, size_t> Value::genTape() const { return none; } }; + + auto start = std::chrono::high_resolution_clock::now(); + if (kind == ValueKind::OPERATION) stack.push_back(std::get(v).get()); + + int count = 0; while (!stack.empty()) { + count++; ready = true; auto current = stack.back(); Operand a = getOperand(current->operands[0], true); @@ -252,8 +255,28 @@ std::pair, size_t> Value::genTape() const { Operand result = getOperand(*this, false); ctx.addInstruction({OpCode::RETURN, {result, none, none}}); - ctx.peephole(); - return ctx.genTape(); + auto end = std::chrono::high_resolution_clock::now(); + auto time = static_cast( + std::chrono::duration_cast(end - start) + .count()); + printf("serialization: %dus with %d nodes\n", time, count); + start = std::chrono::high_resolution_clock::now(); + ctx.optimize(); + end = std::chrono::high_resolution_clock::now(); + time = static_cast( + std::chrono::duration_cast(end - start) + .count()); + printf("optimize: %dus\n", time); + + start = std::chrono::high_resolution_clock::now(); + auto tape = ctx.genTape(); + end = std::chrono::high_resolution_clock::now(); + time = static_cast( + std::chrono::duration_cast(end - start) + .count()); + printf("codegen: %dus with length %ld\n", time, tape.first.size()); + + return tape; } } // namespace manifold::sdf diff --git a/test/sdf_tape_test.cpp b/test/sdf_tape_test.cpp index aef8c46b9..2b13a2171 100644 --- a/test/sdf_tape_test.cpp +++ b/test/sdf_tape_test.cpp @@ -81,7 +81,7 @@ TEST(TAPE, Gyroid) { ctxSimple.buffer[0] = x; ctxSimple.buffer[1] = y; ctxSimple.buffer[2] = z; - ASSERT_NEAR(ctxSimple.eval(), 
gyroid({x, y, z}), 1e-12); + ASSERT_NEAR(ctxSimple.eval(), gyroid({x, y, z}), 1e-6); } } } @@ -176,3 +176,26 @@ TEST(TAPE, Blobs) { .count()); printf("interval evaluation: %dus\n", time); } + +TEST(TAPE, Blobs2) { + auto lengthFn = [](Value x, Value y, Value z) { + return (x * x + y * y + z * z).sqrt(); + }; + auto smoothstepFn = [](Value edge0, Value edge1, Value a) { + auto x = ((a - edge0) / (edge1 - edge0)) + .min(Value::Constant(1)) + .max(Value::Constant(0)); + return x * x * (Value::Constant(3) - Value::Constant(2) * x); + }; + Value d = Value::Constant(0); + for (int i = 0; i < 1000; i++) { + auto f = double(i + 1); + auto tmp = smoothstepFn( + Value::Constant(-1), Value::Constant(1), + Value::Constant(f).abs() - lengthFn(Value::Constant(f) - Value::X(), + Value::Constant(f) - Value::Y(), + Value::Constant(f) - Value::Z())); + d = d + tmp; + } + auto tape = d.genTape(); +} From f5962953e5e5906950d0425acaa834149e5b5e89 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sat, 28 Dec 2024 23:44:29 +0800 Subject: [PATCH 29/37] make affine value optimization more general --- src/sdf/context.cpp | 45 +++++++++++++++++++++------------------------ src/sdf/context.h | 2 ++ 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 6182c9934..2525765f0 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -24,13 +24,12 @@ struct AffineValue { // value = var * a + b - int var; + Operand var; double a; double b; - AffineValue(int var, double a, double b) : var(var), a(a), b(b) {} - AffineValue(double constant) - : var(std::numeric_limits::max()), a(0.0), b(constant) {} + AffineValue(Operand var, double a, double b) : var(var), a(a), b(b) {} + AffineValue(double constant) : var(Operand::none()), a(0.0), b(constant) {} bool operator==(const AffineValue &other) const { return var == other.var && a == other.a && b == other.b; } @@ -39,7 +38,7 @@ struct AffineValue { template <> struct std::hash { size_t 
operator()(const AffineValue &value) const { - size_t h = std::hash()(value.var); + size_t h = std::hash()(value.var.id); hash_combine(h, value.a, value.b); return h; } @@ -71,8 +70,8 @@ void Context::dump() const { } Operand Context::addConstant(double d) { - auto result = constantsIds.insert( - {d, Operand{-4 - static_cast(constants.size())}}); + auto result = + constantsIds.insert({d, Operand::fromConstIndex(constants.size())}); if (result.second) { constants.push_back(d); constantUses.emplace_back(); @@ -224,7 +223,7 @@ Operand Context::addInstructionNoCache(Instruction inst) { // avoid duplicates if (target->empty() || target->back() != i) target->push_back(i); } - return {static_cast(i) + 1}; + return Operand::fromInstIndex(i); } Context::UsesVector::const_iterator findUse(const Context::UsesVector &uses, @@ -293,8 +292,8 @@ void Context::optimizeAffine() { }; auto replaceInst = [&](int from, int to) { - auto fromInst = Operand{from + 1}; - auto toInst = Operand{to + 1}; + auto fromInst = Operand::fromInstIndex(from); + auto toInst = Operand::fromInstIndex(to); for (auto use : opUses[from]) { for (auto &operand : instructions[use].operands) if (operand == fromInst) operand = toInst; @@ -309,7 +308,7 @@ void Context::optimizeAffine() { // interpretation can generate constants for (size_t i = 0; i < instructions.size(); i++) { auto &inst = instructions[i]; - AffineValue result = AffineValue(static_cast(i), 1, 0); + AffineValue result = AffineValue(Operand::fromInstIndex(i), 1, 0); switch (inst.op) { // notably, neg is special among these unary opcode case OpCode::ABS: @@ -439,7 +438,7 @@ void Context::optimizeAffine() { break; } affineValues.push_back(result); - if (result.var != static_cast(i)) { + if (result.var != Operand::fromInstIndex(i)) { // we did evaluate something auto pair = avcache.insert({result, static_cast(i)}); if (!pair.second) { @@ -447,34 +446,32 @@ void Context::optimizeAffine() { replaceInst(static_cast(i), pair.first->second); } else { for 
(auto operand : inst.operands) removeUse(operand, i); - addUse(Operand{result.var + 1}, i); + addUse(result.var, i); // modify instruction // FIXME: handle constant uses... - if (result.a == 1.0 && result.b == 0.0) { + if (result.a == 1.0 && result.b == 0.0 && result.var.isResult()) { // this result is being optimized away, replace uses with the value - pair.first->second = result.var; - replaceInst(static_cast(i), result.var); + pair.first->second = result.var.toInstIndex(); + replaceInst(static_cast(i), + static_cast(result.var.toInstIndex())); } else if (result.a == 1.0) { auto constant = addConstant(result.b); addUse(constant, i); - instructions[i] = {OpCode::ADD, - {constant, Operand{result.var + 1}, none}}; + instructions[i] = {OpCode::ADD, {constant, result.var, none}}; } else if (result.a == -1.0) { auto constant = addConstant(result.b); addUse(constant, i); - instructions[i] = {OpCode::SUB, - {constant, Operand{result.var + 1}, none}}; + instructions[i] = {OpCode::SUB, {constant, result.var, none}}; } else if (result.b == 0.0) { auto constant = addConstant(result.a); addUse(constant, i); - instructions[i] = {OpCode::MUL, - {constant, Operand{result.var + 1}, none}}; + instructions[i] = {OpCode::MUL, {constant, result.var, none}}; } else { auto a = addConstant(result.a); auto b = addConstant(result.b); addUse(a, i); addUse(b, i); - instructions[i] = {OpCode::FMA, {a, Operand{result.var + 1}, b}}; + instructions[i] = {OpCode::FMA, {a, result.var, b}}; } } } @@ -681,7 +678,7 @@ std::pair, size_t> Context::genTape() { for (size_t i = 0; i < instructions.size(); i++) { auto &inst = instructions[i]; - auto instOp = Operand{static_cast(i) + 1}; + auto instOp = Operand::fromInstIndex(i); auto uses = getUses(instOp); if (inst.op == OpCode::NOP) continue; // if (inst.op != OpCode::RETURN && uses->empty()) continue; diff --git a/src/sdf/context.h b/src/sdf/context.h index f9c95870b..da6a7e741 100644 --- a/src/sdf/context.h +++ b/src/sdf/context.h @@ -30,6 +30,8 @@ 
struct Operand { int id; static Operand none() { return {0}; } + static Operand fromInstIndex(size_t i) { return {static_cast(i) + 1}; } + static Operand fromConstIndex(size_t i) { return {-static_cast(i) - 4}; } bool isConst() const { return id <= -4; } bool isResult() const { return id > 0; } bool isNone() const { return id == 0; } From 19d41111d69a67648a41ef2214c127db5d72bbbe Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sat, 28 Dec 2024 23:50:40 +0800 Subject: [PATCH 30/37] generate negation --- src/sdf/context.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 2525765f0..088ead94a 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -458,6 +458,8 @@ void Context::optimizeAffine() { auto constant = addConstant(result.b); addUse(constant, i); instructions[i] = {OpCode::ADD, {constant, result.var, none}}; + } else if (result.a == -1.0 && result.b == 0.0) { + instructions[i] = {OpCode::NEG, {result.var, none, none}}; } else if (result.a == -1.0) { auto constant = addConstant(result.b); addUse(constant, i); From b989c0623d0d68cfd7c78162d47f32456072b179 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sat, 28 Dec 2024 23:51:49 +0800 Subject: [PATCH 31/37] bypass more simplification --- src/sdf/context.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 088ead94a..8057e3f17 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -98,6 +98,11 @@ Operand Context::addInstruction(Instruction inst) { // common subexpression elimination auto entry = cache.find(inst); if (entry != cache.end()) return entry->second; + auto simplified = trySimplify(inst); + if (simplified.has_value()) { + cache.insert({inst, simplified.value()}); + return simplified.value(); + } auto result = addInstructionNoCache(inst); cache.insert({inst, result}); return result; @@ -210,9 +215,6 @@ std::optional Context::trySimplify(Instruction inst) { // bypass the 
cache because we don't expect to have more common subexpressions // after optimizations Operand Context::addInstructionNoCache(Instruction inst) { - auto simplified = trySimplify(inst); - if (simplified.has_value()) return simplified.value(); - size_t i = instructions.size(); instructions.push_back(inst); opUses.emplace_back(); From 5f1c19f3975e96d668c3af1e0b5f3abcb5aea522 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sat, 28 Dec 2024 23:53:56 +0800 Subject: [PATCH 32/37] remove fixme --- src/sdf/context.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 8057e3f17..3200d08bf 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -450,7 +450,6 @@ void Context::optimizeAffine() { for (auto operand : inst.operands) removeUse(operand, i); addUse(result.var, i); // modify instruction - // FIXME: handle constant uses... if (result.a == 1.0 && result.b == 0.0 && result.var.isResult()) { // this result is being optimized away, replace uses with the value pair.first->second = result.var.toInstIndex(); From 15553260d24bfe84b84e214b75d7b9d01e27444d Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sun, 29 Dec 2024 15:29:59 +0800 Subject: [PATCH 33/37] affine value optimization subsumes constant propagation --- src/sdf/context.cpp | 160 +++++++++++++------------------------------- src/sdf/context.h | 2 +- 2 files changed, 48 insertions(+), 114 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 3200d08bf..2ef15360d 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -98,120 +98,11 @@ Operand Context::addInstruction(Instruction inst) { // common subexpression elimination auto entry = cache.find(inst); if (entry != cache.end()) return entry->second; - auto simplified = trySimplify(inst); - if (simplified.has_value()) { - cache.insert({inst, simplified.value()}); - return simplified.value(); - } auto result = addInstructionNoCache(inst); cache.insert({inst, result}); return result; } 
-std::optional Context::trySimplify(Instruction inst) { - // constant choice - auto op = inst.op; - auto &operands = inst.operands; - if (op == OpCode::CHOICE && operands[0].isConst()) { - if (constants[operands[0].toConstIndex()] == 1.0) return operands[1]; - return operands[2]; - } - // constant propagation - bool all_constants = true; - for (auto operand : operands) { - if (!operand.isConst() && !operand.isNone()) all_constants = false; - } - // we should not do anything about returning a constant... - if (all_constants && op != OpCode::RETURN) { - double result = 0.0; - switch (op) { - case OpCode::NOP: - case OpCode::RETURN: - case OpCode::CONSTANT: - case OpCode::STORE: - case OpCode::LOAD: - break; - case OpCode::ABS: - case OpCode::NEG: - case OpCode::EXP: - case OpCode::LOG: - case OpCode::SQRT: - case OpCode::FLOOR: - case OpCode::CEIL: - case OpCode::ROUND: - case OpCode::SIN: - case OpCode::COS: - case OpCode::TAN: - case OpCode::ASIN: - case OpCode::ACOS: - case OpCode::ATAN: - result = EvalContext::handle_unary( - op, constants[operands[0].toConstIndex()]); - break; - case OpCode::DIV: - case OpCode::MOD: - case OpCode::MIN: - case OpCode::MAX: - case OpCode::EQ: - case OpCode::GT: - result = EvalContext::handle_binary( - op, constants[operands[0].toConstIndex()], - constants[operands[1].toConstIndex()]); - break; - case OpCode::ADD: - result = constants[operands[0].toConstIndex()] + - constants[operands[1].toConstIndex()]; - break; - case OpCode::SUB: - result = constants[operands[0].toConstIndex()] - - constants[operands[1].toConstIndex()]; - break; - case OpCode::MUL: - result = constants[operands[0].toConstIndex()] * - constants[operands[1].toConstIndex()]; - break; - case OpCode::FMA: - result = constants[operands[0].toConstIndex()] * - constants[operands[1].toConstIndex()] + - constants[operands[2].toConstIndex()]; - break; - case OpCode::CHOICE: - // should be unreachable - DEBUG_ASSERT(false, logicErr, "unreachable"); - break; - } - return 
addConstant(result); - } - - // simple simplifications - if (op == OpCode::ADD) { - // add is commutative, so if there is a constant, it must be on the left - // 0 + x => x - if (operands[0].isConst() && constants[operands[0].toConstIndex()] == 0.0) - return operands[1]; - } - if (op == OpCode::SUB) { - // x - 0 => x - if (operands[1].isConst() && constants[operands[1].toConstIndex()] == 0.0) - return operands[0]; - } - if (op == OpCode::MUL) { - // mul is commutative, so if there is a constant, it must be on the left - // 0 * x => 0 - if (operands[0].isConst() && constants[operands[0].toConstIndex()] == 0.0) - return operands[0]; - // 1 * x => x - if (operands[0].isConst() && constants[operands[0].toConstIndex()] == 1.0) - return operands[1]; - } - if (op == OpCode::DIV) { - if (operands[1].isConst() && constants[operands[1].toConstIndex()] == 1.0) - return operands[0]; - } - - return {}; -} - // bypass the cache because we don't expect to have more common subexpressions // after optimizations Operand Context::addInstructionNoCache(Instruction inst) { @@ -312,6 +203,12 @@ void Context::optimizeAffine() { auto &inst = instructions[i]; AffineValue result = AffineValue(Operand::fromInstIndex(i), 1, 0); switch (inst.op) { + case OpCode::NOP: + case OpCode::RETURN: + case OpCode::CONSTANT: + case OpCode::LOAD: + case OpCode::STORE: + break; // notably, neg is special among these unary opcode case OpCode::ABS: case OpCode::EXP: @@ -391,6 +288,7 @@ void Context::optimizeAffine() { result.b += other.b; } } + break; } case OpCode::SUB: { auto x = inst.operands[0]; @@ -435,9 +333,40 @@ void Context::optimizeAffine() { } break; } - default: - // TODO: handle FMA as well? + case OpCode::FMA: { + auto x = inst.operands[0]; + auto y = inst.operands[1]; + auto z = inst.operands[2]; + auto a = getConstant(x); + auto b = getConstant(y); + auto c = getConstant(z); + // various cases... 
+ if (b.has_value() && c.has_value()) { + result = affineValues[x.toInstIndex()]; + result.a *= b.value(); + result.b = result.b * b.value() + c.value(); + } else if (a.has_value() && c.has_value()) { + result = affineValues[y.toInstIndex()]; + result.a *= a.value(); + result.b = result.b * a.value() + c.value(); + } else if (a.has_value() && b.has_value()) { + result = affineValues[z.toInstIndex()]; + result.b += a.value() * b.value(); + } break; + } + case OpCode::CHOICE: { + auto c = getConstant(inst.operands[0]); + auto a = inst.operands[1]; + auto b = inst.operands[2]; + if (c.has_value()) { + if (c.value() == 0.0) + result = affineValues[b.toInstIndex()]; + else + result = affineValues[a.toInstIndex()]; + } + break; + } } affineValues.push_back(result); if (result.var != Operand::fromInstIndex(i)) { @@ -448,7 +377,7 @@ void Context::optimizeAffine() { replaceInst(static_cast(i), pair.first->second); } else { for (auto operand : inst.operands) removeUse(operand, i); - addUse(result.var, i); + if (!result.var.isNone()) addUse(result.var, i); // modify instruction if (result.a == 1.0 && result.b == 0.0 && result.var.isResult()) { // this result is being optimized away, replace uses with the value @@ -469,6 +398,12 @@ void Context::optimizeAffine() { auto constant = addConstant(result.a); addUse(constant, i); instructions[i] = {OpCode::MUL, {constant, result.var, none}}; + } else if (result.a == 0.0) { + auto a = addConstant(0.0); + auto b = addConstant(result.b); + addUse(a, i); + addUse(b, i); + instructions[i] = {OpCode::ADD, {b, a, none}}; } else { auto a = addConstant(result.a); auto b = addConstant(result.b); @@ -551,7 +486,6 @@ void Context::optimize() { optimizeAffine(); combineFMA(); schedule(); - dump(); } struct RegEntry { diff --git a/src/sdf/context.h b/src/sdf/context.h index da6a7e741..772366dc2 100644 --- a/src/sdf/context.h +++ b/src/sdf/context.h @@ -107,7 +107,7 @@ class Context { std::vector opUses; unordered_map cache; - std::optional 
trySimplify(Instruction); + // std::optional trySimplify(Instruction); Operand addInstructionNoCache(Instruction); void combineFMA(); void optimizeAffine(); From d0df1fb1181834d1557f981d05c6bc1c20d0634f Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sun, 29 Dec 2024 15:39:21 +0800 Subject: [PATCH 34/37] simplify code --- src/sdf/value.cpp | 175 ++++++++++------------------------------------ 1 file changed, 38 insertions(+), 137 deletions(-) diff --git a/src/sdf/value.cpp b/src/sdf/value.cpp index b9f0606b8..59136ea34 100644 --- a/src/sdf/value.cpp +++ b/src/sdf/value.cpp @@ -40,150 +40,51 @@ Value Value::Y() { return Value(ValueKind::Y, 0.0); } Value Value::Z() { return Value(ValueKind::Z, 0.0); } -Value Value::operator+(const Value& other) const { - return Value(ValueKind::OPERATION, std::make_shared( - OpCode::ADD, *this, other, Invalid())); -} - -Value Value::operator-(const Value& other) const { - return Value(ValueKind::OPERATION, std::make_shared( - OpCode::SUB, *this, other, Invalid())); -} - -Value Value::operator*(const Value& other) const { - return Value(ValueKind::OPERATION, std::make_shared( - OpCode::MUL, *this, other, Invalid())); -} - -Value Value::operator/(const Value& other) const { - return Value(ValueKind::OPERATION, std::make_shared( - OpCode::DIV, *this, other, Invalid())); -} - Value Value::cond(const Value& then, const Value& otherwise) const { return Value( ValueKind::OPERATION, std::make_shared(OpCode::CHOICE, *this, then, otherwise)); } -Value Value::mod(const Value& other) const { - return Value(ValueKind::OPERATION, std::make_shared( - OpCode::MOD, *this, other, Invalid())); -} - -Value Value::min(const Value& other) const { - return Value(ValueKind::OPERATION, std::make_shared( - OpCode::MIN, *this, other, Invalid())); -} - -Value Value::max(const Value& other) const { - return Value(ValueKind::OPERATION, std::make_shared( - OpCode::MAX, *this, other, Invalid())); -} - -Value Value::operator==(const Value& other) const { - return 
Value(ValueKind::OPERATION, std::make_shared( - OpCode::EQ, *this, other, Invalid())); -} - -Value Value::operator>(const Value& other) const { - return Value(ValueKind::OPERATION, std::make_shared( - OpCode::GT, *this, other, Invalid())); -} - -Value Value::operator&&(const Value& other) const { - return Value(ValueKind::OPERATION, std::make_shared( - OpCode::MUL, *this, other, Invalid())); -} - -Value Value::operator||(const Value& other) const { - return Value(ValueKind::OPERATION, std::make_shared( - OpCode::ADD, *this, other, Invalid())); -} - -Value Value::abs() const { - return Value(ValueKind::OPERATION, - std::make_shared(OpCode::ABS, *this, Invalid(), - Invalid())); -} - -Value Value::operator-() const { - return Value(ValueKind::OPERATION, - std::make_shared(OpCode::NEG, *this, Invalid(), - Invalid())); -} - -Value Value::exp() const { - return Value(ValueKind::OPERATION, - std::make_shared(OpCode::EXP, *this, Invalid(), - Invalid())); -} - -Value Value::log() const { - return Value(ValueKind::OPERATION, - std::make_shared(OpCode::LOG, *this, Invalid(), - Invalid())); -} - -Value Value::sqrt() const { - return Value(ValueKind::OPERATION, - std::make_shared(OpCode::SQRT, *this, Invalid(), - Invalid())); -} - -Value Value::floor() const { - return Value(ValueKind::OPERATION, - std::make_shared(OpCode::FLOOR, *this, Invalid(), - Invalid())); -} - -Value Value::ceil() const { - return Value(ValueKind::OPERATION, - std::make_shared(OpCode::CEIL, *this, Invalid(), - Invalid())); -} - -Value Value::round() const { - return Value(ValueKind::OPERATION, - std::make_shared(OpCode::ROUND, *this, Invalid(), - Invalid())); -} - -Value Value::sin() const { - return Value(ValueKind::OPERATION, - std::make_shared(OpCode::SIN, *this, Invalid(), - Invalid())); -} - -Value Value::cos() const { - return Value(ValueKind::OPERATION, - std::make_shared(OpCode::COS, *this, Invalid(), - Invalid())); -} - -Value Value::tan() const { - return Value(ValueKind::OPERATION, - 
std::make_shared(OpCode::TAN, *this, Invalid(), - Invalid())); -} - -Value Value::asin() const { - return Value(ValueKind::OPERATION, - std::make_shared(OpCode::ASIN, *this, Invalid(), - Invalid())); -} - -Value Value::acos() const { - return Value(ValueKind::OPERATION, - std::make_shared(OpCode::ACOS, *this, Invalid(), - Invalid())); -} +#define MAKE_UNARY(NAME, OPCODE) \ + Value Value::NAME() const { \ + return Value(ValueKind::OPERATION, \ + std::make_shared(OpCode::OPCODE, *this, \ + Invalid(), Invalid())); \ + } +#define MAKE_BINARY(NAME, OPCODE) \ + Value Value::NAME(const Value& other) const { \ + return Value(ValueKind::OPERATION, \ + std::make_shared(OpCode::OPCODE, *this, \ + other, Invalid())); \ + } -Value Value::atan() const { - return Value(ValueKind::OPERATION, - std::make_shared(OpCode::ATAN, *this, Invalid(), - Invalid())); -} +MAKE_UNARY(abs, ABS) +MAKE_UNARY(operator-, NEG) +MAKE_UNARY(exp, EXP) +MAKE_UNARY(log, LOG) +MAKE_UNARY(sqrt, SQRT) +MAKE_UNARY(floor, FLOOR) +MAKE_UNARY(ceil, CEIL) +MAKE_UNARY(round, ROUND) +MAKE_UNARY(sin, SIN) +MAKE_UNARY(cos, COS) +MAKE_UNARY(tan, TAN) +MAKE_UNARY(asin, ASIN) +MAKE_UNARY(acos, ACOS) +MAKE_UNARY(atan, ATAN) + +MAKE_BINARY(operator+, ADD) +MAKE_BINARY(operator-, SUB) +MAKE_BINARY(operator*, MUL) +MAKE_BINARY(operator/, DIV) +MAKE_BINARY(mod, MOD) +MAKE_BINARY(min, MIN) +MAKE_BINARY(max, MAX) +MAKE_BINARY(operator==, EQ) +MAKE_BINARY(operator>, GT) +MAKE_BINARY(operator&&, MUL) +MAKE_BINARY(operator||, ADD) Value::~Value() { using VO = std::shared_ptr; From 486c762e835983ed2df2518f4e028b5a9a109491 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sun, 29 Dec 2024 16:28:46 +0800 Subject: [PATCH 35/37] simplify further --- src/sdf/context.cpp | 233 ++++++++++++++++---------------------------- 1 file changed, 85 insertions(+), 148 deletions(-) diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index 2ef15360d..a3b7d8e17 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -33,6 +33,8 @@ struct 
AffineValue { bool operator==(const AffineValue &other) const { return var == other.var && a == other.a && b == other.b; } + AffineValue operator+(double d) { return AffineValue(var, a, b + d); } + AffineValue operator*(double d) { return AffineValue(var, a * d, b * d); } }; template <> @@ -183,17 +185,42 @@ void Context::optimizeAffine() { return affineValues[operand.toInstIndex()].b; return {}; }; - auto replaceInst = [&](int from, int to) { - auto fromInst = Operand::fromInstIndex(from); - auto toInst = Operand::fromInstIndex(to); - for (auto use : opUses[from]) { + for (auto use : opUses[from]) for (auto &operand : instructions[use].operands) - if (operand == fromInst) operand = toInst; - } + if (operand == Operand::fromInstIndex(from)) + operand = Operand::fromInstIndex(to); opUses[from].clear(); instructions[from] = {OpCode::NOP, {none, none, none}}; }; + auto handleAdd = [&](Operand x, Operand y, + bool sub) -> std::optional { + auto lhs = getConstant(x); + auto rhs = getConstant(y); + if (lhs.has_value() && rhs.has_value()) { + return AffineValue(lhs.value() + rhs.value() * (sub ? -1 : 1)); + } else if (lhs.has_value() && y.isResult()) { + return affineValues[y.toInstIndex()] * (sub ? -1 : 1) + lhs.value(); + } else if (rhs.has_value() && x.isResult()) { + return affineValues[x.toInstIndex()] + rhs.value() * (sub ? 
-1 : 1); + } else if (x.isResult() && y.isResult()) { + if (affineValues[x.toInstIndex()].var == + affineValues[y.toInstIndex()].var) { + auto other = affineValues[y.toInstIndex()]; + auto result = affineValues[x.toInstIndex()]; + if (sub) other = other * -1; + result.a += other.a; + result.b += other.b; + return result; + } + } + return {}; + }; + auto constWithUse = [&](double constant, size_t inst) { + auto result = addConstant(constant); + addUse(result, inst); + return result; + }; // abstract interpretation to figure out affine values for each instruction, // and replace them as appropriate @@ -203,116 +230,33 @@ void Context::optimizeAffine() { auto &inst = instructions[i]; AffineValue result = AffineValue(Operand::fromInstIndex(i), 1, 0); switch (inst.op) { - case OpCode::NOP: - case OpCode::RETURN: - case OpCode::CONSTANT: - case OpCode::LOAD: - case OpCode::STORE: - break; - // notably, neg is special among these unary opcode - case OpCode::ABS: - case OpCode::EXP: - case OpCode::LOG: - case OpCode::SQRT: - case OpCode::FLOOR: - case OpCode::CEIL: - case OpCode::ROUND: - case OpCode::SIN: - case OpCode::COS: - case OpCode::TAN: - case OpCode::ASIN: - case OpCode::ACOS: - case OpCode::ATAN: { - auto x = getConstant(inst.operands[0]); - if (x.has_value()) - result = AffineValue( - EvalContext::handle_unary(inst.op, x.value())); - break; - } case OpCode::NEG: if (inst.operands[0].isConst()) result = AffineValue(-constants[inst.operands[0].toConstIndex()]); - else if (inst.operands[0].isResult()) { - auto av = affineValues[inst.operands[0].toInstIndex()]; - result = AffineValue(av.var, -av.a, -av.b); - } + else if (inst.operands[0].isResult()) + result = affineValues[inst.operands[0].toInstIndex()] * -1; break; case OpCode::DIV: { // TODO: handle the case where lhs is divisible by rhs despite rhs is // not a constant auto rhs = getConstant(inst.operands[1]); if (rhs.has_value()) { - if (inst.operands[0].isConst()) { - result = 
AffineValue(constants[inst.operands[0].toConstIndex()] / - rhs.value()); - } else if (inst.operands[0].isResult()) { - auto av = affineValues[inst.operands[0].toInstIndex()]; - result = - AffineValue(av.var, av.a / rhs.value(), av.b / rhs.value()); - } + if (inst.operands[0].isConst()) + result = constants[inst.operands[0].toConstIndex()] / rhs.value(); + else if (inst.operands[0].isResult()) + result = affineValues[inst.operands[0].toInstIndex()] * + (1 / rhs.value()); } break; } - case OpCode::MOD: - case OpCode::MIN: - case OpCode::MAX: - case OpCode::EQ: - case OpCode::GT: { - // TODO: we can do better than just constant propagation... - auto lhs = getConstant(inst.operands[0]); - auto rhs = getConstant(inst.operands[1]); - if (lhs.has_value() && rhs.has_value()) - result = AffineValue(EvalContext::handle_binary( - inst.op, lhs.value(), rhs.value())); - break; - } case OpCode::ADD: { - auto x = inst.operands[0]; - auto y = inst.operands[1]; - auto lhs = getConstant(x); - auto rhs = getConstant(y); - if (lhs.has_value() && rhs.has_value()) { - result = AffineValue(lhs.value() + rhs.value()); - } else if (lhs.has_value() && y.isResult()) { - result = affineValues[y.toInstIndex()]; - result.b += lhs.value(); - } else if (rhs.has_value() && x.isResult()) { - result = affineValues[x.toInstIndex()]; - result.b += rhs.value(); - } else if (x.isResult() && y.isResult()) { - if (affineValues[x.toInstIndex()].var == - affineValues[y.toInstIndex()].var) { - auto other = affineValues[y.toInstIndex()]; - result = affineValues[x.toInstIndex()]; - result.a += other.a; - result.b += other.b; - } - } + auto r = handleAdd(inst.operands[0], inst.operands[1], false); + if (r.has_value()) result = r.value(); break; } case OpCode::SUB: { - auto x = inst.operands[0]; - auto y = inst.operands[1]; - auto lhs = getConstant(x); - auto rhs = getConstant(y); - if (lhs.has_value() && rhs.has_value()) { - result = AffineValue(lhs.value() - rhs.value()); - } else if (lhs.has_value() && 
y.isResult()) { - result = affineValues[y.toInstIndex()]; - result.a = -result.a; - result.b = lhs.value() - result.b; - } else if (rhs.has_value() && x.isResult()) { - result = affineValues[x.toInstIndex()]; - result.b -= rhs.value(); - } else if (x.isResult() && y.isResult()) { - if (affineValues[x.toInstIndex()].var == - affineValues[y.toInstIndex()].var) { - auto other = affineValues[y.toInstIndex()]; - result = affineValues[x.toInstIndex()]; - result.a -= other.a; - result.b -= other.b; - } - } + auto r = handleAdd(inst.operands[0], inst.operands[1], true); + if (r.has_value()) result = r.value(); break; } case OpCode::MUL: { @@ -320,17 +264,12 @@ void Context::optimizeAffine() { auto y = inst.operands[1]; auto lhs = getConstant(x); auto rhs = getConstant(y); - if (lhs.has_value() && rhs.has_value()) { + if (lhs.has_value() && rhs.has_value()) result = AffineValue(lhs.value() * rhs.value()); - } else if (lhs.has_value() && y.isResult()) { - result = affineValues[y.toInstIndex()]; - result.a *= lhs.value(); - result.b *= lhs.value(); - } else if (rhs.has_value() && x.isResult()) { - result = affineValues[x.toInstIndex()]; - result.a *= rhs.value(); - result.b *= rhs.value(); - } + else if (lhs.has_value() && y.isResult()) + result = affineValues[y.toInstIndex()] * lhs.value(); + else if (rhs.has_value() && x.isResult()) + result = affineValues[x.toInstIndex()] * rhs.value(); break; } case OpCode::FMA: { @@ -341,18 +280,12 @@ void Context::optimizeAffine() { auto b = getConstant(y); auto c = getConstant(z); // various cases... 
- if (b.has_value() && c.has_value()) { - result = affineValues[x.toInstIndex()]; - result.a *= b.value(); - result.b = result.b * b.value() + c.value(); - } else if (a.has_value() && c.has_value()) { - result = affineValues[y.toInstIndex()]; - result.a *= a.value(); - result.b = result.b * a.value() + c.value(); - } else if (a.has_value() && b.has_value()) { - result = affineValues[z.toInstIndex()]; - result.b += a.value() * b.value(); - } + if (b.has_value() && c.has_value()) + result = affineValues[x.toInstIndex()] * b.value() + c.value(); + else if (a.has_value() && c.has_value()) + result = affineValues[y.toInstIndex()] * a.value() + c.value(); + else if (a.has_value() && b.has_value()) + result = affineValues[z.toInstIndex()] + a.value() * b.value(); break; } case OpCode::CHOICE: { @@ -367,6 +300,20 @@ void Context::optimizeAffine() { } break; } + default: { + using ectx = EvalContext; + if (inst.op >= OpCode::ABS && inst.op <= OpCode::ATAN) { + auto x = getConstant(inst.operands[0]); + if (x.has_value()) result = ectx::handle_unary(inst.op, x.value()); + } else if (inst.op >= OpCode::DIV && inst.op <= OpCode::GT) { + // TODO: we can do better than just constant propagation... 
+ auto lhs = getConstant(inst.operands[0]); + auto rhs = getConstant(inst.operands[1]); + if (lhs.has_value() && rhs.has_value()) + result = ectx::handle_binary(inst.op, lhs.value(), rhs.value()); + } + break; + } } affineValues.push_back(result); if (result.var != Operand::fromInstIndex(i)) { @@ -385,31 +332,24 @@ void Context::optimizeAffine() { replaceInst(static_cast(i), static_cast(result.var.toInstIndex())); } else if (result.a == 1.0) { - auto constant = addConstant(result.b); - addUse(constant, i); - instructions[i] = {OpCode::ADD, {constant, result.var, none}}; + instructions[i] = {OpCode::ADD, + {constWithUse(result.b, i), result.var, none}}; } else if (result.a == -1.0 && result.b == 0.0) { instructions[i] = {OpCode::NEG, {result.var, none, none}}; } else if (result.a == -1.0) { - auto constant = addConstant(result.b); - addUse(constant, i); - instructions[i] = {OpCode::SUB, {constant, result.var, none}}; + instructions[i] = {OpCode::SUB, + {constWithUse(result.b, i), result.var, none}}; } else if (result.b == 0.0) { - auto constant = addConstant(result.a); - addUse(constant, i); - instructions[i] = {OpCode::MUL, {constant, result.var, none}}; + instructions[i] = {OpCode::MUL, + {constWithUse(result.a, i), result.var, none}}; } else if (result.a == 0.0) { - auto a = addConstant(0.0); - auto b = addConstant(result.b); - addUse(a, i); - addUse(b, i); - instructions[i] = {OpCode::ADD, {b, a, none}}; + instructions[i] = { + OpCode::ADD, + {constWithUse(result.b, i), constWithUse(0.0, i), none}}; } else { - auto a = addConstant(result.a); - auto b = addConstant(result.b); - addUse(a, i); - addUse(b, i); - instructions[i] = {OpCode::FMA, {a, result.var, b}}; + instructions[i] = {OpCode::FMA, + {constWithUse(result.a, i), result.var, + constWithUse(result.b, i)}}; } } } @@ -425,12 +365,10 @@ void Context::schedule() { std::vector levelMap; levelMap.reserve(oldInstructions.size()); for (size_t i = 0; i < oldInstructions.size(); i++) { - const auto &inst = 
oldInstructions[i]; size_t maxLevel = 0; - for (auto operand : inst.operands) { - if (!operand.isResult()) continue; - maxLevel = std::max(maxLevel, levelMap[operand.toInstIndex()]); - } + for (auto operand : oldInstructions[i].operands) + maxLevel = std::max( + maxLevel, operand.isResult() ? levelMap[operand.toInstIndex()] : 0); levelMap.push_back(maxLevel + 1); } @@ -443,8 +381,7 @@ void Context::schedule() { return operand.isResult() && computedInst[operand.toInstIndex()].isNone(); }; auto toNewOperand = [&computedInst](Operand old) { - if (old.isResult()) return computedInst[old.toInstIndex()]; - return old; + return old.isResult() ? computedInst[old.toInstIndex()] : old; }; while (!stack.empty()) { From 5ad914fb13b93f4a6d58209d6f2ec7bd7b2858bd Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sun, 29 Dec 2024 20:50:23 +0800 Subject: [PATCH 36/37] fixed some bugs and simplify a bit --- src/sdf/small_vector.h | 250 ++++++++++------------------------------- 1 file changed, 62 insertions(+), 188 deletions(-) diff --git a/src/sdf/small_vector.h b/src/sdf/small_vector.h index 73e81742b..f61cd098d 100644 --- a/src/sdf/small_vector.h +++ b/src/sdf/small_vector.h @@ -19,8 +19,6 @@ namespace manifold { -// note that this will not work with non-trivial data (custom -// constructor/destructor) template class small_vector { std::array stack_; @@ -34,15 +32,16 @@ class small_vector { using const_reference = const value_type &; using pointer = T *; using const_pointer = const T *; + using iterator = T *; + using const_iterator = const T *; small_vector() = default; explicit small_vector(size_type count, const T &value = T()) { if (count <= N) { - std::fill(stack_.begin(), stack_.begin() + count, value); + std::uninitialized_fill_n(stack_.begin(), count, value); } else { - // use heap - heap_.resize(count, value); + heap_ = std::vector(count, value); } size_ = count; } @@ -58,7 +57,7 @@ class small_vector { small_vector(std::initializer_list initlist) { const auto input_size = 
initlist.size(); if (input_size <= N) { - std::copy(initlist.begin(), initlist.end(), stack_.begin()); + std::uninitialized_copy(initlist.begin(), initlist.end(), stack_.begin()); } else { std::copy(initlist.begin(), initlist.end(), std::back_inserter(heap_)); } @@ -66,23 +65,34 @@ class small_vector { } small_vector &operator=(const small_vector &rhs) { - stack_ = rhs.stack_; - heap_ = rhs.heap_; + if (this == &rhs) return *this; + clear(); // drop old contents, whether on stack or heap + if (rhs.size_ <= N) { + std::uninitialized_copy(rhs.begin(), rhs.end(), stack_.begin()); + } else { + heap_ = rhs.heap_; + } size_ = rhs.size_; return *this; } small_vector &operator=(small_vector &&rhs) { - stack_ = std::move(rhs.stack_); - heap_ = std::move(rhs.heap_); + if (this == &rhs) return *this; + clear(); // drop old contents, whether on stack or heap + if (rhs.size_ <= N) { + std::uninitialized_move(rhs.begin(), rhs.end(), stack_.begin()); + } else { + heap_ = std::move(rhs.heap_); + } size_ = rhs.size_; rhs.size_ = 0; return *this; } small_vector &operator=(std::initializer_list rhs) { + clear(); // drop old contents, whether on stack or heap if (rhs.size() <= N) { - stack_ = rhs; + std::uninitialized_copy(rhs.begin(), rhs.end(), stack_.begin()); } else { heap_ = rhs; } @@ -90,83 +100,37 @@ class small_vector { } reference at(size_type pos) { - if (size_ <= N) { - return stack_.at(pos); - } else { - return heap_.at(pos); - } + return size_ <= N ? stack_.at(pos) : heap_.at(pos); } const_reference at(size_type pos) const { - if (size_ <= N) { - return stack_.at(pos); - } else { - return heap_.at(pos); - } + return size_ <= N ? stack_.at(pos) : heap_.at(pos); } reference operator[](size_type pos) { - if (size_ <= N) { - return stack_[pos]; - } else { - return heap_[pos]; - } + return size_ <= N ? stack_[pos] : heap_[pos]; } const_reference operator[](size_type pos) const { - if (size_ <= N) { - return stack_[pos]; - } else { - return heap_[pos]; - } + return size_ <= N ? 
stack_[pos] : heap_[pos]; } - reference front() { - if (size_ <= N) { - return stack_.front(); - } else { - return heap_.front(); - } - } + reference front() { return size_ <= N ? stack_.front() : heap_.front(); } const_reference front() const { - if (size_ <= N) { - return stack_.front(); - } else { - return heap_.front(); - } + return size_ <= N ? stack_.front() : heap_.front(); } - reference back() { - if (size_ <= N) { - return stack_[size_ - 1]; - } else { - return heap_[size_ - 1]; - } - } + reference back() { return size_ <= N ? stack_[size_ - 1] : heap_.back(); } const_reference back() const { - if (size_ <= N) { - return stack_[size_ - 1]; - } else { - return heap_[size_ - 1]; - } + return size_ <= N ? stack_[size_ - 1] : heap_.back(); } - pointer data() noexcept { - if (size_ <= N) { - return stack_.data(); - } else { - return heap_.data(); - } - } + pointer data() noexcept { return size_ <= N ? stack_.data() : heap_.data(); } const_pointer data() const noexcept { - if (size_ <= N) { - return stack_.data(); - } else { - return heap_.data(); - } + return size_ <= N ? 
stack_.data() : heap_.data(); } bool empty() const { return size_ == 0; } @@ -174,18 +138,15 @@ class small_vector { size_type size() const { return size_; } void shrink_to_fit() { - if (size_ > N) { - heap_.shrink_to_fit(); - } + if (size_ > N) heap_.shrink_to_fit(); } void push_back(const T &value) { if (size_ < N) { - stack_[size_] = value; + stack_[size_] = value; } else { - if (size_ == N) { + if (size_ == N) std::move(stack_.begin(), stack_.end(), std::back_inserter(heap_)); - } heap_.emplace_back(value); } size_ += 1; @@ -195,6 +156,7 @@ class small_vector { if (size_ == 0) return; if (size_ <= N) { size_ -= 1; + std::destroy_at(&stack_[size_]); } else { // currently using heap heap_.pop_back(); @@ -202,7 +164,7 @@ class small_vector { // now check if all data can fit on stack // if so, move back to stack if (size_ <= N) { - std::move(heap_.begin(), heap_.end(), stack_.begin()); + std::uninitialized_move(heap_.begin(), heap_.end(), stack_.begin()); heap_.clear(); } } @@ -215,26 +177,33 @@ class small_vector { if (size_ >= N) { // currently, all data on heap // move back to stack - std::move(heap_.begin(), heap_.end(), stack_.begin()); + std::uninitialized_move(heap_.begin(), heap_.begin() + count, + stack_.begin()); heap_.clear(); } else { - // all data already on stack - // just update size + if (size_ < count) + std::uninitialized_fill(stack_.begin() + size_, + stack_.begin() + count, value); + else if (count < size_) + std::destroy(stack_.begin() + count, stack_.begin() + size_); } } else { // new `count` of data is going to be on the heap // check if data is currently on the stack - if (size_ <= N) { - // move to heap - std::move(stack_.begin(), stack_.end(), std::back_inserter(heap_)); - } + if (size_ <= N) + std::move(stack_.begin(), stack_.begin() + size_, + std::back_inserter(heap_)); heap_.resize(count, value); } size_ = count; } void clear() { - if (size_ > N) heap_.clear(); + if (size_ > N) { + heap_.clear(); + } else { + std::destroy(begin(), 
end()); + } size_ = 0; } @@ -244,114 +213,16 @@ class small_vector { std::swap(size_, other.size_); }; - class iterator { - public: - using self_type = iterator; - using value_type = T; - using reference = T &; - using pointer = T *; - using difference_type = int; - using iterator_category = std::bidirectional_iterator_tag; - iterator(pointer ptr) : ptr_(ptr) {} - self_type operator++() { - ptr_++; - return *this; - } - self_type operator++(int) { - self_type i = *this; - ptr_++; - return i; - } - self_type operator--() { - ptr_--; - return *this; - } - self_type operator--(int) { - self_type i = *this; - ptr_--; - return i; - } - self_type operator+(size_type i) const { return self_type(ptr_ + i); } - self_type operator-(size_type i) const { return self_type(ptr_ - i); } - reference operator*() { return *ptr_; } - const value_type &operator*() const { return *ptr_; } - pointer operator->() { return ptr_; } - const pointer operator->() const { return ptr_; } - bool operator==(const self_type &rhs) const { return ptr_ == rhs.ptr_; } - bool operator!=(const self_type &rhs) const { return ptr_ != rhs.ptr_; } - - private: - pointer ptr_; - }; + iterator begin() { return size_ <= N ? 
stack_.data() : heap_.data(); } - class const_iterator { - public: - using self_type = const_iterator; - using value_type = T; - using reference = const T &; - using pointer = const T *; - using difference_type = int; - using iterator_category = std::bidirectional_iterator_tag; - const_iterator(pointer ptr) : ptr_(ptr) {} - self_type operator++() { - ptr_++; - return *this; - } - self_type operator++(int) { - self_type i = *this; - ptr_++; - return i; - } - self_type operator--() { - ptr_--; - return *this; - } - self_type operator--(int) { - self_type i = *this; - ptr_--; - return i; - } - self_type operator+(size_type i) const { return self_type(ptr_ + i); } - self_type operator-(size_type i) const { return self_type(ptr_ - i); } - reference operator*() const { return *ptr_; } - pointer operator->() const { return ptr_; } - bool operator==(const self_type &rhs) const { return ptr_ == rhs.ptr_; } - bool operator!=(const self_type &rhs) const { return ptr_ != rhs.ptr_; } - - private: - pointer ptr_; - }; - - iterator begin() { - if (size_ <= N) { - return iterator(stack_.data()); - } else { - return iterator(heap_.data()); - } - } - - iterator end() { - if (size_ <= N) { - return iterator(stack_.data() + size_); - } else { - return iterator(heap_.data() + size_); - } - } + iterator end() { return (size_ <= N ? stack_.data() : heap_.data()) + size_; } const_iterator cbegin() const { - if (size_ <= N) { - return const_iterator(stack_.data()); - } else { - return const_iterator(heap_.data()); - } + return size_ <= N ? stack_.data() : heap_.data(); } const_iterator cend() const { - if (size_ <= N) { - return const_iterator(stack_.data() + size_); - } else { - return const_iterator(heap_.data() + size_); - } + return (size_ <= N ? 
stack_.data() : heap_.data()) + size_; } const_iterator begin() const { return cbegin(); } @@ -363,12 +234,14 @@ class small_vector { size_type i = std::distance(begin(), iter); if (size_ <= N) { if (i < size_ - 1) + // probably need a custom loop if we want to work with non-trivial data + // type std::move(stack_.begin() + i + 1, stack_.begin() + size_, stack_.begin() + i); } else { heap_.erase(heap_.begin() + i); if (size_ == N + 1) { - std::copy(heap_.begin(), heap_.end(), stack_.begin()); + std::uninitialized_move(heap_.begin(), heap_.end(), stack_.begin()); heap_.clear(); } } @@ -383,13 +256,14 @@ class small_vector { size_type i = std::distance(begin(), iter); if (size_ < N) { if (i < size_) + // probably need a custom loop if we want to work with non-trivial data + // type std::move_backward(stack_.begin() + i, stack_.begin() + size_, stack_.begin() + size_ + 1); stack_[i] = value; } else { - if (size_ == N) { - std::copy(stack_.begin(), stack_.end(), std::back_inserter(heap_)); - } + if (size_ == N) + std::move(stack_.begin(), stack_.end(), std::back_inserter(heap_)); heap_.insert(heap_.begin() + i, value); } size_ += 1; From 9716bc37df435170ad3f4b2954373387d978d67f Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sun, 29 Dec 2024 22:31:34 +0800 Subject: [PATCH 37/37] update optimizing_tape --- src/sdf/affine_value.h | 42 +++++++++++++++++++++++++++++++++++++++ src/sdf/context.cpp | 25 +---------------------- src/sdf/context.h | 1 - src/sdf/optimizing_tape.h | 14 ++++++------- 4 files changed, 50 insertions(+), 32 deletions(-) create mode 100644 src/sdf/affine_value.h diff --git a/src/sdf/affine_value.h b/src/sdf/affine_value.h new file mode 100644 index 000000000..766279f90 --- /dev/null +++ b/src/sdf/affine_value.h @@ -0,0 +1,42 @@ +// Copyright 2024 The Manifold Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "context.h" + +namespace manifold::sdf { +struct AffineValue { + // value = var * a + b + Operand var; + double a; + double b; + + AffineValue(Operand var, double a, double b) : var(var), a(a), b(b) {} + AffineValue(double constant) : var(Operand::none()), a(0.0), b(constant) {} + bool operator==(const AffineValue &other) const { + return var == other.var && a == other.a && b == other.b; + } + AffineValue operator+(double d) { return AffineValue(var, a, b + d); } + AffineValue operator*(double d) { return AffineValue(var, a * d, b * d); } +}; +} // namespace manifold::sdf + +template <> +struct std::hash { + size_t operator()(const AffineValue &value) const { + size_t h = std::hash()(value.var.id); + hash_combine(h, value.a, value.b); + return h; + } +}; diff --git a/src/sdf/context.cpp b/src/sdf/context.cpp index a3b7d8e17..1aa95f6ae 100644 --- a/src/sdf/context.cpp +++ b/src/sdf/context.cpp @@ -20,32 +20,9 @@ #include #endif +#include "affine_value.h" #include "manifold/optional_assert.h" -struct AffineValue { - // value = var * a + b - Operand var; - double a; - double b; - - AffineValue(Operand var, double a, double b) : var(var), a(a), b(b) {} - AffineValue(double constant) : var(Operand::none()), a(0.0), b(constant) {} - bool operator==(const AffineValue &other) const { - return var == other.var && a == other.a && b == other.b; - } - AffineValue operator+(double d) { return AffineValue(var, a, b + d); } - AffineValue operator*(double d) { return AffineValue(var, a * d, b * d); } -}; - -template <> -struct std::hash { 
- size_t operator()(const AffineValue &value) const { - size_t h = std::hash()(value.var.id); - hash_combine(h, value.a, value.b); - return h; - } -}; - namespace manifold::sdf { void Context::dump() const { #ifdef MANIFOLD_DEBUG diff --git a/src/sdf/context.h b/src/sdf/context.h index 772366dc2..e0dc20f84 100644 --- a/src/sdf/context.h +++ b/src/sdf/context.h @@ -57,7 +57,6 @@ using namespace manifold::sdf; inline void hash_combine(std::size_t& seed) {} -// note: ankerl hash combine function is too costly template inline void hash_combine(std::size_t& seed, const T& v, Rest... rest) { std::hash hasher; diff --git a/src/sdf/optimizing_tape.h b/src/sdf/optimizing_tape.h index d7e0a4ca1..90d80cab7 100644 --- a/src/sdf/optimizing_tape.h +++ b/src/sdf/optimizing_tape.h @@ -15,6 +15,7 @@ #include +#include "affine_value.h" #include "interval.h" #include "manifold/vec_view.h" @@ -58,7 +59,7 @@ class OptimizerContext { * For dependencies, we use relative index to track them. * ID = current ID - value * - If value is 0, this means the operand is not being used, i.e. the - * instruction does not have 3 operands. + * instruction does not have that operand. * - If value is 255, this means the dependency is too far away, and we * should look it up in far dependencies. * Ideally, we should not have too many far dependencies. @@ -80,10 +81,9 @@ class OptimizerContext { * memory operations, we reuse them. * * - `buffer` is the regular register buffer for tape evaluation. - * - `constantOffset` is a constant that adds to a corresponding register. - * This can be constant folded. - * - `results` contains instruction id + register id, indicating the - * predetermined branch result for choice/min/max function. + * - `affineValue` is the affine value associated with a register. + * - `results` contains instruction id + affine value, indicating the + * optimized result for some instruction. * Note that this must be sorted according to instruction id. 
* - `uses` is the temporary use count vector that is mutable in each * evaluation. It is reset before each evaluation. @@ -91,8 +91,8 @@ class OptimizerContext { * elimination. */ VecView> buffer; - VecView constantOffset; - std::vector> results; + VecView affineValues; + std::vector> results; std::vector uses; std::vector dead; };