From 9c9ed402a95985efde4fde0d0bdcddaa4ee734ac Mon Sep 17 00:00:00 2001 From: Andrew Woloszyn Date: Mon, 26 Feb 2018 14:42:36 -0500 Subject: [PATCH 1/4] Mod(%) on arm is particularly slow. Replace % with bitwise arithmetic. This helps reduce the performance regression seen between 1.03 and master. --- gapil/compiler/map.go | 8 ++++---- gapil/runtime/cc/map.inc | 9 ++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/gapil/compiler/map.go b/gapil/compiler/map.go index 54fe625f9d..aa108debe5 100644 --- a/gapil/compiler/map.go +++ b/gapil/compiler/map.go @@ -219,7 +219,7 @@ func (c *C) buildMapType(t *semantic.Map) { capacity := m.Index(0, MapCapacity).Load() elements := m.Index(0, MapElements).Load() s.ForN(capacity, func(it *codegen.Value) *codegen.Value { - check := s.Rem(s.Add(h, it), capacity) + check := s.And(s.Add(h, it), s.Sub(capacity, s.Scalar(u64(1)))) valid := elements.Index(check, "used").Load() s.If(c.equal(s, valid, s.Scalar(mapElementEmpty)), func() { s.Return(s.Scalar(false)) @@ -251,7 +251,7 @@ func (c *C) buildMapType(t *semantic.Map) { h := c.hashValue(s, t.KeyType, k) // Search for existing s.ForN(capacity, func(it *codegen.Value) *codegen.Value { - check := s.Rem(s.Add(h, it), capacity) + check := s.And(s.Add(h, it), s.Sub(capacity, s.Scalar(u64(1)))) valid := elements.Index(check, "used").Load() s.If(c.equal(s, valid, s.Scalar(mapElementFull)), func() { found := c.equal(s, elements.Index(check, "k").Load(), k) @@ -273,7 +273,7 @@ func (c *C) buildMapType(t *semantic.Map) { getStorageBucket := func(h, table, tablesize *codegen.Value) *codegen.Value { newBucket := s.Local("newBucket", u64Type) s.ForN(tablesize, func(it *codegen.Value) *codegen.Value { - check := s.Rem(s.Add(h, it), tablesize).SetName("hash_bucket") + check := s.And(s.Add(h, it), s.Sub(tablesize, s.Scalar(u64(1)))).SetName("hash_bucket") newBucket.Store(check) valid := table.Index(check, "used").Load() notFound := c.equal(s, valid, s.Scalar(mapElementFull)) @@ -364,7 +364,7 @@ func (c *C) buildMapType(t *semantic.Map) { elements := m.Index(0, MapElements).Load() // Search for existing s.ForN(capacity, func(it *codegen.Value) *codegen.Value { - check := s.Rem(s.Add(h, it), capacity) + check := s.And(s.Add(h, it), s.Sub(capacity, s.Scalar(u64(1)))) valid := elements.Index(check, "used").Load() s.If(c.equal(s, valid, s.Scalar(mapElementFull)), func() { found := c.equal(s, elements.Index(check, "k").Load(), k) diff --git a/gapil/runtime/cc/map.inc b/gapil/runtime/cc/map.inc index 05cd67d021..ddee971ccc 100644 --- a/gapil/runtime/cc/map.inc +++ b/gapil/runtime/cc/map.inc @@ -22,6 +22,9 @@ #include +static_assert((GAPIL_MIN_MAP_SIZE & (GAPIL_MIN_MAP_SIZE - 1)) == 0, "Map size must be a power of 2"); +static_assert((GAPIL_MAP_GROW_MULTIPLIER & (GAPIL_MAP_GROW_MULTIPLIER - 1)) == 0, "Map size must be a power of 2"); + namespace gapil { template @@ -109,7 +112,7 @@ V* Map::Allocation::index(K key, bool insert) { for (uint64_t i = 0; i < capacity; ++i) { bool leave = false; - uint64_t lookup_pos = (hash + i) % capacity; + uint64_t lookup_pos = (hash + i) & (capacity - 1); switch(elems[lookup_pos].used) { case GAPIL_MAP_ELEMENT_EMPTY: leave = true; @@ -131,7 +134,7 @@ V* Map::Allocation::index(K key, bool insert) { auto storageBucket = [&](uint64_t h) { auto elems = els(); for (uint64_t i = 0; i < capacity; ++i) { - uint64_t x = (h + i) % capacity; + uint64_t x = (h + i) & (capacity - 1); if (elems[x].used != GAPIL_MAP_ELEMENT_FULL) { return x; } @@ -201,7 +204,7 @@ void Map::Allocation::remove(K key) { auto elems = els(); for (uint64_t i = 0; i < capacity; ++i) { - uint64_t lookup_pos = (hash + i) % capacity; + uint64_t lookup_pos = (hash + i) & (capacity - 1); switch(elems[lookup_pos].used) { case GAPIL_MAP_ELEMENT_EMPTY: return; From 65b9cd21822daee2655782c47b9bd24521d70e89 Mon Sep 17 00:00:00 2001 From: Andrew Woloszyn Date: Tue, 27 Feb 2018 09:02:58 -0500 Subject: [PATCH 2/4] Disable arena tracking in libgapii. This is a significant performance improvement. --- core/memory/arena/BUILD.bazel | 2 +- core/memory/arena/cc/BUILD.bazel | 13 +++++++++++++ core/memory/arena/cc/arena.cpp | 20 ++++++++++++++++++++ gapii/cc/BUILD.bazel | 2 +- gapil/runtime/cc/BUILD.bazel | 20 ++++++++++++++++---- gapil/runtime/cc/runtime.cpp | 5 +++++ 6 files changed, 56 insertions(+), 6 deletions(-) diff --git a/core/memory/arena/BUILD.bazel b/core/memory/arena/BUILD.bazel index 456117d375..f12cb615ae 100644 --- a/core/memory/arena/BUILD.bazel +++ b/core/memory/arena/BUILD.bazel @@ -19,7 +19,7 @@ go_library( srcs = [ "arena.go", ], - cdeps = ["//core/memory/arena/cc"], + cdeps = ["//core/memory/arena/tracking_arena"], cgo = True, clinkopts = [], # keep importpath = "github.com/google/gapid/core/memory/arena", diff --git a/core/memory/arena/cc/BUILD.bazel b/core/memory/arena/cc/BUILD.bazel index 5ce999fc03..790378a06d 100644 --- a/core/memory/arena/cc/BUILD.bazel +++ b/core/memory/arena/cc/BUILD.bazel @@ -24,3 +24,16 @@ cc_library( copts = cc_copts(), visibility = ["//visibility:public"], ) + +cc_library( + name = "tracking_arena", + srcs = glob( + ["*.cpp"], + ), + deps = [ + "//core/cc", + ], + hdrs = glob(["*.h"]), + copts = cc_copts() + ["-DTRACK_ALLOCATIONS"], + visibility = ["//visibility:public"], +) \ No newline at end of file diff --git a/core/memory/arena/cc/arena.cpp b/core/memory/arena/cc/arena.cpp index 72ce29471a..38705f82ea 100644 --- a/core/memory/arena/cc/arena.cpp +++ b/core/memory/arena/cc/arena.cpp @@ -44,46 +44,66 @@ namespace core { Arena::Arena() {} Arena::~Arena() { +#ifdef TRACK_ALLOCATIONS for (void* ptr : allocations) { ::free(ptr); } allocations.clear(); +#endif } void* Arena::allocate(uint32_t size, uint32_t align) { void* ptr = malloc(size); // TODO: alignment +#ifdef TRACK_ALLOCATIONS allocations.insert(ptr); +#endif return ptr; } void* Arena::reallocate(void* ptr, uint32_t size, uint32_t align) { GAPID_ASSERT_MSG(this->owns(ptr), "ptr: %p", ptr); void* newptr = realloc(ptr, size); // TODO: alignment +#ifdef TRACK_ALLOCATIONS allocations.erase(ptr); allocations.insert(newptr); +#endif return newptr; } void Arena::free(void* ptr) { GAPID_ASSERT_MSG(this->owns(ptr), "ptr: %p", ptr); +#ifdef TRACK_ALLOCATIONS allocations.erase(ptr); +#endif ::free(ptr); } bool Arena::owns(void* ptr) { +#ifdef TRACK_ALLOCATIONS return allocations.count(ptr) == 1; +#else + return true; +#endif } size_t Arena::num_allocations() const { +#ifdef TRACK_ALLOCATIONS return allocations.size(); +#else + return 0; +#endif } size_t Arena::num_bytes_allocated() const { +#ifdef TRACK_ALLOCATIONS size_t bytes = 0; for (void* ptr : allocations) { bytes += alloc_size(ptr); } return bytes; +#else + return 0; +#endif } } // namespace core diff --git a/gapii/cc/BUILD.bazel b/gapii/cc/BUILD.bazel index 2e7e1405d0..d7d1a343d0 100644 --- a/gapii/cc/BUILD.bazel +++ b/gapii/cc/BUILD.bazel @@ -142,7 +142,7 @@ cc_library( "//core/memory/arena/cc", "//core/memory_tracker/cc", "//core/os/device/deviceinfo/cc", - "//gapil/runtime/cc", + "//gapil/runtime/cc:cc", "//gapis/api:api_cc_proto", "//gapis/api/gles/gles_pb:gles_pb_cc_proto", "//gapis/api/gvr/gvr_pb:gvr_pb_cc_proto", diff --git a/gapil/runtime/cc/BUILD.bazel b/gapil/runtime/cc/BUILD.bazel index 50db7853b9..1ed7f52a23 100644 --- a/gapil/runtime/cc/BUILD.bazel +++ b/gapil/runtime/cc/BUILD.bazel @@ -19,13 +19,25 @@ cc_library( hdrs = glob(["*.h"]), copts = cc_copts(), visibility = ["//visibility:public"], +) + +cc_library( + name = "cc", + srcs = [ + "runtime.cpp", + "string.cpp", + ], + hdrs = glob(["*.inc"]), + copts = cc_copts(), + visibility = ["//visibility:public"], deps = [ - "//core/memory/arena/cc", + ":headers", + "//core/memory/arena/cc:cc", ], ) cc_library( - name = "cc", + name = "test_cc", srcs = [ "runtime.cpp", "string.cpp", @@ -35,7 +47,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":headers", - "//core/memory/arena/cc", + "//core/memory/arena/cc:tracking_arena", ], ) @@ -53,7 +65,7 @@ cc_test( "//conditions:default": [], }), deps = [ - ":cc", + ":test_cc", "@gtest//:gtest_main", ] ) \ No newline at end of file diff --git a/gapil/runtime/cc/runtime.cpp b/gapil/runtime/cc/runtime.cpp index f850ca0c99..787c5cf8b3 100644 --- a/gapil/runtime/cc/runtime.cpp +++ b/gapil/runtime/cc/runtime.cpp @@ -22,6 +22,11 @@ #include #include +#if TARGET_OS == GAPID_OS_ANDROID +// for snprintf +#include +#endif + #define __STDC_FORMAT_MACROS #include From a1eaf22d7f143e5b9544602ee05509a14608b07f Mon Sep 17 00:00:00 2001 From: Andrew Woloszyn Date: Tue, 27 Feb 2018 09:56:44 -0500 Subject: [PATCH 3/4] Clean up the build rules slightly. --- gapii/cc/BUILD.bazel | 2 +- gapil/runtime/cc/BUILD.bazel | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gapii/cc/BUILD.bazel b/gapii/cc/BUILD.bazel index d7d1a343d0..2e7e1405d0 100644 --- a/gapii/cc/BUILD.bazel +++ b/gapii/cc/BUILD.bazel @@ -142,7 +142,7 @@ cc_library( "//core/memory/arena/cc", "//core/memory_tracker/cc", "//core/os/device/deviceinfo/cc", - "//gapil/runtime/cc:cc", + "//gapil/runtime/cc", "//gapis/api:api_cc_proto", "//gapis/api/gles/gles_pb:gles_pb_cc_proto", "//gapis/api/gvr/gvr_pb:gvr_pb_cc_proto", diff --git a/gapil/runtime/cc/BUILD.bazel b/gapil/runtime/cc/BUILD.bazel index 1ed7f52a23..bd36295dd8 100644 --- a/gapil/runtime/cc/BUILD.bazel +++ b/gapil/runtime/cc/BUILD.bazel @@ -32,7 +32,7 @@ cc_library( visibility = ["//visibility:public"], deps = [ ":headers", - "//core/memory/arena/cc:cc", + "//core/memory/arena/cc", ], ) From 90db4adf0e7a94f2ccfa61fa4ab4cfac825b871e Mon Sep 17 00:00:00 2001 From: Andrew Woloszyn Date: Tue, 27 Feb 2018 10:05:57 -0500 Subject: [PATCH 4/4] Add an assert to the compiler for map sizes. We require that they are a power of 2, otherwise we have to fall back to a slow % operator, instead of &. --- gapil/compiler/map.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gapil/compiler/map.go b/gapil/compiler/map.go index aa108debe5..ab715c1d5b 100644 --- a/gapil/compiler/map.go +++ b/gapil/compiler/map.go @@ -66,6 +66,12 @@ import ( // If we end up with lots of insertions/deletions, this will prevent linear search func (c *C) defineMapType(t *semantic.Map) { + + if ((minMapSize & (minMapSize - 1)) != 0) || + ((mapGrowMultiplier & (mapGrowMultiplier - 1)) != 0) { + fail("Map size must be a power of 2") + } + mapPtrTy := c.T.target[t].(codegen.Pointer) mapStrTy := mapPtrTy.Element.(*codegen.Struct) keyTy := c.T.Target(t.KeyType)