Skip to content

Commit bb80f94

Browse files
committed
Implement scraper global statistics cache optimization
This commit changes the ConvergedManifestPartsMap to a map of pointers, ConvergedManifestPartPtrsMap. This commit may not yet solve all of the pointer ownership problems, but it solves most of them. The CScraperManifest is held by the ConvergedManifest via a shared_ptr. In turn, the CParts in the CScraperManifest::CSplitBlob vParts are also mapped into the ConvergedManifestPartPtrsMap, which is indexed by project. The part pointers in the ConvergedManifestPartPtrsMap are valid for the same lifetime as the CScraperManifest held by shared pointer in the ConvergedManifest, so I think this is safe. This minimizes the changes to the rest of the scraper needed to achieve this optimization. I am less certain of the pointer safety of the changes I made in the quorum and superblock classes.
1 parent c0a6573 commit bb80f94

File tree

7 files changed

+438
-120
lines changed

7 files changed

+438
-120
lines changed

src/neuralnet/quorum.cpp

+9-9
Original file line numberDiff line numberDiff line change
@@ -788,11 +788,11 @@ class SuperblockValidator
788788
//! \param project_name Identifies the project to add.
789789
//! \param project_part_ptr Pointer to the part that holds the serialized project stats.
790790
//!
791-
void AddPart(std::string project_name, CSerializeData project_part_data)
791+
void AddPart(std::string project_name, CSplitBlob::CPart* project_part_ptr)
792792
{
793-
m_convergence.ConvergedManifestPartsMap.emplace(
793+
m_convergence.ConvergedManifestPartPtrsMap.emplace(
794794
std::move(project_name),
795-
std::move(project_part_data));
795+
std::move(project_part_ptr));
796796
}
797797

798798
//!
@@ -944,7 +944,7 @@ class SuperblockValidator
944944

945945
convergence.AddPart(
946946
project_pair.first, // project name
947-
GetResolvedPartData(resolved_part.m_part_hash));
947+
GetResolvedPartPtr(resolved_part.m_part_hash));
948948

949949
remainder -= part_index * project.m_combiner_mask;
950950

@@ -981,7 +981,7 @@ class SuperblockValidator
981981
//!
982982
//! \return Pointer to the part to add to a convergence, or nullptr if the part no longer exists.
983983
//!
984-
static CSerializeData GetResolvedPartData(const uint256& part_hash)
984+
static CSplitBlob::CPart* GetResolvedPartPtr(const uint256& part_hash)
985985
{
986986
LOCK(CSplitBlob::cs_mapParts);
987987

@@ -991,10 +991,10 @@ class SuperblockValidator
991991
// the most recent project part should always exist:
992992
if (iter == CSplitBlob::mapParts.end()) {
993993
LogPrintf("ValidateSuperblock(): project part disappeared.");
994-
return CSerializeData();
994+
return nullptr;
995995
}
996996

997-
return iter->second.data;
997+
return &(iter->second);
998998
}
999999

10001000
//!
@@ -1029,7 +1029,7 @@ class SuperblockValidator
10291029
return;
10301030
}
10311031

1032-
convergence.AddPart("BeaconList", manifest.vParts[0]->data);
1032+
convergence.AddPart("BeaconList", manifest.vParts[0]);
10331033

10341034
// Find the offset of the verified beacons project part. Typically
10351035
// this exists at vParts offset 1 when a scraper verified at least
@@ -1054,7 +1054,7 @@ class SuperblockValidator
10541054
return;
10551055
}
10561056

1057-
convergence.AddPart("VerifiedBeacons", manifest.vParts[part_offset]->data);
1057+
convergence.AddPart("VerifiedBeacons", manifest.vParts[part_offset]);
10581058
}
10591059
}; // ProjectCombiner
10601060

src/neuralnet/quorum.h

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include <string>
4+
#include "scraper_net.h"
45

56
class CBlockIndex;
67

src/neuralnet/superblock.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -557,9 +557,9 @@ Superblock Superblock::FromConvergence(
557557
// Add hints created from the hashes of converged manifest parts to each
558558
// superblock project section to assist receiving nodes with validation:
559559
//
560-
for (const auto& part_pair : stats.Convergence.ConvergedManifestPartsMap) {
560+
for (const auto& part_pair : stats.Convergence.ConvergedManifestPartPtrsMap) {
561561
const std::string& project_name = part_pair.first;
562-
const CSerializeData& part_data = part_pair.second;
562+
const CSerializeData& part_data = part_pair.second->data;
563563

564564
projects.SetHint(project_name, part_data);
565565
}

src/neuralnet/superblock.h

+27
Original file line numberDiff line numberDiff line change
@@ -1533,6 +1533,22 @@ struct hash<NN::QuorumHash>
15331533
// This is part of the scraper but is put here, because it needs the complete NN:Superblock class.
15341534
struct ConvergedScraperStats
15351535
{
1536+
ConvergedScraperStats() : Convergence(), NewFormatSuperblock()
1537+
{
1538+
bClean = false;
1539+
1540+
nTime = 0;
1541+
mScraperConvergedStats = {};
1542+
PastConvergences = {};
1543+
}
1544+
1545+
ConvergedScraperStats(const int64_t nTime_in, const ConvergedManifest& Convergence) : Convergence(Convergence)
1546+
{
1547+
bClean = false;
1548+
1549+
nTime = nTime_in;
1550+
}
1551+
15361552
// Flag to indicate cache is clean or dirty (i.e. state change of underlying statistics has occurred).
15371553
// This flag is marked true in ScraperGetSuperblockContract() and false on receipt or deletion of
15381554
// statistics objects.
@@ -1558,7 +1574,18 @@ struct ConvergedScraperStats
15581574
{
15591575
// This is specifically this form of insert to ensure that if there is a hint "collision" the referenced
15601576
// SB Hash and Convergence stored will be the LATER one.
1577+
15611578
PastConvergences[nReducedContentHash] = std::make_pair(NewFormatSuperblock.GetHash(), Convergence);
1579+
1580+
/*
1581+
if (PastConvergences.find(nReducedContentHash) != PastConvergences.end())
1582+
{
1583+
PastConvergences.erase(nReducedContentHash);
1584+
}
1585+
1586+
PastConvergences.emplace(std::make_pair(nReducedContentHash, Convergence));
1587+
*/
1588+
15621589
}
15631590
}
15641591

src/scraper/fwd.h

+218-4
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#include "util.h"
1010
#include "streams.h"
1111

12+
#include "scraper_net.h"
13+
1214
/*********************
1315
* Scraper ENUMS *
1416
*********************/
@@ -56,12 +58,164 @@ typedef std::multimap<int64_t, std::pair<uint256, uint256>, std::greater <int64_
5658
typedef std::map<ScraperID, mCSManifest> mmCSManifestsBinnedByScraper;
5759

5860
// -------------- Project ---- Converged Part
59-
typedef std::map<std::string, CSerializeData> mConvergedManifestParts;
60-
// Note that this IS a copy not a pointer. Since manifests and parts can be deleted because of aging rules,
61-
// it is dangerous to save memory and point to the actual part objects themselves.
61+
// typedef std::map<std::string, CSerializeData> mConvergedManifestParts;
62+
63+
// -------------- Project -- Converged Part Pointer
64+
typedef std::map<std::string, CSplitBlob::CPart*> mConvergedManifestPart_ptrs;
6265

6366
struct ConvergedManifest
6467
{
68+
ConvergedManifest()
69+
{
70+
nContentHash = {};
71+
ConsensusBlock = {};
72+
timestamp = 0;
73+
bByParts = false;
74+
75+
CScraperConvergedManifest_ptr = nullptr;
76+
77+
ConvergedManifestPartPtrsMap = {};
78+
79+
mIncludedScraperManifests = {};
80+
81+
nUnderlyingManifestContentHash = {};
82+
83+
vIncludedScrapers = {};
84+
vExcludedScrapers = {};
85+
vScrapersNotPublishing = {};
86+
87+
mIncludedScrapersbyProject = {};
88+
mIncludedProjectsbyScraper = {};
89+
90+
mScraperConvergenceCountbyProject = {};
91+
92+
vExcludedProjects = {};
93+
}
94+
95+
ConvergedManifest(const ConvergedManifest& in)
96+
{
97+
// We can use the content hash from the specified converged manifest. We do not need to recompute it.
98+
nContentHash = in.nContentHash;
99+
100+
ConsensusBlock = in.ConsensusBlock;
101+
timestamp = in.timestamp;
102+
bByParts = in.bByParts;
103+
104+
CScraperConvergedManifest_ptr = in.CScraperConvergedManifest_ptr;
105+
106+
PopulateConvergedManifestPartPtrsMap();
107+
108+
mIncludedScraperManifests = in.mIncludedScraperManifests;
109+
110+
nUnderlyingManifestContentHash = in.nUnderlyingManifestContentHash;
111+
112+
vIncludedScrapers = in.vIncludedScrapers;
113+
vExcludedScrapers = in.vExcludedScrapers;
114+
vScrapersNotPublishing = in.vScrapersNotPublishing;
115+
116+
mIncludedScrapersbyProject = in.mIncludedScrapersbyProject;
117+
mIncludedProjectsbyScraper = in.mIncludedProjectsbyScraper;
118+
119+
mScraperConvergenceCountbyProject = in.mScraperConvergenceCountbyProject;
120+
121+
vExcludedProjects = in.vExcludedProjects;
122+
}
123+
124+
// For constructing a dummy converged manifest from a single manifest
125+
ConvergedManifest(CScraperManifest& in)
126+
{
127+
ConsensusBlock = in.ConsensusBlock;
128+
timestamp = GetAdjustedTime();
129+
bByParts = false;
130+
131+
CScraperConvergedManifest_ptr = std::make_shared<CScraperManifest>(in);
132+
133+
PopulateConvergedManifestPartPtrsMap();
134+
135+
ComputeConvergedContentHash();
136+
137+
nUnderlyingManifestContentHash = in.nContentHash;
138+
}
139+
140+
141+
// Makes this object take on the state of another converged manifest.
//
// \param in The converged manifest to take the state from. The underlying
// CScraperManifest is shared with `in`, not copied.
//
// The part-pointer map is rebuilt from the shared manifest rather than copied
// from `in`, so it ends up empty when `in` holds no manifest.
void operator()(ConvergedManifest& in)
{
    // We can use the content hash from the specified converged manifest. We do not need to recompute it.
    nContentHash = in.nContentHash;

    ConsensusBlock = in.ConsensusBlock;
    timestamp = in.timestamp;
    bByParts = in.bByParts;

    CScraperConvergedManifest_ptr = in.CScraperConvergedManifest_ptr;

    // Drop whatever this object mapped before. The populate step inserts
    // without overwriting, so any entry left here would keep pointing at a
    // part of the manifest that was just released above.
    ConvergedManifestPartPtrsMap.clear();

    PopulateConvergedManifestPartPtrsMap();

    mIncludedScraperManifests = in.mIncludedScraperManifests;

    nUnderlyingManifestContentHash = in.nUnderlyingManifestContentHash;

    vIncludedScrapers = in.vIncludedScrapers;
    vExcludedScrapers = in.vExcludedScrapers;
    vScrapersNotPublishing = in.vScrapersNotPublishing;

    mIncludedScrapersbyProject = in.mIncludedScrapersbyProject;
    mIncludedProjectsbyScraper = in.mIncludedProjectsbyScraper;

    mScraperConvergenceCountbyProject = in.mScraperConvergenceCountbyProject;

    vExcludedProjects = in.vExcludedProjects;
}
171+
172+
bool operator()(const CScraperManifest& in)
173+
{
174+
ConsensusBlock = in.ConsensusBlock;
175+
timestamp = GetAdjustedTime();
176+
bByParts = false;
177+
178+
CScraperConvergedManifest_ptr = std::make_shared<CScraperManifest>(in);
179+
180+
bool bConvergedContentHashMatches = PopulateConvergedManifestPartPtrsMap();
181+
182+
ComputeConvergedContentHash();
183+
184+
nUnderlyingManifestContentHash = in.nContentHash;
185+
186+
return bConvergedContentHashMatches;
187+
}
188+
189+
void Reset()
190+
{
191+
nContentHash = {};
192+
ConsensusBlock = {};
193+
timestamp = 0;
194+
bByParts = false;
195+
196+
CScraperConvergedManifest_ptr = nullptr;
197+
198+
//ConvergedManifestPartsMap = {};
199+
200+
ConvergedManifestPartPtrsMap = {};
201+
202+
mIncludedScraperManifests = {};
203+
204+
nUnderlyingManifestContentHash = {};
205+
206+
vIncludedScrapers = {};
207+
vExcludedScrapers = {};
208+
vScrapersNotPublishing = {};
209+
210+
mIncludedScrapersbyProject = {};
211+
mIncludedProjectsbyScraper = {};
212+
213+
mScraperConvergenceCountbyProject = {};
214+
215+
vExcludedProjects = {};
216+
}
217+
218+
65219
// IMPORTANT... nContentHash is NOT the hash of part hashes in the order of vParts unlike CScraper::manifest.
66220
// It is the hash of the data in the ConvergedManifestPartPtrsMap in the order of the key. It represents
67221
// the composite convergence by taking parts piecewise in the case of the fallback to bByParts (project) level.
@@ -70,7 +224,11 @@ struct ConvergedManifest
70224
int64_t timestamp;
71225
bool bByParts;
72226

73-
mConvergedManifestParts ConvergedManifestPartsMap;
227+
std::shared_ptr<CScraperManifest> CScraperConvergedManifest_ptr;
228+
229+
// mConvergedManifestParts ConvergedManifestPartsMap;
230+
231+
mConvergedManifestPart_ptrs ConvergedManifestPartPtrsMap;
74232

75233
// Used when convergence is at the manifest level (normal)
76234
std::map<ScraperID, uint256> mIncludedScraperManifests;
@@ -97,6 +255,62 @@ struct ConvergedManifest
97255

98256
// --------- project
99257
std::vector<std::string> vExcludedProjects;
258+
259+
bool PopulateConvergedManifestPartPtrsMap()
260+
{
261+
if (CScraperConvergedManifest_ptr == nullptr) return false;
262+
263+
int iPartNum = 0;
264+
CDataStream ss(SER_NETWORK,1);
265+
WriteCompactSize(ss, CScraperConvergedManifest_ptr->vParts.size());
266+
uint256 nContentHashCheck;
267+
268+
for (const auto& iter : CScraperConvergedManifest_ptr->vParts)
269+
{
270+
std::string sProject;
271+
272+
if (iPartNum == 0)
273+
sProject = "BeaconList";
274+
else
275+
sProject = CScraperConvergedManifest_ptr->projects[iPartNum-1].project;
276+
277+
// Copy the pointer to the CPart into the map. This is ok, because the parts will be held
278+
// until the CScraperManifest in this object is destroyed and all of the manifest refs to the part
279+
// are gone.
280+
ConvergedManifestPartPtrsMap.insert(std::make_pair(sProject, iter));
281+
282+
// Serialize the hash to doublecheck the content hash.
283+
ss << iter->hash;
284+
285+
iPartNum++;
286+
}
287+
288+
ss << CScraperConvergedManifest_ptr->ConsensusBlock;
289+
290+
nContentHashCheck = Hash(ss.begin(), ss.end());
291+
292+
if (nContentHashCheck != CScraperConvergedManifest_ptr->nContentHash)
293+
{
294+
LogPrintf("ERROR: PopulateConvergedManifestPartPtrsMap(): Selected Manifest content hash check failed! "
295+
"nContentHashCheck = %s and nContentHash = %s.",
296+
nContentHashCheck.GetHex(), CScraperConvergedManifest_ptr->nContentHash.GetHex());
297+
return false;
298+
}
299+
300+
return true;
301+
}
302+
303+
void ComputeConvergedContentHash()
304+
{
305+
CDataStream ss(SER_NETWORK,1);
306+
307+
for (const auto& iter : ConvergedManifestPartPtrsMap)
308+
{
309+
ss << iter.second->data;
310+
}
311+
312+
nContentHash = Hash(ss.begin(), ss.end());
313+
}
100314
};
101315

102316

0 commit comments

Comments
 (0)