Skip to content

Commit 9fc896d

Browse files
committed
Implement scraper global statistics cache optimization
This commit changes the ConvergedManifestPartsMap to a map of pointers, ConvergedManifestPartPtrsMap. The CScraperManifest held by the ConvergedManifest is held via a shared_ptr. In turn, the CParts in the CScraperManifest::CSplitBlob vParts are also mapped into the ConvergedManifestPartPtrsMap, which is indexed by project. The part pointers in the ConvergedManifestPartPtrsMap are valid for the same lifetime as the CScraperManifest held by shared pointer in the ConvergedManifest, so this is safe. This minimizes the changes to the rest of the scraper needed to achieve this optimization. Some adjustments to the testing harness were necessary. I also found and put in a missing IsComplete() check in BinCScraperManifestsByScraper(), which may have been responsible for the extremely rare occurrence of missing projects in a local node convergence and explainmagnitude.
1 parent bec808f commit 9fc896d

File tree

9 files changed

+498
-183
lines changed

9 files changed

+498
-183
lines changed

src/neuralnet/quorum.cpp

+9-9
Original file line numberDiff line numberDiff line change
@@ -788,11 +788,11 @@ class SuperblockValidator
788788
//! \param project_name Identifies the project to add.
789789
//! \param project_part_ptr Pointer to the part that holds the serialized project stats.
790790
//!
791-
void AddPart(std::string project_name, CSerializeData project_part_data)
791+
void AddPart(std::string project_name, CSplitBlob::CPart* project_part_ptr)
792792
{
793-
m_convergence.ConvergedManifestPartsMap.emplace(
793+
m_convergence.ConvergedManifestPartPtrsMap.emplace(
794794
std::move(project_name),
795-
std::move(project_part_data));
795+
std::move(project_part_ptr));
796796
}
797797

798798
//!
@@ -944,7 +944,7 @@ class SuperblockValidator
944944

945945
convergence.AddPart(
946946
project_pair.first, // project name
947-
GetResolvedPartData(resolved_part.m_part_hash));
947+
GetResolvedPartPtr(resolved_part.m_part_hash));
948948

949949
remainder -= part_index * project.m_combiner_mask;
950950

@@ -981,7 +981,7 @@ class SuperblockValidator
981981
//!
982982
//! \return Pointer to the part to add to a convergence, or nullptr if the part no longer exists.
983983
//!
984-
static CSerializeData GetResolvedPartData(const uint256& part_hash)
984+
static CSplitBlob::CPart* GetResolvedPartPtr(const uint256& part_hash)
985985
{
986986
LOCK(CSplitBlob::cs_mapParts);
987987

@@ -991,10 +991,10 @@ class SuperblockValidator
991991
// the most recent project part should always exist:
992992
if (iter == CSplitBlob::mapParts.end()) {
993993
LogPrintf("ValidateSuperblock(): project part disappeared.");
994-
return CSerializeData();
994+
return nullptr;
995995
}
996996

997-
return iter->second.data;
997+
return &(iter->second);
998998
}
999999

10001000
//!
@@ -1029,7 +1029,7 @@ class SuperblockValidator
10291029
return;
10301030
}
10311031

1032-
convergence.AddPart("BeaconList", manifest.vParts[0]->data);
1032+
convergence.AddPart("BeaconList", manifest.vParts[0]);
10331033

10341034
// Find the offset of the verified beacons project part. Typically
10351035
// this exists at vParts offset 1 when a scraper verified at least
@@ -1054,7 +1054,7 @@ class SuperblockValidator
10541054
return;
10551055
}
10561056

1057-
convergence.AddPart("VerifiedBeacons", manifest.vParts[part_offset]->data);
1057+
convergence.AddPart("VerifiedBeacons", manifest.vParts[part_offset]);
10581058
}
10591059
}; // ProjectCombiner
10601060

src/neuralnet/quorum.h

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include <string>
4+
#include "scraper_net.h"
45

56
class CBlockIndex;
67

src/neuralnet/superblock.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -557,11 +557,11 @@ Superblock Superblock::FromConvergence(
557557
// Add hints created from the hashes of converged manifest parts to each
558558
// superblock project section to assist receiving nodes with validation:
559559
//
560-
for (const auto& part_pair : stats.Convergence.ConvergedManifestPartsMap) {
560+
for (const auto& part_pair : stats.Convergence.ConvergedManifestPartPtrsMap) {
561561
const std::string& project_name = part_pair.first;
562-
const CSerializeData& part_data = part_pair.second;
562+
const CSplitBlob::CPart* part_data_ptr = part_pair.second;
563563

564-
projects.SetHint(project_name, part_data);
564+
projects.SetHint(project_name, part_data_ptr);
565565
}
566566

567567
return superblock;
@@ -954,7 +954,7 @@ void Superblock::ProjectIndex::Add(std::string name, const ProjectStats& stats)
954954

955955
void Superblock::ProjectIndex::SetHint(
956956
const std::string& name,
957-
const CSerializeData& part_data)
957+
const CSplitBlob::CPart* part_data_ptr)
958958
{
959959
auto iter = std::lower_bound(
960960
m_projects.begin(),
@@ -966,7 +966,7 @@ void Superblock::ProjectIndex::SetHint(
966966
return;
967967
}
968968

969-
const uint256 part_hash = Hash(part_data.begin(), part_data.end());
969+
const uint256 part_hash = Hash(part_data_ptr->data.begin(), part_data_ptr->data.end());
970970
iter->second.m_convergence_hint = part_hash.GetUint64() >> 32;
971971

972972
m_converged_by_project = true;

src/neuralnet/superblock.h

+18-1
Original file line numberDiff line numberDiff line change
@@ -1087,7 +1087,7 @@ class Superblock
10871087
//!
10881088
//! \param part_data_ptr Pointer to the convergence part to create the hint from.
10891089
//!
1090-
void SetHint(const std::string& name, const CSerializeData& part_data);
1090+
void SetHint(const std::string& name, const CSplitBlob::CPart *part_data_ptr);
10911091

10921092
//!
10931093
//! \brief Serialize the object to the provided stream.
@@ -1533,6 +1533,22 @@ struct hash<NN::QuorumHash>
15331533
// This is part of the scraper but is put here, because it needs the complete NN:Superblock class.
15341534
struct ConvergedScraperStats
15351535
{
1536+
ConvergedScraperStats() : Convergence(), NewFormatSuperblock()
1537+
{
1538+
bClean = false;
1539+
1540+
nTime = 0;
1541+
mScraperConvergedStats = {};
1542+
PastConvergences = {};
1543+
}
1544+
1545+
ConvergedScraperStats(const int64_t nTime_in, const ConvergedManifest& Convergence) : Convergence(Convergence)
1546+
{
1547+
bClean = false;
1548+
1549+
nTime = nTime_in;
1550+
}
1551+
15361552
// Flag to indicate cache is clean or dirty (i.e. state change of underlying statistics has occurred).
15371553
// This flag is marked true in ScraperGetSuperblockContract() and false on receipt or deletion of
15381554
// statistics objects.
@@ -1558,6 +1574,7 @@ struct ConvergedScraperStats
15581574
{
15591575
// This is specifically this form of insert to ensure that if there is a hint "collision" the referenced
15601576
// SB Hash and Convergence stored will be the LATER one.
1577+
15611578
PastConvergences[nReducedContentHash] = std::make_pair(NewFormatSuperblock.GetHash(), Convergence);
15621579
}
15631580
}

src/rpcserver.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,7 @@ static const CRPCCommand vRPCCommands[] =
399399
{ "archivelog", &archivelog, cat_developer },
400400
{ "testnewsb", &testnewsb, cat_developer },
401401
{ "convergencereport", &convergencereport, cat_developer },
402+
{ "scraperreport", &scraperreport, cat_developer },
402403

403404
// Network commands
404405
{ "addnode", &addnode, cat_network },

src/rpcserver.h

+1
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ extern UniValue deletecscrapermanifest(const UniValue& params, bool fHelp);
213213
extern UniValue archivelog(const UniValue& params, bool fHelp);
214214
extern UniValue testnewsb(const UniValue& params, bool fHelp);
215215
extern UniValue convergencereport(const UniValue& params, bool fHelp);
216+
extern UniValue scraperreport(const UniValue& params, bool fHelp);
216217

217218
// Network
218219
extern UniValue addnode(const UniValue& params, bool fHelp);

src/scraper/fwd.h

+132-5
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#include "util.h"
1010
#include "streams.h"
1111

12+
#include "scraper_net.h"
13+
1214
/*********************
1315
* Scraper ENUMS *
1416
*********************/
@@ -55,13 +57,80 @@ typedef std::multimap<int64_t, std::pair<uint256, uint256>, std::greater <int64_
5557
// See the ScraperID typedef above.
5658
typedef std::map<ScraperID, mCSManifest> mmCSManifestsBinnedByScraper;
5759

58-
// -------------- Project ---- Converged Part
59-
typedef std::map<std::string, CSerializeData> mConvergedManifestParts;
60-
// Note that this IS a copy not a pointer. Since manifests and parts can be deleted because of aging rules,
61-
// it is dangerous to save memory and point to the actual part objects themselves.
60+
// Note the CParts pointed to by this map are safe to access, because the pointers are guaranteed valid
61+
// as long as the holding CScraperManifests (both in the CScraperManifest global map, and this cache)
62+
// still exist. So the safety of these pointers is coincident with the lifespan of CScraperManifests
63+
// that have reference to them. If you have questions about this, you should review the CSplitBlob abstract
64+
// class, which is the base class of the CScraperManifest class, and provides the mechanisms for part
65+
// control. Note that two LOCKS are used to protect the integrity of the underlying global maps,
66+
// CScraperManifest::cs_mapManifest and CSplitBlob::cs_mapParts.
67+
// -------------- Project -- Converged Part Pointer
68+
typedef std::map<std::string, CSplitBlob::CPart*> mConvergedManifestPart_ptrs;
6269

6370
struct ConvergedManifest
6471
{
72+
// Empty converged manifest constructor
73+
ConvergedManifest()
74+
{
75+
nContentHash = {};
76+
ConsensusBlock = {};
77+
timestamp = 0;
78+
bByParts = false;
79+
80+
CScraperConvergedManifest_ptr = nullptr;
81+
82+
ConvergedManifestPartPtrsMap = {};
83+
84+
mIncludedScraperManifests = {};
85+
86+
nUnderlyingManifestContentHash = {};
87+
88+
vIncludedScrapers = {};
89+
vExcludedScrapers = {};
90+
vScrapersNotPublishing = {};
91+
92+
mIncludedScrapersbyProject = {};
93+
mIncludedProjectsbyScraper = {};
94+
95+
mScraperConvergenceCountbyProject = {};
96+
97+
vExcludedProjects = {};
98+
}
99+
100+
// For constructing a dummy converged manifest from a single manifest
101+
ConvergedManifest(CScraperManifest& in)
102+
{
103+
ConsensusBlock = in.ConsensusBlock;
104+
timestamp = GetAdjustedTime();
105+
bByParts = false;
106+
107+
CScraperConvergedManifest_ptr = std::make_shared<CScraperManifest>(in);
108+
109+
PopulateConvergedManifestPartPtrsMap();
110+
111+
ComputeConvergedContentHash();
112+
113+
nUnderlyingManifestContentHash = in.nContentHash;
114+
}
115+
116+
// Call operator to update an already initialized ConvergedManifest with a passed in CScraperManifest
117+
bool operator()(const CScraperManifest& in)
118+
{
119+
ConsensusBlock = in.ConsensusBlock;
120+
timestamp = GetAdjustedTime();
121+
bByParts = false;
122+
123+
CScraperConvergedManifest_ptr = std::make_shared<CScraperManifest>(in);
124+
125+
bool bConvergedContentHashMatches = PopulateConvergedManifestPartPtrsMap();
126+
127+
ComputeConvergedContentHash();
128+
129+
nUnderlyingManifestContentHash = in.nContentHash;
130+
131+
return bConvergedContentHashMatches;
132+
}
133+
65134
// IMPORTANT... nContentHash is NOT the hash of part hashes in the order of vParts unlike CScraper::manifest.
66135
// It is the hash of the data of the parts pointed to by the ConvergedManifestPartPtrsMap in the order of the key. It represents
67136
// the composite convergence by taking parts piecewise in the case of the fallback to bByParts (project) level.
@@ -70,7 +139,9 @@ struct ConvergedManifest
70139
int64_t timestamp;
71140
bool bByParts;
72141

73-
mConvergedManifestParts ConvergedManifestPartsMap;
142+
std::shared_ptr<CScraperManifest> CScraperConvergedManifest_ptr;
143+
144+
mConvergedManifestPart_ptrs ConvergedManifestPartPtrsMap;
74145

75146
// Used when convergence is at the manifest level (normal)
76147
std::map<ScraperID, uint256> mIncludedScraperManifests;
@@ -97,6 +168,62 @@ struct ConvergedManifest
97168

98169
// --------- project
99170
std::vector<std::string> vExcludedProjects;
171+
172+
bool PopulateConvergedManifestPartPtrsMap()
173+
{
174+
if (CScraperConvergedManifest_ptr == nullptr) return false;
175+
176+
int iPartNum = 0;
177+
CDataStream ss(SER_NETWORK,1);
178+
WriteCompactSize(ss, CScraperConvergedManifest_ptr->vParts.size());
179+
uint256 nContentHashCheck;
180+
181+
for (const auto& iter : CScraperConvergedManifest_ptr->vParts)
182+
{
183+
std::string sProject;
184+
185+
if (iPartNum == 0)
186+
sProject = "BeaconList";
187+
else
188+
sProject = CScraperConvergedManifest_ptr->projects[iPartNum-1].project;
189+
190+
// Copy the pointer to the CPart into the map. This is ok, because the parts will be held
191+
// until the CScraperManifest in this object is destroyed and all of the manifest refs to the part
192+
// are gone.
193+
ConvergedManifestPartPtrsMap.insert(std::make_pair(sProject, iter));
194+
195+
// Serialize the hash to doublecheck the content hash.
196+
ss << iter->hash;
197+
198+
iPartNum++;
199+
}
200+
201+
ss << CScraperConvergedManifest_ptr->ConsensusBlock;
202+
203+
nContentHashCheck = Hash(ss.begin(), ss.end());
204+
205+
if (nContentHashCheck != CScraperConvergedManifest_ptr->nContentHash)
206+
{
207+
LogPrintf("ERROR: PopulateConvergedManifestPartPtrsMap(): Selected Manifest content hash check failed! "
208+
"nContentHashCheck = %s and nContentHash = %s.",
209+
nContentHashCheck.GetHex(), CScraperConvergedManifest_ptr->nContentHash.GetHex());
210+
return false;
211+
}
212+
213+
return true;
214+
}
215+
216+
void ComputeConvergedContentHash()
217+
{
218+
CDataStream ss(SER_NETWORK,1);
219+
220+
for (const auto& iter : ConvergedManifestPartPtrsMap)
221+
{
222+
ss << iter.second->data;
223+
}
224+
225+
nContentHash = Hash(ss.begin(), ss.end());
226+
}
100227
};
101228

102229

0 commit comments

Comments
 (0)