Skip to content

Commit cd6a181

Browse files
committed
Add locking
This commit adds locks in a few places that I am concerned could be a problem if the underlying global map changes in another thread, plus makes a few other cleenups.
1 parent 954bd25 commit cd6a181

File tree

2 files changed

+69
-129
lines changed

2 files changed

+69
-129
lines changed

src/scraper/fwd.h

+13-5
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,14 @@ typedef std::map<ScraperID, mCSManifest> mmCSManifestsBinnedByScraper;
6767
// -------------- Project -- Converged Part Pointer
6868
typedef std::map<std::string, CSplitBlob::CPart*> mConvergedManifestPart_ptrs;
6969

70+
// The below type is still used for situations where a long global lock would be held during stats processing
71+
// primarily in GetScraperStatsByConvergedManifest because of the intense work done in LoadProjectObjectToStatsByCPID.
72+
// -------------- Project ---- Converged Part
73+
typedef std::map<std::string, CSerializeData> mConvergedManifestParts;
74+
7075
struct ConvergedManifest
7176
{
77+
// Empty converged manifest constructor
7278
ConvergedManifest()
7379
{
7480
nContentHash = {};
@@ -96,9 +102,9 @@ struct ConvergedManifest
96102
vExcludedProjects = {};
97103
}
98104

105+
// Constructor of a passed in ConvergedManifest (effectively a copy)
99106
ConvergedManifest(const ConvergedManifest& in)
100107
{
101-
// We can use the content hash from the specified converged manifest. We do not need to recompute it.
102108
nContentHash = in.nContentHash;
103109

104110
ConsensusBlock = in.ConsensusBlock;
@@ -107,7 +113,7 @@ struct ConvergedManifest
107113

108114
CScraperConvergedManifest_ptr = in.CScraperConvergedManifest_ptr;
109115

110-
PopulateConvergedManifestPartPtrsMap();
116+
ConvergedManifestPartPtrsMap = in.ConvergedManifestPartPtrsMap;
111117

112118
mIncludedScraperManifests = in.mIncludedScraperManifests;
113119

@@ -142,6 +148,7 @@ struct ConvergedManifest
142148
}
143149

144150

151+
// Call operator to update an already initialized ConvergedManifest with a passed in ConvergedManifest
145152
void operator()(ConvergedManifest& in)
146153
{
147154
// We can use the content hash from the specified converged manifest. We do not need to recompute it.
@@ -153,9 +160,7 @@ struct ConvergedManifest
153160

154161
CScraperConvergedManifest_ptr = in.CScraperConvergedManifest_ptr;
155162

156-
PopulateConvergedManifestPartPtrsMap();
157-
158-
//ConvergedManifestPartsMap = in.ConvergedManifestPartsMap;
163+
ConvergedManifestPartPtrsMap = in.ConvergedManifestPartPtrsMap;
159164

160165
mIncludedScraperManifests = in.mIncludedScraperManifests;
161166

@@ -173,6 +178,7 @@ struct ConvergedManifest
173178
vExcludedProjects = in.vExcludedProjects;
174179
}
175180

181+
// Call operator to update an already initialized ConvergedManifest with a passed in CScraperManifest
176182
bool operator()(const CScraperManifest& in)
177183
{
178184
ConsensusBlock = in.ConsensusBlock;
@@ -276,6 +282,8 @@ struct ConvergedManifest
276282
else
277283
sProject = CScraperConvergedManifest_ptr->projects[iPartNum-1].project;
278284

285+
//if (sProject.empty()) return false;
286+
279287
// Copy the pointer to the CPart into the map. This is ok, because the parts will be held
280288
// until the CScraperManifest in this object is destroyed and all of the manifest refs to the part
281289
// are gone.

src/scraper/scraper.cpp

+56-124
Original file line numberDiff line numberDiff line change
@@ -3457,26 +3457,42 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByConvergedManifest(const Converge
34573457

34583458
double dMagnitudePerProject = NEURALNETWORKMULTIPLIER / nActiveProjects;
34593459

3460+
mConvergedManifestParts ConvergedManifestPartsMap;
34603461
ScraperStats mScraperStats;
34613462

3462-
for (auto entry = StructConvergedManifest.ConvergedManifestPartPtrsMap.begin(); entry != StructConvergedManifest.ConvergedManifestPartPtrsMap.end(); ++entry)
3463+
// This is a lock and copy block specifically because LoadProjectObjectToStatsByCPID is very expensive and in
3464+
// this particular case it will reduce lock time to make a copy of the parts temporarily for processing. We have
3465+
// to take these locks now because of the conversion of the StructConvergedManifest parts map to pointers.
34633466
{
3464-
std::string project = entry->first;
3465-
ScraperStats mProjectScraperStats;
3467+
LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts);
3468+
_log(logattribute::INFO, "LOCK2", "CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts");
34663469

3467-
// Do not process the BeaconList or VerifiedBeacons as a project stats file.
3468-
if (project != "BeaconList" && project != "VerifiedBeacons")
3470+
for (auto entry = StructConvergedManifest.ConvergedManifestPartPtrsMap.begin(); entry != StructConvergedManifest.ConvergedManifestPartPtrsMap.end(); ++entry)
34693471
{
3470-
_log(logattribute::INFO, "GetScraperStatsByConvergedManifest", "Processing stats for project: " + project);
3471-
3472-
LoadProjectObjectToStatsByCPID(project, entry->second->data, dMagnitudePerProject, mProjectScraperStats);
3473-
3474-
// Insert into overall map.
3475-
for (auto const& entry2 : mProjectScraperStats)
3472+
// Do not process the BeaconList or VerifiedBeacons as a project stats file.
3473+
if (entry->first != "BeaconList" && entry->first != "VerifiedBeacons")
34763474
{
3477-
mScraperStats[entry2.first] = entry2.second;
3475+
ConvergedManifestPartsMap[entry->first] = entry->second->data;
34783476
}
34793477
}
3478+
3479+
_log(logattribute::INFO, "ENDLOCK2", "CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts");
3480+
}
3481+
3482+
// Now run through the copy and do the stats processing.
3483+
for (const auto& entry : ConvergedManifestPartsMap)
3484+
{
3485+
_log(logattribute::INFO, "GetScraperStatsByConvergedManifest", "Processing stats for project: " + entry.first);
3486+
3487+
ScraperStats mProjectScraperStats;
3488+
3489+
LoadProjectObjectToStatsByCPID(entry.first, entry.second, dMagnitudePerProject, mProjectScraperStats);
3490+
3491+
// Insert into overall map.
3492+
for (auto const& entry2 : mProjectScraperStats)
3493+
{
3494+
mScraperStats[entry2.first] = entry2.second;
3495+
}
34803496
}
34813497

34823498
ProcessNetworkWideFromProjectStats(mScraperStats);
@@ -3499,52 +3515,6 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsFromSingleManifest(CScraperManifes
34993515

35003516
ScraperStatsAndVerifiedBeacons stats_and_verified_beacons {};
35013517

3502-
/*
3503-
// Fill out the dummy ConvergedManifest structure. Note this assumes one-to-one part to project statistics BLOB.
3504-
// Needs to be fixed for more than one part per BLOB. This is easy in this case, because it is all from/referring to
3505-
// one manifest.
3506-
int iPartNum = 0;
3507-
CDataStream ss(SER_NETWORK,1);
3508-
WriteCompactSize(ss, manifest.vParts.size());
3509-
uint256 nContentHashCheck;
3510-
3511-
for (const auto& iter : manifest.vParts)
3512-
{
3513-
std::string sProject;
3514-
3515-
if (iPartNum == 0)
3516-
sProject = "BeaconList";
3517-
else
3518-
sProject = manifest.projects[iPartNum-1].project;
3519-
3520-
// Serialize the hash to doublecheck the content hash.
3521-
ss << iter->hash;
3522-
3523-
iPartNum++;
3524-
}
3525-
ss << StructDummyConvergedManifest.ConsensusBlock;
3526-
3527-
nContentHashCheck = Hash(ss.begin(), ss.end());
3528-
3529-
if (nContentHashCheck != manifest.nContentHash)
3530-
{
3531-
_log(logattribute::ERR, "GetScraperStatsFromSingleManifest", "Selected Manifest content hash check failed! nContentHashCheck = "
3532-
+ nContentHashCheck.GetHex() + " and nContentHash = " + manifest.nContentHash.GetHex());
3533-
// Content hash check failed. Return empty mScraperStats
3534-
return stats_and_verified_beacons;
3535-
}
3536-
else // Content matches.
3537-
{
3538-
// The DummyConvergedManifest content hash is NOT the same as the hash above from the CScraper::manifest, because it needs to be in the order of the
3539-
// map key and on the data, not the order of vParts by the part hash. So, unfortunately, we have to walk through the map again to hash it correctly.
3540-
CDataStream ss2(SER_NETWORK,1);
3541-
for (const auto& iter : StructDummyConvergedManifest.ConvergedManifestPartsMap)
3542-
ss2 << iter.second;
3543-
3544-
StructDummyConvergedManifest.nContentHash = Hash(ss2.begin(), ss2.end());
3545-
}
3546-
*/
3547-
35483518
// Enumerate the count of active projects from the dummy converged manifest. One of the parts
35493519
// is the beacon list, is not a project, which is why that should not be included in the count.
35503520
// Populate the verified beacons map, and if it is don't count that either.
@@ -4371,8 +4341,8 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes
43714341

43724342
if (bConvergenceSuccessful)
43734343
{
4374-
LOCK(CScraperManifest::cs_mapManifest);
4375-
_log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest");
4344+
LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts);
4345+
_log(logattribute::INFO, "LOCK2", "CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts");
43764346

43774347
// Select agreed upon (converged) CScraper manifest based on converged hash.
43784348
auto pair = CScraperManifest::mapManifest.find(convergence->second.second);
@@ -4382,40 +4352,6 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes
43824352
// be fixed for more than one part per BLOB. This is easy in this case, because it is all from/referring to one manifest.
43834353
bool bConvergedContentHashMatches = StructConvergedManifest(manifest);
43844354

4385-
/*
4386-
StructConvergedManifest.ConsensusBlock = manifest.ConsensusBlock;
4387-
StructConvergedManifest.timestamp = GetAdjustedTime();
4388-
StructConvergedManifest.bByParts = false;
4389-
4390-
int iPartNum = 0;
4391-
CDataStream ss(SER_NETWORK,1);
4392-
WriteCompactSize(ss, manifest.vParts.size());
4393-
uint256 nContentHashCheck;
4394-
4395-
for (const auto& iter : manifest.vParts)
4396-
{
4397-
std::string sProject;
4398-
4399-
if (iPartNum == 0)
4400-
sProject = "BeaconList";
4401-
else
4402-
sProject = manifest.projects[iPartNum-1].project;
4403-
4404-
// Copy the parts data into the map keyed by project. This will also pick up the
4405-
// VerifiedBeacons "project" if present.
4406-
StructConvergedManifest.ConvergedManifestPartsMap.insert(std::make_pair(sProject, iter->data));
4407-
4408-
// Serialize the hash to doublecheck the content hash.
4409-
ss << iter->hash;
4410-
4411-
iPartNum++;
4412-
}
4413-
ss << StructConvergedManifest.ConsensusBlock;
4414-
4415-
nContentHashCheck = Hash(ss.begin(), ss.end());
4416-
4417-
*/
4418-
44194355
if (!bConvergedContentHashMatches)
44204356
{
44214357
bConvergenceSuccessful = false;
@@ -4425,19 +4361,6 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes
44254361
}
44264362
else // Content matches so we have a confirmed convergence.
44274363
{
4428-
/*
4429-
// Copy the MANIFEST content hash into the ConvergedManifest.
4430-
StructConvergedManifest.nUnderlyingManifestContentHash = convergence->first;
4431-
4432-
// The ConvergedManifest content hash is NOT the same as the hash above from the CScraper::manifest, because it needs to be in the order of the
4433-
// map key and on the data, not the order of vParts by the part hash. So, unfortunately, we have to walk through the map again to hash it correctly.
4434-
CDataStream ss2(SER_NETWORK,1);
4435-
for (const auto& iter : StructConvergedManifest.ConvergedManifestPartsMap)
4436-
ss2 << iter.second;
4437-
4438-
StructConvergedManifest.nContentHash = Hash(ss2.begin(), ss2.end());
4439-
*/
4440-
44414364
// Determine if there is an excluded project. If so, set convergence back to false and drop back to project level to try and recover project by project.
44424365
for (const auto& iProjects : projectWhitelist)
44434366
{
@@ -4467,7 +4390,7 @@ bool ScraperConstructConvergedManifest(ConvergedManifest& StructConvergedManifes
44674390
}
44684391
}
44694392

4470-
_log(logattribute::INFO, "ENDLOCK", "CScraperManifest::cs_mapManifest");
4393+
_log(logattribute::INFO, "ENDLOCK2", "CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts");
44714394
}
44724395

44734396
if (!bConvergenceSuccessful)
@@ -4898,7 +4821,7 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests()
48984821

48994822
_log(logattribute::INFO, "ScraperDeleteCScraperManifests", "Deleting old CScraperManifests.");
49004823

4901-
LOCK(CScraperManifest::cs_mapManifest);
4824+
LOCK2(CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts);
49024825
_log(logattribute::INFO, "LOCK", "CScraperManifest::cs_mapManifest");
49034826

49044827
// First check for unauthorized manifests just in case a scraper has been deauthorized.
@@ -4979,7 +4902,7 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests()
49794902
// that large. (The lock on CScraperManifest::cs_mapManifest is still held from above.)
49804903
mMapCSManifestsBinnedByScraper = BinCScraperManifestsByScraper();
49814904

4982-
_log(logattribute::INFO, "ENDLOCK", "CScraperManifest::cs_mapManifest");
4905+
_log(logattribute::INFO, "ENDLOCK2", "CScraperManifest::cs_mapManifest, CSplitBlob::cs_mapParts");
49834906

49844907
return mMapCSManifestsBinnedByScraper;
49854908
}
@@ -4989,21 +4912,29 @@ mmCSManifestsBinnedByScraper ScraperCullAndBinCScraperManifests()
49894912
// ---------------------------------------------- In ---------------------------------------- Out
49904913
bool LoadBeaconListFromConvergedManifest(const ConvergedManifest& StructConvergedManifest, ScraperBeaconMap& mBeaconMap)
49914914
{
4992-
// Find the beacon list.
4993-
auto iter = StructConvergedManifest.ConvergedManifestPartPtrsMap.find("BeaconList");
4915+
boostio::filtering_istream in;
49944916

4995-
// Bail if the beacon list is not found, or the part is zero size (missing referenced part)
4996-
if (iter == StructConvergedManifest.ConvergedManifestPartPtrsMap.end() || iter->second->data.size() == 0)
49974917
{
4998-
return false;
4999-
}
4918+
LOCK(CSplitBlob::cs_mapParts);
4919+
_log(logattribute::INFO, "LOCK", "CSplitBlob::cs_mapParts");
50004920

5001-
boostio::basic_array_source<char> input_source(&iter->second->data[0], iter->second->data.size());
5002-
boostio::stream<boostio::basic_array_source<char>> ingzss(input_source);
4921+
// Find the beacon list.
4922+
auto iter = StructConvergedManifest.ConvergedManifestPartPtrsMap.find("BeaconList");
50034923

5004-
boostio::filtering_istream in;
5005-
in.push(boostio::gzip_decompressor());
5006-
in.push(ingzss);
4924+
// Bail if the beacon list is not found, or the part is zero size (missing referenced part)
4925+
if (iter == StructConvergedManifest.ConvergedManifestPartPtrsMap.end() || iter->second->data.size() == 0)
4926+
{
4927+
return false;
4928+
}
4929+
4930+
boostio::basic_array_source<char> input_source(&iter->second->data[0], iter->second->data.size());
4931+
boostio::stream<boostio::basic_array_source<char>> ingzss(input_source);
4932+
4933+
in.push(boostio::gzip_decompressor());
4934+
in.push(ingzss);
4935+
4936+
_log(logattribute::INFO, "ENDLOCK", "CSplitBlob::cs_mapParts");
4937+
}
50074938

50084939
std::string line;
50094940

@@ -5869,9 +5800,10 @@ UniValue scraperreport(const UniValue& params, bool fHelp)
58695800

58705801
total_convergences_part_unique_pointer_maps_size = global_cache_unique_parts.size();
58715802

5872-
part_objects_reduced = total_convergences_part_pointer_maps_size
5873-
- total_convergences_part_unique_pointer_maps_size;
5874-
5803+
// The part objects reduced is EQUAL to the total size of all of the convergence part pointers
5804+
// added up, because those would be the additional parts needed in the old parts map. The
5805+
// new maps are all pointers to the already existing parts in the global map.
5806+
part_objects_reduced = total_convergences_part_pointer_maps_size;
58755807

58765808
converged_scraper_stats_cache.pushKV("current_convergence_publishing_scrapers",
58775809
current_convergence_publishing_scrapers);

0 commit comments

Comments
 (0)