From 67f066516f02e548ec4a5c8c7dab6cc876304ed5 Mon Sep 17 00:00:00 2001
From: Pengyu Chen
Date: Thu, 6 Jun 2019 07:58:45 +0100
Subject: [PATCH 1/2] Fix flush() logging: pass n_links_to_flush to LOG.info

---
 hcf_backend/manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hcf_backend/manager.py b/hcf_backend/manager.py
index 3b7ac8f..b40b67b 100644
--- a/hcf_backend/manager.py
+++ b/hcf_backend/manager.py
@@ -40,7 +40,7 @@ def flush(self, slot=None):
             slot_obj = self._frontier.get(slot)
             slot_obj.flush()
             self._links_to_flush_count[slot] = 0
-            LOG.info('Flushed %d link(s) to slot %s', slot)
+            LOG.info('Flushed %d link(s) to slot %s', n_links_to_flush, slot)
         return n_links_to_flush

From de89726de4ea77e91958a4ac0fdcbcea2d653b9b Mon Sep 17 00:00:00 2001
From: artur
Date: Tue, 26 Nov 2019 17:58:06 +0100
Subject: [PATCH 2/2] Add more retries and extend the retry time

---
 hcf_backend/utils/crawlmanager.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hcf_backend/utils/crawlmanager.py b/hcf_backend/utils/crawlmanager.py
index 5a119bc..3f7c4d0 100644
--- a/hcf_backend/utils/crawlmanager.py
+++ b/hcf_backend/utils/crawlmanager.py
@@ -8,6 +8,8 @@
 import random
 import logging
 
+from scrapinghub import ScrapinghubClient
+
 from shub_workflow.crawl import CrawlManager
 
 from hcf_backend.utils.hcfpal import HCFPal
@@ -23,6 +25,7 @@ class HCFCrawlManager(CrawlManager):
 
     def __init__(self):
         super().__init__()
+        self.client = ScrapinghubClient(max_retries=10, max_retry_time=3600)
         self.hcfpal = HCFPal(self.client._hsclient.get_project(self.project_id))
 
     def add_argparser_options(self):
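
Note on PATCH 2/2: the sketch below shows how the affected section of
hcf_backend/utils/crawlmanager.py reads once the patch is applied. It is a
minimal reconstruction assembled from the hunks above, assuming (as the
surrounding code implies) that shub_workflow's CrawlManager base class provides
self.project_id and a default self.client that the new assignment replaces;
the retry values are the ones introduced by the patch.

    import random
    import logging

    from scrapinghub import ScrapinghubClient

    from shub_workflow.crawl import CrawlManager

    from hcf_backend.utils.hcfpal import HCFPal


    class HCFCrawlManager(CrawlManager):

        def __init__(self):
            super().__init__()
            # Replace the inherited client (assumed to be set by CrawlManager)
            # with one that retries failed API requests up to 10 times, within
            # a max_retry_time of 3600 seconds.
            self.client = ScrapinghubClient(max_retries=10, max_retry_time=3600)
            # HCFPal is built from the HubStorage project handle obtained via
            # the retry-configured client, so frontier calls get the same
            # retry behaviour.
            self.hcfpal = HCFPal(self.client._hsclient.get_project(self.project_id))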