From 1c6f6a91a8aa377ea8a5206a0190d8d9b2c27aec Mon Sep 17 00:00:00 2001
From: Brendan Heywood
Date: Tue, 4 Feb 2025 11:42:19 +1100
Subject: [PATCH 1/2] Exposed verbose debugging mode in UI

Add a tool_crawler/debugging admin setting (off / normal / verbose) and
feed it into tool_crawler_crawl() so the task output can be made verbose
from the UI.

The setting only ever raises the verbosity requested by the caller: it is
ignored when unset or 0, and otherwise the higher of the caller's $verbose
and the configured level is used.

When $verbose > 1, parse_html() now prints the trimmed link text in quotes
followed by the resolved URL.

New language strings are kept in alphabetical key order, matching the
surrounding entries. Plugin version bumped to 2025020401.
---
Review notes (not part of the commit message):
 - Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch; regenerate with git format-patch before applying.
 - The blob ids on the "index" lines are those of the original submission.

 classes/robot/crawler.php |  3 ++-
 lang/en/tool_crawler.php  |  5 +++++
 lib.php                   |  5 +++++
 settings.php              | 11 +++++++++++
 version.php               |  4 ++--
 5 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/classes/robot/crawler.php b/classes/robot/crawler.php
index f3706994..1547fa3b 100644
--- a/classes/robot/crawler.php
+++ b/classes/robot/crawler.php
@@ -861,8 +861,9 @@ public function parse_html($node, $external, $verbose = false) {
             } while ($walk);
 
             $text = self::clean_html_node_content($e);
+            $text = trim($text);
             if ($verbose > 1) {
-                printf (" - Found link to: %-20s / %-50s => %-50s\n", $text, $e->href, $href);
+                printf (" - Found link to: %-30s -> %s\n", "'$text'", $href);
             }
             $this->link_from_node_to_url($node, $href, $text, $idattr);
         }
diff --git a/lang/en/tool_crawler.php b/lang/en/tool_crawler.php
index 0cffae85..3be7f8db 100644
--- a/lang/en/tool_crawler.php
+++ b/lang/en/tool_crawler.php
@@ -75,6 +75,11 @@
 $string['crawlend'] = 'Crawl end';
 $string['crawlstart'] = 'Crawl start';
 $string['cronticks'] = 'Cron ticks';
+$string['debugging'] = 'Verbose debugging';
+$string['debuggingdesc'] = 'This turns on debugging in the task output';
+$string['debugnormal'] = 'Normal debugging';
+$string['debugoff'] = 'Debugging off';
+$string['debugverbose'] = 'Verbose debugging';
 $string['disablebot'] = 'Disable the link crawler robot';
 $string['disablebotdesc'] = 'Make the crawler do nothing when a scheduled task is executed. This effectively prevents crawling of links and running of bot cleanup functions. Intended to deactivate or temporarily pause the crawler without having to disable all its scheduled tasks.';
 $string['duration'] = 'Duration';
diff --git a/lib.php b/lib.php
index 76877f3d..ecee40d1 100644
--- a/lib.php
+++ b/lib.php
@@ -45,6 +45,11 @@ function tool_crawler_crawl($verbose = false) {
     $robot = new crawler();
     $url = new url();
     $config = $robot::get_config();
+
+    if (!empty($config->debugging)) {
+        $verbose = max((int) $verbose, (int) $config->debugging);
+    }
+
     $crawlstart = $config->crawlstart;
     $crawlend = $config->crawlend;
 
diff --git a/settings.php b/settings.php
index 25772777..644d7331 100644
--- a/settings.php
+++ b/settings.php
@@ -209,5 +209,16 @@
             new lang_string('disablebot', 'tool_crawler'),
             new lang_string('disablebotdesc', 'tool_crawler'),
             '0' ));
+
+        $options = [
+            0 => new lang_string('debugoff', 'tool_crawler'),
+            1 => new lang_string('debugnormal', 'tool_crawler'),
+            2 => new lang_string('debugverbose', 'tool_crawler'),
+        ];
+        $settings->add(new admin_setting_configselect('tool_crawler/debugging',
+            new lang_string('debugging', 'tool_crawler'),
+            new lang_string('debuggingdesc', 'tool_crawler'),
+            0,
+            $options));
     }
 }
diff --git a/version.php b/version.php
index 7e71e967..62f8b8e8 100644
--- a/version.php
+++ b/version.php
@@ -27,8 +27,8 @@
 
 defined('MOODLE_INTERNAL') || die();
 
-$plugin->version = 2024031401; // The current plugin version (Date: YYYYMMDDXX)
-$plugin->release = 2024031401; // The current plugin version (Date: YYYYMMDDXX)
+$plugin->version = 2025020401; // The current plugin version (Date: YYYYMMDDXX)
+$plugin->release = 2025020401; // The current plugin version (Date: YYYYMMDDXX)
 $plugin->requires = 2016021800; // Requires this Moodle version.
 $plugin->supported = [34, 405];
 $plugin->component = 'tool_crawler'; // To check on upgrade, that module sits in correct place.
From 7a934e0f2836c43917f03baa6ca77400f823d6a6 Mon Sep 17 00:00:00 2001
From: Brendan Heywood
Date: Tue, 4 Feb 2025 11:42:33 +1100
Subject: [PATCH 2/2] Store cookies in per worker local file #193

scrape() no longer keeps its cookie jar at the fixed path
$CFG->dataroot/tool_crawler_cookies.txt. The jar now lives under the
directory returned by make_request_directory(), and the resulting path is
cached in a function-static variable so that later scrape() calls in the
same PHP process reuse the same file instead of requesting a new directory.

The static variable and the variable that is tested and assigned must share
one name ($cookiefilelocation); otherwise the cache is never hit and every
call reads an undefined variable.
---
Review notes (not part of the commit message):
 - Fixed the misspelled static declaration ($cookiefilelocaion) so that it
   declares $cookiefilelocation.
 - Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch; regenerate with git format-patch before applying.
 - The blob ids on the "index" line are those of the original submission.

 classes/robot/crawler.php | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/classes/robot/crawler.php b/classes/robot/crawler.php
index 1547fa3b..b1164651 100644
--- a/classes/robot/crawler.php
+++ b/classes/robot/crawler.php
@@ -1090,7 +1090,12 @@ private static function determine_filesize($curlhandle, $method, $success, $body
     public function scrape($url) {
 
         global $CFG;
-        $cookiefilelocation = $CFG->dataroot . '/tool_crawler_cookies.txt';
+
+        static $cookiefilelocation = '';
+        if (!$cookiefilelocation) {
+            $cookiefilelocation = make_request_directory() . '/tool_crawler_cookies.txt';
+        }
+
         $config = self::get_config();
 
         $version = moodle_major_version();