diff --git a/README.md b/README.md index e792aac..a304be0 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # Статистика доменов -Скрипт для сбора статистики для зон ru/su/rf. Собираются все записи c DNS, -Автономная система, переод делегирования +Скрипт для сбора статистики для зон ru/su/rf. Собираются все записи c DNS (A, AAAA, NS, MX, TXT), +данные об автономной системе к которой принадлежит домен, периоду делегирования и так далее. На основе этих данных можно +строить статистику. -Сайт проекта http://firststat.ru - -Статья с описанием https://habrahabr.ru/post/301894/ +* Сайт проекта http://firststat.ru +* Статья с описанием https://habrahabr.ru/post/301894/ Для работы необходимы модули: - mysqlclient==1.4.6 @@ -42,21 +42,7 @@ Далее каждую ночь база данных доменов будет обновляться. На двух процессорах E5-2690v2 с 225 гигабайтами памяти процесс обновления БД з -анимает 6-8 часов, плюс еще несколько часов агрегирование данных. - -# TODO - -* https://habr.com/ru/post/66151/ -* собирать статистику за сегодня из domain, а не из domain_history -* подключить RPKI -* скачивание и unzip сделать паралельно -* randomize_servers = on; ?? и в принуипе разобратся с рекурсером, можно ли его ускорить или все запросы так и пересылать в google -* pdnsd-ctl status - -# Пример отчета (1 августа 2015 года) - -![example](https://scontent.xx.fbcdn.net/hphotos-xpt1/t31.0-8/11779902_855515371153091_8587193411725580989_o.png) - +анимает 3-6 часов. 
diff --git a/classes/asInet.py b/classes/asInet.py index a9c6702..6743294 100644 --- a/classes/asInet.py +++ b/classes/asInet.py @@ -9,6 +9,7 @@ import urllib.parse import traceback import time +import typing from helpers.helperUnicode import as_default_string from helpers.helpers import get_mysql_connection @@ -106,10 +107,10 @@ def parsing_as(self, show_log: bool = False, max_as: int = MAX_AS_NUMBER) -> Non as_data = self._get_all_as_info() for i in range(1, max_as): - print("Update as %s" % i) - self.update_as(i, as_data, show_log=show_log) - - self.update_as(198610, as_data, show_log=show_log) + if i in as_data: + self.update_as(i, as_data=as_data[i], show_log=show_log) + else: + self.update_as(i, as_data=None, show_log=show_log) def _get_asn_description(self, number: int) -> dict: """ @@ -142,7 +143,7 @@ def _get_asn_description(self, number: int) -> dict: 'DESCRIPTION': description, 'USE_FAST': 0} - def update_as(self, number: int, as_data: dict, show_log: bool = False) -> bool: + def update_as(self, number: int, as_data: typing.Union[dict, None], show_log: bool = False) -> bool: """ Обновляем информацию об AS в базе данных """ @@ -157,16 +158,15 @@ def update_as(self, number: int, as_data: dict, show_log: bool = False) -> bool: count = cursor.fetchone() - if number in as_data and count['count'] != 0: + if as_data and count['count'] != 0: as_info = {'AS': number, - 'COUNTRY': as_data[number]['country'], + 'COUNTRY': as_data['country'], 'ORGANIZATION': '', 'DATE_REGISTER': '', - 'DESCRIPTION': as_data[number]['descriptions'], + 'DESCRIPTION': as_data['descriptions'], 'USE_FAST': 1} else: try: - time.sleep(.2) as_info = self._get_asn_description(number) except: as_info = {'AS': number, diff --git a/classes/command/bgpdump.py b/classes/command/bgpdump.py index c9c14ca..f7efbce 100644 --- a/classes/command/bgpdump.py +++ b/classes/command/bgpdump.py @@ -15,14 +15,15 @@ class Bgpdump(Command): } ':type : dict' - def __init__(self, path: str): + def __init__(self, 
path_in: str, path_out: str): """ - :type path: unicode + :type path_in: unicode :return: """ super(Bgpdump, self).__init__("bgpdump") self.binary = [os.path.abspath(CURRENT_PATH+'/bin/bgpdump')] - self.path = path + self.path_in = path_in + self.path_out = path_out def get_command(self) -> list: """ @@ -31,4 +32,4 @@ def get_command(self) -> list: :rtype: list """ - return self.binary + [self.path] + return self.binary + ["-O", self.path_out, self.path_in] diff --git a/classes/converter.py b/classes/converter.py index 62e27b3..11ce4cd 100644 --- a/classes/converter.py +++ b/classes/converter.py @@ -89,16 +89,13 @@ def parse_file_rib_file_to(self, path_rib_file: str or None = None, path_to: str path_rib_file = os.path.abspath(os.path.join(self.path, 'rib.bz2')) path_to = os.path.abspath(os.path.join(self.work_path, 'rib')) - bgp_dump = Bgpdump(path_rib_file) - command = bgp_dump.get_command() - shutil.rmtree(path_to, ignore_errors=True) - file_rib = open(path_to, 'w') + bgp_dump = Bgpdump(path_rib_file, path_to) + command = bgp_dump.get_command() - p = SubprocessRunner(command=command, stdout=file_rib) + p = SubprocessRunner(command=command) p.run() p.wait(write_output_in_log=False) - file_rib.close() return path_to @@ -116,7 +113,6 @@ def convert_rib_to_net_as(self, path_rib_file: str or bool = False) -> dict: # NEXT_HOP: 80.91.255.62 # AGGREGATOR: AS24940 213.133.96.18 - :type path_rib_file: unicode :return: """ @@ -131,8 +127,8 @@ def convert_rib_to_net_as(self, path_rib_file: str or bool = False) -> dict: prefix = '' as_path = '' - file_rib_data = open(path_rib_file) - line = file_rib_data.readline() + file_rib_data = open(path_rib_file, 'r') + line: str = file_rib_data.readline() while line: symbol = line[0] if symbol == 'T' or symbol == 'S' or symbol == 'F' or symbol == 'O' or symbol == 'N': diff --git a/classes/downloader.py b/classes/downloader.py index 6d31da2..20a53a1 100644 --- a/classes/downloader.py +++ b/classes/downloader.py @@ -4,6 +4,7 @@ from 
classes.command.wget import Wget from helpers.helpers import * import shutil +import concurrent.futures from helpers.helpersCollor import BColor @@ -47,6 +48,24 @@ def download_file(url: str, data_dir: str) -> bool: return True + @staticmethod + def download(path: str, item: dict): + """ + :return: + """ + file_name = item['file_name'] + url = item['url'] + path_file = os.path.abspath(os.path.join(path, file_name)) + + BColor.process("Download %s to %s " % (url, path_file)) + shutil.rmtree(path_file, ignore_errors=True) + Downloader.download_file(url, path_file) + if os.path.getsize(path_file) == 0: + BColor.error("Can`t download file %s to %s" % (url, path_file)) + raise Exception("Can`t download file %s to %s" % (url, path_file)) + + return os.path.getsize(path_file) + @staticmethod def download_data_for_current_date() -> str: """ @@ -73,13 +92,15 @@ def download_data_for_current_date() -> str: path = Downloader.create_data_dir() - for item in files_list: - path_file = os.path.abspath(os.path.join(path, item['file_name'])) - BColor.process("Download %s to %s " % (item['url'], path_file)) - shutil.rmtree(path_file, ignore_errors=True) - Downloader.download_file(item['url'], path_file) - if os.path.getsize(path_file) == 0: - BColor.error("Can`t download file %s to %s" % (item['url'], path_file)) - raise Exception("Can`t download file %s to %s" % (item['url'], path_file)) + with concurrent.futures.ThreadPoolExecutor(max_workers=len(files_list)) as executor: + future_to_download = {executor.submit(Downloader.download, + path, + item): item for item in files_list} + for future in concurrent.futures.as_completed(future_to_download, timeout=1800): + item = future_to_download[future] + file_name = item['file_name'] + url = item['url'] + array_data = future.result() + BColor.ok("Download url %s to %s, size is %i" % (url, file_name, array_data)) return path diff --git a/doc/structure.sql b/doc/structure.sql index 9edaac8..d108f4e 100644 --- a/doc/structure.sql +++ 
b/doc/structure.sql @@ -633,29 +633,6 @@ CREATE TABLE `ns_domain_old_count_statistic` ( ) ENGINE=InnoDB AUTO_INCREMENT=9396429 DEFAULT CHARSET=utf8; /*!40101 SET character_set_client = @saved_cs_client */; --- --- Table structure for table `providers_like_statistic` --- - -DROP TABLE IF EXISTS `providers_like_statistic`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `providers_like_statistic` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `date` date NOT NULL, - `name` varchar(70) DEFAULT NULL, - `tld` varchar(32) DEFAULT NULL, - `count` bigint(20) NOT NULL, - PRIMARY KEY (`id`), - UNIQUE KEY `uniq` (`date`,`name`,`tld`), - KEY `date` (`date`), - KEY `date_tld` (`date`,`tld`), - KEY `i_ns` (`name`), - KEY `i_ns_tld` (`name`,`tld`), - KEY `i_date_ns` (`name`,`date`) -) ENGINE=InnoDB AUTO_INCREMENT=3392465 DEFAULT CHARSET=utf8; -/*!40101 SET character_set_client = @saved_cs_client */; - -- -- Table structure for table `registrant` -- diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index df24737..5cc7335 100755 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -26,7 +26,7 @@ runner: volumes: - ../:/home/domain_statistic:rw - ./counteiner_data/download/:/home/domain_statistic/download:rw - - ./runner/config/root:/var/spool/cron/crontabs/root:rw + - ./runner/config/root:/var/spool/cron/crontabs/root:ro #rpki: # build: rpki/ @@ -42,7 +42,7 @@ devrunner: links: - "mysql:db" - "recurcer:resolver" -# - "rpki:rpki" + # - "rpki:rpki" mem_limit: "200G" ports: - "2222:2222" diff --git a/docker/mysql/create_base.sh b/docker/mysql/create_base.sh index fdd3fae..699a55a 100755 --- a/docker/mysql/create_base.sh +++ b/docker/mysql/create_base.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash echo "create database domain_statistic;" | mysql mysql; -echo "GRANT ALL PRIVILEGES ON domain_statistic.* TO domain_statistic@'%' IDENTIFIED BY '0123456789';" | mysql mysql; +echo "GRANT ALL PRIVILEGES 
ON domain_statistic.* TO domain_statistic@'%' IDENTIFIED BY '120686120686120686DePole12';" | mysql mysql; echo "GRANT SELECT ON domain_statistic.* TO readonlyqweqweqwe@'%' IDENTIFIED BY 'readonlyqweqweqwe';" | mysql mysql; echo "FLUSH PRIVILEGES;" | mysql mysql; MYPASSWD=$RANDOM$RANDOM$RANDOM diff --git a/docker/rpki/install_minit.sh b/docker/rpki/install_minit.sh deleted file mode 100755 index 4c43eee..0000000 --- a/docker/rpki/install_minit.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -mkdir -p /etc/minit - -# This is roughly equivalent to add-apt-repository ppa:chazomaticus/minit. -apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E007F6BD -echo "deb http://ppa.launchpad.net/chazomaticus/minit/ubuntu quantal main" > /etc/apt/sources.list.d/minit.list -apt-get update && apt-get upgrade -y && apt-get install -y minit # etc. - -apt-get clean; -rm -rf /tmp/* /var/lib/apt/lists/* /var/tmp/*; - -echo "Done" \ No newline at end of file diff --git a/docker/runner/config/root b/docker/runner/config/root index fa43dbc..d1326ee 100644 --- a/docker/runner/config/root +++ b/docker/runner/config/root @@ -1,7 +1,10 @@ -SHELL=/bin/sh +# DO NOT EDIT THIS FILE - edit the master and reinstall. 
+# (/tmp/crontab.XlhJVj/crontab installed on Sat Apr 11 19:42:02 2020) +# (Cron version -- $Id: crontab.c,v 2.13 1994/01/17 03:20:37 vixie Exp $) +SHELL=/bin/bash PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin -10 1 */5 * * /usr/local/bin/python3.7 /home/domain_statistic/update_as_info.py >> /home/domain_statistic/download/update_as_info_`date +%Y-%m-%d`.log -0 6 * * * /usr/local/bin/python3.7 /home/domain_statistic/update_domain.py -n `cat /etc/resolv.conf | awk '{print $2}'` -u 2>&1 &>/home/domain_statistic/download/update_domain_`date +%Y-%m-%d`.log -#1 1 * * * /usr/local/bin/python3.7 /home/domain_statistic/update_rpki_history.py >> /home/domain_statistic/download/update_rpki.log -#1 10 */7 * * /usr/local/bin/python3.7 /home/domain_statistic/normalization.py \ No newline at end of file +10 1 */5 * * /usr/local/bin/python3.7 /home/domain_statistic/update_as_info.py >> /home/domain_statistic/download/update_as_info_`/bin/date +\%Y-\%m-\%d`.log +10 6 * * * /usr/local/bin/python3.7 /home/domain_statistic/update_domain.py -n `cat /etc/resolv.conf | awk '{print $2}'` -u 2>&1 &> /home/domain_statistic/download/update_domain_`/bin/date +\%Y-\%m-\%d`.log +#1 1 * * * /usr/local/bin/python3.7 /home/domain_statistic/update_rpki_history.py >> /home/domain_statistic/download/update_rpki.log +#1 10 */7 * * /usr/local/bin/python3.7 /home/domain_statistic/normalization.py diff --git a/update_domain.py b/update_domain.py index 2dc4248..134890d 100644 --- a/update_domain.py +++ b/update_domain.py @@ -87,7 +87,7 @@ def load_prefix_list_from_var(prefix_list: dict) -> SubnetTree.SubnetTree: BColor.process("Unzip file") converter = Converter(path, delete_work_dir=(not args.show_verbose)) - BColor.process("Parsing rib file") + BColor.process("Parsing rib file (run bgpdump)") converter.parse_file_rib_file_to() BColor.process("Get AS list")