From 9d1d9b3dd651fb190d8c45f47908d9fbcd2e4545 Mon Sep 17 00:00:00 2001
From: shelld3v <59408894+shelld3v@users.noreply.github.com>
Date: Sun, 22 May 2022 15:13:17 +0700
Subject: [PATCH] Improved GitLab data source and ported Searchcode source to
 API usage

---
 README.md                               |  4 +--
 examples/config.ini                     |  9 +++--
 resources/scripts/api/gitlab.ads        | 36 ++++++++++++++++----
 resources/scripts/api/searchcode.ads    | 22 ++++++++++++
 resources/scripts/scrape/searchcode.ads | 45 -------------------------
 5 files changed, 57 insertions(+), 59 deletions(-)
 create mode 100644 resources/scripts/api/searchcode.ads
 delete mode 100644 resources/scripts/scrape/searchcode.ads

diff --git a/README.md b/README.md
index 31a8c892f..e91f0cae0 100644
--- a/README.md
+++ b/README.md
@@ -24,11 +24,11 @@ The OWASP Amass Project performs network mapping of attack surfaces and external
 
 | Technique | Data Sources |
 |:-------------|:-------------|
-| APIs | 360PassiveDNS, Ahrefs, AnubisDB, BinaryEdge, BufferOver, BuiltWith, C99, Chaos, CIRCL, Cloudflare, DNSDB, DNSRepo, Detectify, FOFA, FullHunt, GitHub, GitLab, Greynoise, HackerTarget, Hunter, IntelX, LeakIX, Maltiverse, Mnemonic, N45HT, PassiveTotal, PentestTools, Quake, Shodan, SonarSearch, Spamhaus, Spyse, Sublist3rAPI, ThreatBook, ThreatCrowd, ThreatMiner, Twitter, URLScan, VirusTotal, ZETAlytics, ZoomEye |
+| APIs | 360PassiveDNS, Ahrefs, AnubisDB, BinaryEdge, BufferOver, BuiltWith, C99, Chaos, CIRCL, Cloudflare, DNSDB, DNSRepo, Detectify, FOFA, FullHunt, GitHub, GitLab, Greynoise, HackerTarget, Hunter, IntelX, LeakIX, Maltiverse, Mnemonic, N45HT, PassiveTotal, PentestTools, Quake, Searchcode, Shodan, SonarSearch, Spamhaus, Spyse, Sublist3rAPI, ThreatBook, ThreatCrowd, ThreatMiner, Twitter, URLScan, VirusTotal, ZETAlytics, ZoomEye |
 | Certificates | Active pulls (optional), Censys, CertSpotter, Crtsh, Digitorus, FacebookCT, GoogleCT |
 | DNS | Brute forcing, Reverse DNS sweeping, NSEC zone walking, Zone transfers, FQDN alterations/permutations, FQDN Similarity-based Guessing |
 | Routing | ARIN, BGPTools, BGPView, IPdata, IPinfo, NetworksDB, RADb, Robtex, ShadowServer, TeamCymru |
-| Scraping | AbuseIPDB, Ask, Baidu, Bing, DNSDumpster, DuckDuckGo, Gists, HackerOne, HyperStat, IPv4Info, PKey, RapidDNS, Riddler, Searchcode, Searx, SiteDossier, Yahoo |
+| Scraping | AbuseIPDB, Ask, Baidu, Bing, DNSDumpster, DuckDuckGo, Gists, HackerOne, HyperStat, IPv4Info, PKey, RapidDNS, Riddler, Searx, SiteDossier, Yahoo |
 | Web Archives | ArchiveIt, Arquivo, CommonCrawl, HAW, UKWebArchive, Wayback |
 | WHOIS | AlienVault, AskDNS, DNSlytics, ONYPHE, SecurityTrails, SpyOnWeb, Umbrella, WhoisXMLAPI |
 
diff --git a/examples/config.ini b/examples/config.ini
index 56063d975..a4a55643c 100644
--- a/examples/config.ini
+++ b/examples/config.ini
@@ -223,15 +223,14 @@ minimum_ttl = 1440 ; One day
 #[data_sources.GitHub.accountname]
 #apikey =
 
-# https://gitlab.com (Freemium)
+# https://gitlab.com (Free)
+# GitLab apikey is the personal access token with at least read_repository or api scope
 #[data_sources.GitLab]
-#[data_sources.GitLab.free]
-#apikey =
-#[data_sources.GitLab.premium]
+#ttl = 4320
+#[data_sources.GitLab.accountname]
 #apikey =
 
 # https://hackertarget.com (Paid/Free)
-# HackerTarget can be used without an API key, but the key allows better results
 #[data_sources.HackerTarget]
 #ttl = 1440
 #[data_sources.HackerTarget.Credentials]
diff --git a/resources/scripts/api/gitlab.ads b/resources/scripts/api/gitlab.ads
index 29ee61f4b..367bae2a4 100644
--- a/resources/scripts/api/gitlab.ads
+++ b/resources/scripts/api/gitlab.ads
@@ -1,6 +1,8 @@
 -- Copyright 2021 Jeff Foley. All rights reserved.
 -- Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
 
+local json = require("json")
+
 name = "GitLab"
 type = "api"
 
@@ -32,15 +34,35 @@ function vertical(ctx, domain)
         return
     end
 
-    local scopes = {"issues", "blobs", "notes"}
-    for _, s in pairs(scopes) do
-        scrape(ctx, {
-            url=build_url(domain, s),
-            headers={['PRIVATE-TOKEN']=c.key},
+    local resp, err = request(ctx, {
+        ['url']=search_url(domain),
+        ['headers']={['PRIVATE-TOKEN']=c.key},
+    })
+    if (err ~= nil and err ~= "") then
+        log(ctx, "vertical request to service failed: " .. err)
+        return
+    end
+
+    local j = json.decode(resp)
+    if (j == nil or #j == 0) then
+        return
+    end
+
+    for _, item in pairs(j) do
+        local ok = scrape(ctx, {
+            ['url']=get_file_url(item.project_id, item.path, item.ref),
+            ['headers']={['PRIVATE-TOKEN']=c.key},
         })
+        if not ok then
+            send_names(ctx, item.data)
+        end
     end
 end
 
-function build_url(domain, scope)
-    return "https://gitlab.com/api/v4/search?scope=" .. scope .. "&search=" .. domain:gsub("%.", "[.]")
+function get_file_url(id, path, ref)
+    return "https://gitlab.com/api/v4/projects/" .. id .. "/repository/files/" .. path:gsub("/", "%%2f") .. "/raw?ref=" .. ref
+end
+
+function search_url(domain)
+    return "https://gitlab.com/api/v4/search?scope=blobs&search=" .. domain
 end
diff --git a/resources/scripts/api/searchcode.ads b/resources/scripts/api/searchcode.ads
new file mode 100644
index 000000000..910c42719
--- /dev/null
+++ b/resources/scripts/api/searchcode.ads
@@ -0,0 +1,22 @@
+-- Copyright 2021 Jeff Foley. All rights reserved.
+-- Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+name = "Searchcode"
+type = "api"
+
+function start()
+    set_rate_limit(2)
+end
+
+function vertical(ctx, domain)
+    for i=0,49 do
+        local ok = scrape(ctx, {['url']=build_url(domain, i)})
+        if not ok then
+            return
+        end
+    end
+end
+
+function build_url(domain, pagenum)
+    return "https://searchcode.com/api/codesearch_I/?per_page=100&q=." .. domain .. "&p=" .. pagenum
+end
diff --git a/resources/scripts/scrape/searchcode.ads b/resources/scripts/scrape/searchcode.ads
deleted file mode 100644
index a50bc035c..000000000
--- a/resources/scripts/scrape/searchcode.ads
+++ /dev/null
@@ -1,45 +0,0 @@
--- Copyright 2021 Jeff Foley. All rights reserved.
--- Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
-
-name = "Searchcode"
-type = "scrape"
-
-function start()
-    set_rate_limit(2)
-end
-
-function vertical(ctx, domain)
-    for i=0,20 do
-        local page, err = request(ctx, {['url']=build_url(domain, i)})
-        if (err ~= nil and err ~= "") then
-            log(ctx, "vertical request to service failed: " .. err)
-            break
-        end
-
-        local found = find_names(ctx, page:gsub("<strong>", ""), domain)
-        if not found then
-            break
-        end
-    end
-end
-
-function build_url(domain, pagenum)
-    return "https://searchcode.com/?q=." .. domain .. "&p=" .. pagenum
-end
-
-function find_names(ctx, content, domain)
-    local names = find(content, subdomain_regex)
-    if (names == nil or #names == 0) then
-        return false
-    end
-
-    local found = false
-    for _, name in pairs(names) do
-        if in_scope(ctx, name) then
-            found = true
-            new_name(ctx, name)
-        end
-    end
-
-    return found
-end
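
For reference: the blobs search endpoint the reworked gitlab.ads queries returns a JSON array whose elements include project_id, path, ref, and a data excerpt of the matching file, which are exactly the fields the script reads. Below is a minimal standalone Lua sketch of that flow, assuming a hypothetical search result and a json module exposing the same decode() interface as the one the Amass script engine bundles:

-- Standalone sketch, not part of the patch; the response element and its
-- values are hypothetical, trimmed to the fields gitlab.ads actually uses.
local json = require("json")

-- Example shape of GET /api/v4/search?scope=blobs&search=<domain>
local resp = [[
[{"project_id": 12345, "path": "config/settings.yml", "ref": "main", "data": "host: api.example.com\n"}]
]]

local j = json.decode(resp)
for _, item in pairs(j) do
    -- In a gsub replacement string, "%%2f" emits a literal "%2f", so every
    -- "/" in the path is URL-encoded to fit GitLab's
    -- /projects/:id/repository/files/:file_path/raw endpoint.
    print("https://gitlab.com/api/v4/projects/" .. item.project_id ..
        "/repository/files/" .. item.path:gsub("/", "%%2f") .. "/raw?ref=" .. item.ref)
    --> https://gitlab.com/api/v4/projects/12345/repository/files/config%2fsettings.yml/raw?ref=main
end

In the patched script, scraping that raw-file URL is attempted first and send_names(ctx, item.data) is the fallback, so a search hit still yields names even when the file itself cannot be fetched.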