Skip to content

Commit

Permalink
refactor(walk): Add WebsiteWalkBuilder (#13)
Browse files Browse the repository at this point in the history
* refactor: add WebsiteWalkBuilder
* fix(walk): remove Accept-Encoding header
  • Loading branch information
DonIsaac authored Apr 28, 2024
1 parent 0700469 commit 65358e7
Show file tree
Hide file tree
Showing 14 changed files with 528 additions and 193 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/target
tmp
tarpaulin-report.html
7 changes: 6 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
{
"rust-analyzer.cargo.features": "all"
"rust-analyzer.cargo.features": "all",
"cSpell.words": [
"gitleaks",
"keyhunter",
"miette"
]
}
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: build run debug fmt lint clean
.PHONY: build run debug fmt lint test clean

build:
cargo build --release --all-features
Expand All @@ -15,6 +15,9 @@ lint:
cargo fmt --check
cargo clippy --all-targets --all-features -- -D warnings

test:
cargo test --all-features

clean:
rm -rf tmp

Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@ cargo install keyhunter --all-features
You can also use it as a library:
```toml
[dependencies]
keyhunter = "0.1.0"
keyhunter = "0.1.1"
```

Library docs are available on [docs.rs](https://docs.rs/keyhunter/).

## Usage
Provide KeyHunter with a URL to start scanning from. It will visit all pages
on the same domain that URL links to, find all scripts referenced by those
Expand Down
38 changes: 31 additions & 7 deletions examples/yc_startups.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,19 @@ extern crate pretty_env_logger;

use keyhunter::{
report::Reporter, ApiKeyCollector, ApiKeyError, ApiKeyMessage, Config, ScriptMessage,
WebsiteWalker,
WebsiteWalkBuilder,
};
use log::{error, info};
use miette::{miette, Context as _, Error, IntoDiagnostic as _, Result};
use rand::random;
use std::{
env,
fs::{self, File},
io::{BufWriter, Write},
path::{Path, PathBuf},
sync::{mpsc, Arc, RwLock},
thread,
time::Duration,
};

type SyncReporter = Arc<RwLock<Reporter>>;
Expand Down Expand Up @@ -79,13 +81,27 @@ fn write_keys(output: &mut BufWriter<File>, api_key: ApiKeyError) -> Result<()>
}

fn main() -> Result<()> {
// use RUST_LOG=keyhunter=info if RUST_LOG is not set
if std::env::var("RUST_LOG").is_err() {
std::env::set_var("RUST_LOG", "keyhunter=info");
// Use RUST_LOG=keyhunter=info if RUST_LOG is not set
if env::var("RUST_LOG").is_err() {
env::set_var("RUST_LOG", "keyhunter=info");
}
pretty_env_logger::init();
const MAX_WALKS: usize = 20;

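// Sets the maximum number of pages that will be visited for some entrypoint
// URL. Higher values may cause a lot of wasted cycles as script/link
// extraction finds fewer and fewer new unique values, while lower values
// may leave a lot of stones unturned.
//
// The value is read from the MAX_WALKS environment variable. If the variable
// cannot be read or does not parse as a `usize`, 30 is used instead.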
let max_walks: usize = env::var("MAX_WALKS")
.into_diagnostic()
.and_then(|w| w.parse().into_diagnostic())
.unwrap_or(30);
assert!(
max_walks > 0,
"MAX_WALKS cannot be zero otherwise no pages will be checked!"
);

let config = Arc::new(Config::gitleaks());

let reporter: SyncReporter = Arc::new(RwLock::new(Reporter::default().with_redacted(true)));

let yc_sites_raw = yc_file().unwrap();
Expand Down Expand Up @@ -119,6 +135,14 @@ fn main() -> Result<()> {
let _ = key_writer.flush();
});

let walk_builder = WebsiteWalkBuilder::new()
.with_max_walks(max_walks)
.with_random_ua(true)
.with_cookie_jar(true)
.with_shared_cache(true)
.with_close_channel(false)
.with_timeout(Duration::from_secs(15))
.with_timeout_connect(Duration::from_secs(2));
yc_reader
.into_records()
// .par_bridge()
Expand All @@ -132,14 +156,14 @@ fn main() -> Result<()> {

info!(target: "keyhunter::main", "Scraping keys for site {name}...");
let (tx_scripts, rx_scripts) = mpsc::channel::<ScriptMessage>();
let walker = WebsiteWalker::new(tx_scripts.clone());
let walker = walk_builder.build(tx_scripts.clone());
let collector = ApiKeyCollector::new(config.clone(), rx_scripts, key_sender.clone());

// Visit pages in the target site, sending found script urls over the
// script channel
let moved_url = url.clone();
let walk_handle = thread::spawn(move || {
let result = walker.with_max_walks(MAX_WALKS).walk(&moved_url);
let result = walker.walk(&moved_url);
if result.is_err() {
error!(target: "keyhunter::main",
"failed to create walker: {}",
Expand Down
18 changes: 9 additions & 9 deletions src/cmd/runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use std::{
};

use keyhunter::{
ApiKeyCollector, ApiKeyMessage, ApiKeyReceiver, Config, ScriptMessage, WebsiteWalker,
ApiKeyCollector, ApiKeyMessage, ApiKeyReceiver, Config, ScriptMessage, WebsiteWalkBuilder,
};
use miette::{Context as _, Error, IntoDiagnostic as _, Result};

Expand Down Expand Up @@ -54,9 +54,13 @@ impl Runner {
) -> (ApiKeyReceiver, JoinHandle<Vec<Error>>) {
let (key_sender, key_receiver) = mpsc::channel::<ApiKeyMessage>();
let config = self.config.clone();
let max_walks = self.max_walks;
let headers = Arc::new(self.headers.clone());
let random_ua = self.random_ua;
let walk_builder = WebsiteWalkBuilder::default()
.with_max_walks(self.max_walks)
.with_random_ua(self.random_ua)
.with_headers(self.headers.clone())
.with_shared_cache(true)
.with_cookie_jar(true);

trace!("Starting runner thread");
// let mut errors: Arc<RwLock<Vec<Error>>> = Default::default();
Expand All @@ -72,20 +76,16 @@ impl Runner {
info!("Scraping keys for site '{url}'...");

let (tx_scripts, rx_scripts) = mpsc::channel::<ScriptMessage>();
let walker = WebsiteWalker::new(tx_scripts.clone()).with_random_ua(random_ua);
let walker = walk_builder.build(tx_scripts.clone());
let collector =
ApiKeyCollector::new(config.clone(), rx_scripts, key_sender.clone())
.with_random_ua(random_ua);

// Visit pages in the target site, sending found script urls over the
// script channel
let moved_url = url.clone();
let moved_headers = Arc::clone(&headers);
let walk_handle = thread::spawn(move || {
let result = walker
.with_max_walks(max_walks)
.with_headers(moved_headers.iter().cloned())
.walk(&moved_url);
let result = walker.walk(&moved_url);
if let Err(ref err) = result {
// println!("failed to create walker: {}", err);
println!("{:?}", err);
Expand Down
37 changes: 32 additions & 5 deletions src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,15 @@ impl Default for Config {
}

impl Config {
/// Creates an empty config with no rules.
///
/// ## Example
/// ```
/// use keyhunter::Config;
///
/// let config = Config::empty();
/// assert!(config.is_empty());
/// ```
pub fn empty() -> Self {
Self {
rule_ids: Default::default(),
Expand Down Expand Up @@ -140,11 +149,29 @@ impl Config {
}

/// Returns the number of rules in the config.
///
/// The count is the length of the config's rule ID collection, which is the
/// same collection that [`Config::is_empty`] checks.
///
/// ## Example
/// ```
/// use keyhunter::Config;
///
/// let config = Config::empty();
/// assert_eq!(config.len(), 0);
/// assert!(config.is_empty());
/// ```
#[inline]
pub fn len(&self) -> usize {
self.rule_ids.len()
}

/// Returns `true` if the config has no rules.
///
/// ## Example
/// ```
/// use keyhunter::Config;
///
/// assert!(Config::empty().is_empty());
/// assert!(!Config::gitleaks().is_empty());
/// ```
#[inline]
pub fn is_empty(&self) -> bool {
self.rule_ids.is_empty()
Expand Down Expand Up @@ -242,11 +269,11 @@ impl From<GitLeaksConfig> for Config {
mod test {
use super::*;

// #[test]
// fn test_default() {
// let config = Config::default();
// assert!(!config.name_rules().is_empty());
// }
#[test]
fn test_default() {
let config = Config::default();
assert!(!config.is_empty());
}

#[test]
fn from_gitleaks() -> Result<()> {
Expand Down
40 changes: 21 additions & 19 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
/// Copyright © 2024 Don Isaac
///
/// This file is part of KeyHunter.
///
/// KeyHunter is free software: you can redistribute it and/or modify it
/// under the terms of the GNU General Public License as published by the Free
/// Software Foundation, either version 3 of the License, or (at your option)
/// any later version.
///
/// KeyHunter is distributed in the hope that it will be useful, but WITHOUT
/// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
/// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
/// more details.
///
/// You should have received a copy of the GNU General Public License along with
/// KeyHunter. If not, see <https://www.gnu.org/licenses/>.
pub mod config;
//! `keyhunter` extracts API keys from JavaScript files.
//
// Copyright © 2024 Don Isaac
//
// This file is part of KeyHunter.
//
// KeyHunter is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// KeyHunter is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// KeyHunter. If not, see <https://www.gnu.org/licenses/>.
mod config;
mod extract;
pub(crate) mod http;
#[cfg(feature = "report")]
pub mod report;
mod walk;

pub use config::Config;
pub use config::{Config, RuleId};
pub use extract::{
ApiKeyCollector, ApiKeyError, ApiKeyExtractor, ApiKeyMessage, ApiKeyReceiver, ApiKeySender,
};
pub use walk::{ScriptMessage, ScriptReceiver, WebsiteWalker};
pub use walk::{ScriptMessage, ScriptReceiver, WebsiteWalkBuilder, WebsiteWalker};
2 changes: 1 addition & 1 deletion src/walk/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
mod website;
// TODO: file walker

pub use website::{ScriptMessage, ScriptReceiver, WebsiteWalker};
pub use website::{ScriptMessage, ScriptReceiver, WebsiteWalkBuilder, WebsiteWalker};
46 changes: 11 additions & 35 deletions src/walk/website/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub struct WalkFailedDiagnostic {
inner: WalkFailedDiagnosticInner,
}

#[derive(Debug, Error, Diagnostic)]
#[derive(Debug)]
enum WalkFailedDiagnosticInner {
Status {
status_code: u16,
Expand All @@ -56,7 +56,7 @@ enum WalkFailedDiagnosticInner {
},
Transport {
// inner: ureq::Error
#[source]
// #[source]
source: ureq::Transport,
},
}
Expand Down Expand Up @@ -87,15 +87,16 @@ impl WalkFailedDiagnostic {

Self {
url,
verbose: true,
// TODO: toggle this based on verbosity CLI flag
verbose: false,
inner,
}
}
}

impl fmt::Display for WalkFailedDiagnostic {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Failed to walk site at '{}': {}", self.url, self.inner)?;
write!(f, "Failed to walk site at '{}': ", self.url)?;

// write!(f, "Failed to walk site at '{}': ", self.url())?;
match &self.inner {
Expand All @@ -117,11 +118,13 @@ impl fmt::Display for WalkFailedDiagnostic {
for (header, value) in headers {
writeln!(f, " {}: {}", header, value)?;
}
}
if let Some(body) = &body {
write!(f, "\n\nResponse body:\n{}", body)
if let Some(body) = &body {
write!(f, "\n\nResponse body:\n{}", body)
} else {
write!(f, "\n\nNo response body")
}
} else {
write!(f, "\n\nNo response body")
Ok(())
}
}
WalkFailedDiagnosticInner::Transport { source, .. } => {
Expand All @@ -130,30 +133,3 @@ impl fmt::Display for WalkFailedDiagnostic {
}
}
}
impl fmt::Display for WalkFailedDiagnosticInner {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// write!(f, "Failed to walk site at '{}': ", self.url())?;
match self {
Self::Status {
status_code,
status_text,
body,
..
} => {
write!(
f,
"Server responded with status code {} ({})",
status_code, status_text
)?;
if let Some(body) = &body {
write!(f, "\n\nResponse body:\n{}", body)
} else {
write!(f, "\n\nNo response body")
}
}
Self::Transport { source, .. } => {
write!(f, "{}", source)
}
}
}
}
3 changes: 3 additions & 0 deletions src/walk/website/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,8 @@ mod dom_walker;
mod error;
mod url_visitor;
mod walk;
mod walk_builder;
mod walk_cache;

pub use walk::{ScriptMessage, ScriptReceiver, WebsiteWalker};
pub use walk_builder::WebsiteWalkBuilder;
Loading

0 comments on commit 65358e7

Please sign in to comment.