From a1968355c352e707b84e7ccfc6112c7385ac4c86 Mon Sep 17 00:00:00 2001 From: shouya <526598+shouya@users.noreply.github.com> Date: Fri, 28 Jun 2024 22:14:03 +0900 Subject: [PATCH 1/4] implement find_magnet filter --- src/filter.rs | 1 + src/filter/find_magnet.rs | 140 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 src/filter/find_magnet.rs diff --git a/src/filter.rs b/src/filter.rs index 391fc8a..476a458 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -1,4 +1,5 @@ mod convert; +mod find_magnet; mod full_text; mod highlight; mod html; diff --git a/src/filter/find_magnet.rs b/src/filter/find_magnet.rs new file mode 100644 index 0000000..c78cc00 --- /dev/null +++ b/src/filter/find_magnet.rs @@ -0,0 +1,140 @@ +use regex::Regex; +use rss::Enclosure; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use crate::{ + feed::{Feed, Post}, + util::{ConfigError, Error}, +}; + +use super::{FeedFilter, FeedFilterConfig, FilterContext}; + +#[derive( + JsonSchema, Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Hash, +)] +/// Find magnet link discovered in the body of entries and save it in +/// the enclosure (RSS)/link (Atom). The resulting feed can be used in +/// a torrent client. +pub struct FindMagnetConfig { + /// Match any `[a-fA-F0-9]{40}` as the info hash. + #[serde(default)] + info_hash: bool, + /// Whether or not to override existing magnet links in the enclosure/link. 
+ #[serde(default)] + override_existing: bool, +} + +pub struct FindMagnet { + config: FindMagnetConfig, +} + +#[async_trait::async_trait] +impl FeedFilterConfig for FindMagnetConfig { + type Filter = FindMagnet; + + async fn build(self) -> Result { + Ok(FindMagnet { config: self }) + } +} + +#[async_trait::async_trait] +impl FeedFilter for FindMagnet { + async fn run( + &self, + _ctx: &mut FilterContext, + mut feed: Feed, + ) -> Result { + let mut posts = feed.take_posts(); + + for post in posts.iter_mut() { + let bodies = post.bodies(); + let link: Option = bodies + .iter() + .flat_map(|body| find_magnet_links(body, &self.config)) + .next(); + + if let Some(link) = link { + set_magnet_link(post, link, self.config.override_existing); + } + } + + feed.set_posts(posts); + Ok(feed) + } +} + +lazy_static::lazy_static! { + static ref MAGNET_LINK_REGEX: Regex = Regex::new( + r"(?i)\b(?Pmagnet:\?xt=urn:btih:[a-fA-F0-9]{40}(&\w+=[^\s]+)*)\b" + ) + .unwrap(); + static ref INFO_HASH_REGEX: Regex = + Regex::new(r"\b(?i)(?P[a-fA-F0-9]{40})\b").unwrap(); +} + +fn existing_magnet_link(post: &Post) -> Option<&str> { + match post { + Post::Rss(p) => p + .enclosure() + .into_iter() + .filter(|e| e.mime_type() == "application/x-bittorrent") + .map(|e| e.url()) + .next(), + Post::Atom(p) => p + .links() + .iter() + .filter(|l| l.href().starts_with("magnet:")) + .map(|l| l.href()) + .next(), + } +} + +fn set_magnet_link(post: &mut Post, link: String, override_: bool) { + if !override_ && existing_magnet_link(post).is_some() { + return; + } + + match post { + Post::Rss(p) => { + let enclosure = Enclosure { + url: link, + mime_type: "application/x-bittorrent".to_string(), + length: "".to_string(), + }; + p.set_enclosure(enclosure); + } + Post::Atom(p) => { + let link = atom_syndication::Link { + href: link, + mime_type: Some("application/x-bittorrent".to_string()), + ..Default::default() + }; + p.links.push(link); + } + } +} + +fn find_magnet_links(text: &str, config: 
&FindMagnetConfig) -> Vec { + let regex = if config.info_hash { + &*INFO_HASH_REGEX + } else { + &*MAGNET_LINK_REGEX + }; + + let captures: Vec = regex.captures_iter(text).collect(); + + captures + .into_iter() + .map(|m| { + if config.info_hash { + format!( + "magnet:?xt=urn:btih:{}", + m.name("info_hash").unwrap().as_str() + ) + } else { + m.name("full").unwrap().as_str().to_string() + } + }) + .collect() +} From bb21e83d4f55737bae71af7b2c9ca5f94977eae6 Mon Sep 17 00:00:00 2001 From: shouya <526598+shouya@users.noreply.github.com> Date: Fri, 28 Jun 2024 22:14:31 +0900 Subject: [PATCH 2/4] register find_magnet filter --- src/filter.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/filter.rs b/src/filter.rs index 476a458..638e485 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -228,4 +228,5 @@ define_filters!( Note => note::NoteFilterConfig, "Add non-functional comment"; ConvertTo => convert::ConvertToConfig, "Convert feed to another format"; Limit => limit::LimitConfig, "Limit the number of posts"; + FindMagnet => find_magnet::FindMagnetConfig, "Find magnet links in posts"; ); From d5242ae8cc7b6d7b0a39be87b031fe356751246d Mon Sep 17 00:00:00 2001 From: shouya <526598+shouya@users.noreply.github.com> Date: Sat, 29 Jun 2024 21:07:17 +0900 Subject: [PATCH 3/4] support info_hash v2 --- src/filter/find_magnet.rs | 63 +++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 6 deletions(-) diff --git a/src/filter/find_magnet.rs b/src/filter/find_magnet.rs index c78cc00..4e42ae9 100644 --- a/src/filter/find_magnet.rs +++ b/src/filter/find_magnet.rs @@ -2,6 +2,7 @@ use regex::Regex; use rss::Enclosure; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use tracing::warn; use crate::{ feed::{Feed, Post}, @@ -66,11 +67,12 @@ impl FeedFilter for FindMagnet { lazy_static::lazy_static! 
{ static ref MAGNET_LINK_REGEX: Regex = Regex::new( - r"(?i)\b(?Pmagnet:\?xt=urn:btih:[a-fA-F0-9]{40}(&\w+=[^\s]+)*)\b" + // btih: bt info hash v1; btmh: bt info hash v2 + r"\b(?magnet:\?xt=urn:bt(ih:[a-fA-F0-9]{40}|mh:[a-fA-F0-9]{68})(&[\w.]+=[^\s]+)*)\b" ) .unwrap(); static ref INFO_HASH_REGEX: Regex = - Regex::new(r"\b(?i)(?P[a-fA-F0-9]{40})\b").unwrap(); + Regex::new(r"\b(?[a-fA-F0-9]{40}|[a-fA-F0-9]{68})\b").unwrap(); } fn existing_magnet_link(post: &Post) -> Option<&str> { @@ -128,13 +130,62 @@ fn find_magnet_links(text: &str, config: &FindMagnetConfig) -> Vec { .into_iter() .map(|m| { if config.info_hash { - format!( - "magnet:?xt=urn:btih:{}", - m.name("info_hash").unwrap().as_str() - ) + let info_hash = m.name("info_hash").unwrap().as_str(); + if info_hash.len() == 40 { + format!("magnet:?xt=urn:btih:{}", info_hash) + } else if info_hash.len() == 68 { + format!("magnet:?xt=urn:btmh:{}", info_hash) + } else { + warn!("Bad length for info hash: {}", info_hash); + format!("magnet:?xt=urn:btih:{}", info_hash) + } } else { m.name("full").unwrap().as_str().to_string() } }) .collect() } + +#[cfg(test)] +mod test { + #[test] + fn test_find_magnet_links() { + let text = "HELLO magnet:?xt=urn:btih:1234567890ABCDEF1234567890ABCDEF12345678&dn=hello+world WORLD"; + let links = super::find_magnet_links( + text, + &super::FindMagnetConfig { + info_hash: false, + override_existing: false, + }, + ); + assert_eq!( + links, + vec![ + "magnet:?xt=urn:btih:1234567890ABCDEF1234567890ABCDEF12345678&dn=hello+world" + ] + ); + + let text = "HELLO 1234567890ABCDEF1234567890ABCDEF12345678 WORLD"; + let links = super::find_magnet_links( + text, + &super::FindMagnetConfig { + info_hash: true, + override_existing: false, + }, + ); + assert_eq!( + links, + vec!["magnet:?xt=urn:btih:1234567890ABCDEF1234567890ABCDEF12345678"] + ); + + let text = "HELLO 1234567890ABCDEF1234567890ABCDEF12345678 WORLD"; + let links = super::find_magnet_links( + text, + &super::FindMagnetConfig { + info_hash: 
false, + override_existing: false, + }, + ); + assert!(links.is_empty()); + } +} From 8d8b8829a83b0618dc25f6119dcef8e90f8e9e26 Mon Sep 17 00:00:00 2001 From: shouya <526598+shouya@users.noreply.github.com> Date: Sat, 29 Jun 2024 21:14:45 +0900 Subject: [PATCH 4/4] rename the filter's name from find_magnet to magnet --- src/filter.rs | 4 ++-- src/filter/{find_magnet.rs => magnet.rs} | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) rename src/filter/{find_magnet.rs => magnet.rs} (91%) diff --git a/src/filter.rs b/src/filter.rs index 638e485..61b59ab 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -1,10 +1,10 @@ mod convert; -mod find_magnet; mod full_text; mod highlight; mod html; mod js; mod limit; +mod magnet; mod merge; mod note; mod sanitize; @@ -228,5 +228,5 @@ define_filters!( Note => note::NoteFilterConfig, "Add non-functional comment"; ConvertTo => convert::ConvertToConfig, "Convert feed to another format"; Limit => limit::LimitConfig, "Limit the number of posts"; - FindMagnet => find_magnet::FindMagnetConfig, "Find magnet links in posts"; + Magnet => magnet::MagnetConfig, "Find magnet links in posts"; ); diff --git a/src/filter/find_magnet.rs b/src/filter/magnet.rs similarity index 91% rename from src/filter/find_magnet.rs rename to src/filter/magnet.rs index 4e42ae9..723c7e8 100644 --- a/src/filter/find_magnet.rs +++ b/src/filter/magnet.rs @@ -17,7 +17,7 @@ use super::{FeedFilter, FeedFilterConfig, FilterContext}; /// Find magnet link discovered in the body of entries and save it in /// the enclosure (RSS)/link (Atom). The resulting feed can be used in /// a torrent client. -pub struct FindMagnetConfig { +pub struct MagnetConfig { /// Match any `[a-fA-F0-9]{40}` as the info hash. 
#[serde(default)] info_hash: bool, @@ -26,21 +26,21 @@ pub struct FindMagnetConfig { override_existing: bool, } -pub struct FindMagnet { - config: FindMagnetConfig, +pub struct Magnet { + config: MagnetConfig, } #[async_trait::async_trait] -impl FeedFilterConfig for FindMagnetConfig { - type Filter = FindMagnet; +impl FeedFilterConfig for MagnetConfig { + type Filter = Magnet; async fn build(self) -> Result { - Ok(FindMagnet { config: self }) + Ok(Magnet { config: self }) } } #[async_trait::async_trait] -impl FeedFilter for FindMagnet { +impl FeedFilter for Magnet { async fn run( &self, _ctx: &mut FilterContext, @@ -117,7 +117,7 @@ fn set_magnet_link(post: &mut Post, link: String, override_: bool) { } } -fn find_magnet_links(text: &str, config: &FindMagnetConfig) -> Vec { +fn find_magnet_links(text: &str, config: &MagnetConfig) -> Vec { let regex = if config.info_hash { &*INFO_HASH_REGEX } else { @@ -153,7 +153,7 @@ mod test { let text = "HELLO magnet:?xt=urn:btih:1234567890ABCDEF1234567890ABCDEF12345678&dn=hello+world WORLD"; let links = super::find_magnet_links( text, - &super::FindMagnetConfig { + &super::MagnetConfig { info_hash: false, override_existing: false, }, @@ -168,7 +168,7 @@ mod test { let text = "HELLO 1234567890ABCDEF1234567890ABCDEF12345678 WORLD"; let links = super::find_magnet_links( text, - &super::FindMagnetConfig { + &super::MagnetConfig { info_hash: true, override_existing: false, }, @@ -181,7 +181,7 @@ mod test { let text = "HELLO 1234567890ABCDEF1234567890ABCDEF12345678 WORLD"; let links = super::find_magnet_links( text, - &super::FindMagnetConfig { + &super::MagnetConfig { info_hash: false, override_existing: false, },