-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: handle different body fields (#100)
This PR clarifies the concept of "body" used in code and config. Fixes #95 and #96. ## Motivation Previously, I named a generic field in the code "description" to distinguish it from the title. For the rss format it refers to the [`description` field](https://github.com/shouya/rss-funnel/blob/dc1efac19a96e06143b75e9495adb3f6b013a75f/src/feed.rs#L348) and for atom it refers to the [`content` field](https://github.com/shouya/rss-funnel/blob/dc1efac19a96e06143b75e9495adb3f6b013a75f/src/feed.rs#L368). The choice of the name and the selected fields were purely arbitrary, based on the few example feeds I had in hand. Overall, it is supposed to be the field that ultimately gets displayed in the RSS reader beneath the title. In this PR I renamed the general term to "body". Unlike the old notion, a post can have multiple `body` fields. We need this if we want to handle all the different types of fields that are considered as body in the RSS reader. For example, if we consider all the body fields, then we can correctly filter posts matching certain keywords using the `keep_only` and `discard` filters (#95). In addition, some feeds do not use the typical body fields. One example is YouTube, which puts the video description in the `media:description` field under the `media:group` tag (#92). And we hope to support filtering on this field as well. ## Implementation First, I removed the single-field accessor for the `Post.description` field. Then I provided various APIs for accessing the bodies: + `Post.bodies_mut` + `Post.bodies` + `Post.modify_bodies` + `Post.first_body` + `Post.first_body_mut` + `Post.create_body` + `Post.ensure_body` The following fields are considered as body fields: - rss + `content` + `description` + `media:description` + `itunes:summary` - atom + `content` + `summary` + `media:description` ## Config changes - Rename the `content` variant of the `field` field to `body` for the `keep_only`/`discard` filters. 
- Rename the `description_selector` field to `body_selector` for the `extract` filter. Both changes are backward compatible. The old fields are currently marked deprecated, and may be removed in a future breaking release. ## Checklist - [ ] update filter docs - [x] review all usage of the term "description" in code
- Loading branch information
Showing
8 changed files
with
317 additions
and
133 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
use std::collections::BTreeMap; | ||
|
||
/// Read-only view of a single extension tag.
///
/// Every field is a shared reference into the element the view was built
/// from, so copying the view is cheap and never duplicates tag data.
#[derive(Debug, Clone, Copy)]
pub struct TagRef<'a> {
  /// Name of the tag.
  pub name: &'a String,
  /// Attributes attached to the tag.
  pub attrs: &'a BTreeMap<String, String>,
  /// Value of the tag, if it has one.
  pub value: &'a Option<String>,
}
|
||
/// Mutable view of a single extension tag.
///
/// Every field is an exclusive reference into the element the view was
/// built from, so writes through the view modify that element in place.
#[derive(Debug)]
pub struct TagRefMut<'a> {
  /// Name of the tag.
  pub name: &'a mut String,
  /// Attributes attached to the tag.
  pub attrs: &'a mut BTreeMap<String, String>,
  /// Value of the tag, if it has one.
  pub value: &'a mut Option<String>,
}
|
||
pub trait ExtensionExt { | ||
fn tags(&self) -> Vec<TagRef>; | ||
fn tags_mut(&mut self) -> Vec<TagRefMut>; | ||
|
||
fn tags_mut_with_names(&mut self, names: &[&str]) -> Vec<TagRefMut> { | ||
self | ||
.tags_mut() | ||
.into_iter() | ||
.filter(|tag| names.contains(&tag.name.as_str())) | ||
.collect() | ||
} | ||
|
||
fn tags_with_names(&self, names: &[&str]) -> Vec<TagRef> { | ||
self | ||
.tags() | ||
.into_iter() | ||
.filter(|tag| names.contains(&tag.name.as_str())) | ||
.collect() | ||
} | ||
} | ||
|
||
// Implements `ExtensionExt` for an extension type that exposes `name`,
// `attrs`, `value` and `children` fields.
//
// Both methods list the element's own tag first, followed by the tags of
// every child element (recursively), visiting `children` in the order its
// `values()` / `values_mut()` iterator yields them.
macro_rules! impl_extension_ext {
  ($ty:ty) => {
    impl ExtensionExt for $ty {
      fn tags(&self) -> Vec<TagRef<'_>> {
        let own = TagRef {
          name: &self.name,
          attrs: &self.attrs,
          value: &self.value,
        };
        let nested = self
          .children
          .values()
          .flatten()
          .flat_map(|child| child.tags());

        ::std::iter::once(own).chain(nested).collect()
      }

      fn tags_mut(&mut self) -> Vec<TagRefMut<'_>> {
        // The four fields are borrowed separately, so the view of this
        // element and the views of its children can coexist.
        let own = TagRefMut {
          name: &mut self.name,
          attrs: &mut self.attrs,
          value: &mut self.value,
        };
        let nested = self
          .children
          .values_mut()
          .flatten()
          .flat_map(|child| child.tags_mut());

        ::std::iter::once(own).chain(nested).collect()
      }
    }
  };
}
|
||
// `atom_syndication::extension::Extension` and `rss::extension::Extension` | ||
// have exactly the same structure, but they are different types since they | ||
// belong to different crates, so the macro is invoked once for each of them. | ||
impl_extension_ext!(atom_syndication::extension::Extension); | ||
impl_extension_ext!(rss::extension::Extension); | ||
|
||
impl<T> ExtensionExt for BTreeMap<String, BTreeMap<String, Vec<T>>> | ||
where | ||
T: ExtensionExt, | ||
{ | ||
fn tags(&self) -> Vec<TagRef> { | ||
self | ||
.values() | ||
.flat_map(|children| { | ||
children | ||
.values() | ||
.flat_map(|exts| exts.iter().flat_map(|ext| ext.tags())) | ||
}) | ||
.collect() | ||
} | ||
|
||
fn tags_mut(&mut self) -> Vec<TagRefMut> { | ||
self | ||
.values_mut() | ||
.flat_map(|children| { | ||
children | ||
.values_mut() | ||
.flat_map(|exts| exts.iter_mut().flat_map(|ext| ext.tags_mut())) | ||
}) | ||
.collect() | ||
} | ||
} |
Oops, something went wrong.