From 78fc93fedfffa7d1255fb0e3db0ce735b3ceb9bc Mon Sep 17 00:00:00 2001 From: Paul van Tilburg Date: Thu, 26 May 2022 22:06:22 +0200 Subject: [PATCH 1/3] Retrieve all pages by following the next URL * Deserialize the paging information * Parse each next URL; handle URL parse errors * Use a default page size of 50; pass offset 0 to count by item index --- Cargo.lock | 1 + Cargo.toml | 1 + src/lib.rs | 3 +++ src/mixcloud.rs | 44 ++++++++++++++++++++++++++++++++++++-------- 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5f9d164..fa3d428 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1598,6 +1598,7 @@ dependencies = [ "rocket_dyn_templates", "rss", "thiserror", + "url", "youtube_dl", ] diff --git a/Cargo.toml b/Cargo.toml index 37c39d9..7949ce1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ rocket = { version = "0.5.0-rc.2", features = ["json"] } rocket_dyn_templates = { version = "0.1.0-rc.2", features = ["tera"] } rss = "2.0.1" thiserror = "1.0.31" +url = "2.2.2" youtube_dl = { version = "0.7.0", features = ["tokio"] } [package.metadata.deb] diff --git a/src/lib.rs b/src/lib.rs index c6e3ea7..0551f80 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,6 +43,9 @@ pub(crate) enum Error { #[error("Unknown supported back-end: {0}")] UnsupportedBackend(String), + #[error("URL parse error: {0}")] + UrlParse(#[from] url::ParseError), + #[error("Youtube_dl failed: {0}")] YoutubeDl(#[from] youtube_dl::Error), } diff --git a/src/mixcloud.rs b/src/mixcloud.rs index 0390186..86a60b7 100644 --- a/src/mixcloud.rs +++ b/src/mixcloud.rs @@ -11,7 +11,7 @@ use youtube_dl::{YoutubeDl, YoutubeDlOutput}; use super::{Error, Result}; -/// A Mixcloud user. +/// A Mixcloud user (response). #[derive(Clone, Debug, Deserialize)] #[serde(crate = "rocket::serde")] pub(crate) struct User { @@ -36,12 +36,24 @@ pub(crate) struct Pictures { pub(crate) large: String, } -/// The Mixcloud cloudcasts container. +/// The Mixcloud cloudcasts response. 
#[derive(Debug, Deserialize)] #[serde(crate = "rocket::serde")] -pub(crate) struct CloudcastData { - /// The contained cloudcasts. - data: Vec, +pub(crate) struct CloudcastsResponse { + /// The contained cloudcast items. + #[serde(rename = "data")] + items: Vec, + + /// The paging information. + paging: CloudcastsPaging, +} + +/// The Mixcloud paging info. +#[derive(Debug, Deserialize)] +#[serde(crate = "rocket::serde")] +pub(crate) struct CloudcastsPaging { + /// The API URL of the next page. + next: Option, } /// A Mixcloud cloudcast. @@ -96,6 +108,9 @@ const DEFAULT_BITRATE: u32 = 64 * 1024; /// The default file (MIME) type used by Mixcloud. const DEFAULT_FILE_TYPE: &str = "audio/mpeg"; +/// The default page size. +const DEFAULT_PAGE_SIZE: &str = "50"; + /// Returns the default file type used by Mixcloud. pub(crate) const fn default_file_type() -> &'static str { DEFAULT_FILE_TYPE @@ -136,12 +151,25 @@ pub(crate) async fn user(username: &str) -> Result { pub(crate) async fn cloudcasts(username: &str) -> Result> { let mut url = Url::parse(API_BASE_URL).expect("URL can always be parsed"); url.set_path(&format!("{username}/cloudcasts/")); + url.query_pairs_mut() + .append_pair("limit", DEFAULT_PAGE_SIZE) + .append_pair("offset", "0"); println!("⏬ Retrieving cloudcasts of user {username} from {url}..."); - let response = reqwest::get(url).await?.error_for_status()?; - let cloudcasts: CloudcastData = response.json().await?; + let mut cloudcasts = Vec::with_capacity(50); // The initial limit + loop { + let response = reqwest::get(url).await?.error_for_status()?; + let cloudcasts_res: CloudcastsResponse = response.json().await?; + cloudcasts.extend(cloudcasts_res.items); - Ok(cloudcasts.data) + // Continue onto the next URL in the paging, if there is one. + match cloudcasts_res.paging.next { + Some(next_url) => url = Url::parse(&next_url)?, + None => break, + } + } + + Ok(cloudcasts) } /// Retrieves the redirect URL for the provided Mixcloud cloudcast key. 
-- 2.43.4 From c13ce71c6998b4814092f14aeefb798f85b93ab2 Mon Sep 17 00:00:00 2001 From: Paul van Tilburg Date: Fri, 27 May 2022 22:31:17 +0200 Subject: [PATCH 2/3] Add feed item limit support * The feed item limit defaults to the default page size (50) if not provided * Move caching from response to URL fetch results; add helper functions * Add a helper function to set the paging query of a URL * Modify paging so we don't retrieve more than the feed item limit --- src/lib.rs | 11 ++++-- src/mixcloud.rs | 89 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 73 insertions(+), 27 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0551f80..43a7d1a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -92,10 +92,15 @@ pub(crate) async fn download(file: PathBuf, backend: &str) -> Result { } /// Handler for retrieving the RSS feed of user on a certain back-end. -#[get("/feed//")] -async fn feed(backend: &str, username: &str, config: &State) -> Result { +#[get("/feed//?")] +async fn feed( + backend: &str, + username: &str, + limit: Option, + config: &State, +) -> Result { let user = mixcloud::user(username).await?; - let cloudcasts = mixcloud::cloudcasts(username).await?; + let cloudcasts = mixcloud::cloudcasts(username, limit).await?; let mut last_build = DateTime::::from_utc(NaiveDateTime::from_timestamp(0, 0), Utc); let category = CategoryBuilder::default() diff --git a/src/mixcloud.rs b/src/mixcloud.rs index 86a60b7..d719c45 100644 --- a/src/mixcloud.rs +++ b/src/mixcloud.rs @@ -37,7 +37,7 @@ pub(crate) struct Pictures { } /// The Mixcloud cloudcasts response. -#[derive(Debug, Deserialize)] +#[derive(Clone, Debug, Deserialize)] #[serde(crate = "rocket::serde")] pub(crate) struct CloudcastsResponse { /// The contained cloudcast items. @@ -49,7 +49,7 @@ pub(crate) struct CloudcastsResponse { } /// The Mixcloud paging info. 
-#[derive(Debug, Deserialize)] +#[derive(Clone, Debug, Deserialize)] #[serde(crate = "rocket::serde")] pub(crate) struct CloudcastsPaging { /// The API URL of the next page. @@ -109,7 +109,7 @@ const DEFAULT_BITRATE: u32 = 64 * 1024; const DEFAULT_FILE_TYPE: &str = "audio/mpeg"; /// The default page size. -const DEFAULT_PAGE_SIZE: &str = "50"; +const DEFAULT_PAGE_SIZE: usize = 50; /// Returns the default file type used by Mixcloud. pub(crate) const fn default_file_type() -> &'static str { @@ -124,54 +124,95 @@ pub(crate) fn estimated_file_size(duration: u32) -> u32 { } /// Retrieves the user data using the Mixcloud API. -#[cached( - key = "String", - convert = r#"{ username.to_owned() }"#, - time = 3600, - result = true -)] pub(crate) async fn user(username: &str) -> Result { let mut url = Url::parse(API_BASE_URL).expect("URL can always be parsed"); url.set_path(username); println!("⏬ Retrieving user {username} from {url}..."); + fetch_user(url).await +} + +/// Fetches the user from the URL. +#[cached( + key = "String", + convert = r#"{ url.to_string() }"#, + time = 86400, + result = true +)] +/// +/// If the result is [`Ok`], the user will be cached for 24 hours for the given username. +async fn fetch_user(url: Url) -> Result { let response = reqwest::get(url).await?.error_for_status()?; let user = response.json().await?; Ok(user) } -/// Retrieves the cloudcasts of the user using the Mixcloud API. -#[cached( - key = "String", - convert = r#"{ username.to_owned() }"#, - time = 3600, - result = true -)] -pub(crate) async fn cloudcasts(username: &str) -> Result> { +/// Retrieves the cloudcasts data of the user using the Mixcloud API. 
+pub(crate) async fn cloudcasts(username: &str, limit: Option) -> Result> { + let mut limit = limit.unwrap_or(DEFAULT_PAGE_SIZE); + let mut offset = 0; let mut url = Url::parse(API_BASE_URL).expect("URL can always be parsed"); url.set_path(&format!("{username}/cloudcasts/")); - url.query_pairs_mut() - .append_pair("limit", DEFAULT_PAGE_SIZE) - .append_pair("offset", "0"); - println!("⏬ Retrieving cloudcasts of user {username} from {url}..."); + + set_paging_query(&mut url, limit, offset); let mut cloudcasts = Vec::with_capacity(50); // The initial limit loop { - let response = reqwest::get(url).await?.error_for_status()?; - let cloudcasts_res: CloudcastsResponse = response.json().await?; + let cloudcasts_res: CloudcastsResponse = fetch_cloudcasts(url).await?; + let count = cloudcasts_res.items.len(); cloudcasts.extend(cloudcasts_res.items); // Continue onto the next URL in the paging, if there is one. + limit = limit.saturating_sub(count); + offset += count; match cloudcasts_res.paging.next { - Some(next_url) => url = Url::parse(&next_url)?, + Some(next_url) => { + url = Url::parse(&next_url)?; + set_paging_query(&mut url, limit, offset); + } None => break, } + + // We have reached the limit. + if limit == 0 { + break; + } } Ok(cloudcasts) } +/// Fetches cloudcasts from the URL. +/// +/// If the result is [`Ok`], the cloudcasts will be cached for 24 hours for the given username. +#[cached( + key = "String", + convert = r#"{ url.to_string() }"#, + time = 86400, + result = true +)] +async fn fetch_cloudcasts(url: Url) -> Result { + let response = reqwest::get(url).await?.error_for_status()?; + let cloudcasts_res = response.json().await?; + + Ok(cloudcasts_res) +} + +/// Set paging query pairs for URL. +/// +/// The limit is capped to the default page size. Another request will be necessary to retrieve +/// more. 
+fn set_paging_query(url: &mut Url, limit: usize, offset: usize) { + url.query_pairs_mut() + .clear() + .append_pair( + "limit", + &format!("{}", std::cmp::min(limit, DEFAULT_PAGE_SIZE)), + ) + .append_pair("offset", &format!("{}", offset)); +} + /// Retrieves the redirect URL for the provided Mixcloud cloudcast key. #[cached( key = "String", -- 2.43.4 From 0701088fbcdf28a05e4031aae287ed9e2c90c31b Mon Sep 17 00:00:00 2001 From: Paul van Tilburg Date: Fri, 27 May 2022 22:47:36 +0200 Subject: [PATCH 3/3] Update the documentation --- README.md | 9 +++++++++ src/lib.rs | 2 ++ 2 files changed, 11 insertions(+) diff --git a/README.md b/README.md index 7e8ca26..8064bc8 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,15 @@ need to use for Podbringer is comprised of the following parts: The Podbringer location URL Service User @ service ``` +### Feed item limit + +To prevent feeds with a very large number of items, any feed that is returned +contains at most 50 items by default. If you want to have more (or fewer) items, +provide the limit in the URL by setting the `limit` parameter. + +For example, to get up to 1000 items, the URL becomes: +`https://my.domain.tld/podbringer/feed/mixcloud/myfavouriteband?limit=1000` + ## License Podbringer is licensed under the MIT license (see the `LICENSE` file or diff --git a/src/lib.rs b/src/lib.rs index 43a7d1a..2af6353 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -92,6 +92,8 @@ pub(crate) async fn download(file: PathBuf, backend: &str) -> Result { } /// Handler for retrieving the RSS feed of user on a certain back-end. +/// +/// The limit parameter determines the maximum number of items that can be in the feed. #[get("/feed//?")] async fn feed( backend: &str, -- 2.43.4