From 78fc93fedfffa7d1255fb0e3db0ce735b3ceb9bc Mon Sep 17 00:00:00 2001
From: Paul van Tilburg <paul@luon.net>
Date: Thu, 26 May 2022 22:06:22 +0200
Subject: [PATCH 1/3] Retrieve all pages by following the next URL

* Deserialize the paging information
* Parse each next URL; handle URL parse errors
* Use a default page size of 50; pass offset 0 to count by item index
---
 Cargo.lock      |  1 +
 Cargo.toml      |  1 +
 src/lib.rs      |  3 +++
 src/mixcloud.rs | 44 ++++++++++++++++++++++++++++++++++++--------
 4 files changed, 41 insertions(+), 8 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 5f9d164..fa3d428 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1598,6 +1598,7 @@ dependencies = [
  "rocket_dyn_templates",
  "rss",
  "thiserror",
+ "url",
  "youtube_dl",
 ]
 
diff --git a/Cargo.toml b/Cargo.toml
index 37c39d9..7949ce1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,6 +15,7 @@ rocket = { version = "0.5.0-rc.2", features = ["json"] }
 rocket_dyn_templates = { version = "0.1.0-rc.2", features = ["tera"] }
 rss = "2.0.1"
 thiserror = "1.0.31"
+url = "2.2.2"
 youtube_dl = { version = "0.7.0", features = ["tokio"] }
 
 [package.metadata.deb]
diff --git a/src/lib.rs b/src/lib.rs
index c6e3ea7..0551f80 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -43,6 +43,9 @@ pub(crate) enum Error {
     #[error("Unknown supported back-end: {0}")]
     UnsupportedBackend(String),
 
+    #[error("URL parse error: {0}")]
+    UrlParse(#[from] url::ParseError),
+
     #[error("Youtube_dl failed: {0}")]
     YoutubeDl(#[from] youtube_dl::Error),
 }
diff --git a/src/mixcloud.rs b/src/mixcloud.rs
index 0390186..86a60b7 100644
--- a/src/mixcloud.rs
+++ b/src/mixcloud.rs
@@ -11,7 +11,7 @@ use youtube_dl::{YoutubeDl, YoutubeDlOutput};
 
 use super::{Error, Result};
 
-/// A Mixcloud user.
+/// A Mixcloud user (response).
 #[derive(Clone, Debug, Deserialize)]
 #[serde(crate = "rocket::serde")]
 pub(crate) struct User {
@@ -36,12 +36,24 @@ pub(crate) struct Pictures {
     pub(crate) large: String,
 }
 
-/// The Mixcloud cloudcasts container.
+/// The Mixcloud cloudcasts response.
 #[derive(Debug, Deserialize)]
 #[serde(crate = "rocket::serde")]
-pub(crate) struct CloudcastData {
-    /// The contained cloudcasts.
-    data: Vec<Cloudcast>,
+pub(crate) struct CloudcastsResponse {
+    /// The contained cloudcast items.
+    #[serde(rename = "data")]
+    items: Vec<Cloudcast>,
+
+    /// The paging information.
+    paging: CloudcastsPaging,
+}
+
+/// The Mixcloud paging info.
+#[derive(Debug, Deserialize)]
+#[serde(crate = "rocket::serde")]
+pub(crate) struct CloudcastsPaging {
+    /// The API URL of the next page.
+    next: Option<String>,
 }
 
 /// A Mixcloud cloudcast.
@@ -96,6 +108,9 @@ const DEFAULT_BITRATE: u32 = 64 * 1024;
 /// The default file (MIME) type used by Mixcloud.
 const DEFAULT_FILE_TYPE: &str = "audio/mpeg";
 
+/// The default page size.
+const DEFAULT_PAGE_SIZE: &str = "50";
+
 /// Returns the default file type used by Mixcloud.
 pub(crate) const fn default_file_type() -> &'static str {
     DEFAULT_FILE_TYPE
@@ -136,12 +151,25 @@ pub(crate) async fn user(username: &str) -> Result<User> {
 pub(crate) async fn cloudcasts(username: &str) -> Result<Vec<Cloudcast>> {
     let mut url = Url::parse(API_BASE_URL).expect("URL can always be parsed");
     url.set_path(&format!("{username}/cloudcasts/"));
+    url.query_pairs_mut()
+        .append_pair("limit", DEFAULT_PAGE_SIZE)
+        .append_pair("offset", "0");
 
     println!("⏬ Retrieving cloudcasts of user {username} from {url}...");
-    let response = reqwest::get(url).await?.error_for_status()?;
-    let cloudcasts: CloudcastData = response.json().await?;
+    let mut cloudcasts = Vec::with_capacity(50); // The initial limit
+    loop {
+        let response = reqwest::get(url).await?.error_for_status()?;
+        let cloudcasts_res: CloudcastsResponse = response.json().await?;
+        cloudcasts.extend(cloudcasts_res.items);
 
-    Ok(cloudcasts.data)
+        // Continue onto the next URL in the paging, if there is one.
+        match cloudcasts_res.paging.next {
+            Some(next_url) => url = Url::parse(&next_url)?,
+            None => break,
+        }
+    }
+
+    Ok(cloudcasts)
 }
 
 /// Retrieves the redirect URL for the provided Mixcloud cloudcast key.
-- 
2.43.4


From c13ce71c6998b4814092f14aeefb798f85b93ab2 Mon Sep 17 00:00:00 2001
From: Paul van Tilburg <paul@luon.net>
Date: Fri, 27 May 2022 22:31:17 +0200
Subject: [PATCH 2/3] Add feed item limit support

* The feed item limit defaults to the default page size (50) if not
  provided
* Move caching from response to URL fetch results; add helper functions
* Add a helper function to set the paging query of a URL
* Modify paging so we don't retrieve more than the feed item limit
---
 src/lib.rs      | 11 ++++--
 src/mixcloud.rs | 89 ++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 73 insertions(+), 27 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 0551f80..43a7d1a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -92,10 +92,15 @@ pub(crate) async fn download(file: PathBuf, backend: &str) -> Result<Redirect> {
 }
 
 /// Handler for retrieving the RSS feed of user on a certain back-end.
-#[get("/feed/<backend>/<username>")]
-async fn feed(backend: &str, username: &str, config: &State<Config>) -> Result<RssFeed> {
+#[get("/feed/<backend>/<username>?<limit>")]
+async fn feed(
+    backend: &str,
+    username: &str,
+    limit: Option<usize>,
+    config: &State<Config>,
+) -> Result<RssFeed> {
     let user = mixcloud::user(username).await?;
-    let cloudcasts = mixcloud::cloudcasts(username).await?;
+    let cloudcasts = mixcloud::cloudcasts(username, limit).await?;
     let mut last_build = DateTime::<Utc>::from_utc(NaiveDateTime::from_timestamp(0, 0), Utc);
 
     let category = CategoryBuilder::default()
diff --git a/src/mixcloud.rs b/src/mixcloud.rs
index 86a60b7..d719c45 100644
--- a/src/mixcloud.rs
+++ b/src/mixcloud.rs
@@ -37,7 +37,7 @@ pub(crate) struct Pictures {
 }
 
 /// The Mixcloud cloudcasts response.
-#[derive(Debug, Deserialize)]
+#[derive(Clone, Debug, Deserialize)]
 #[serde(crate = "rocket::serde")]
 pub(crate) struct CloudcastsResponse {
     /// The contained cloudcast items.
@@ -49,7 +49,7 @@ pub(crate) struct CloudcastsResponse {
 }
 
 /// The Mixcloud paging info.
-#[derive(Debug, Deserialize)]
+#[derive(Clone, Debug, Deserialize)]
 #[serde(crate = "rocket::serde")]
 pub(crate) struct CloudcastsPaging {
     /// The API URL of the next page.
@@ -109,7 +109,7 @@ const DEFAULT_BITRATE: u32 = 64 * 1024;
 const DEFAULT_FILE_TYPE: &str = "audio/mpeg";
 
 /// The default page size.
-const DEFAULT_PAGE_SIZE: &str = "50";
+const DEFAULT_PAGE_SIZE: usize = 50;
 
 /// Returns the default file type used by Mixcloud.
 pub(crate) const fn default_file_type() -> &'static str {
@@ -124,54 +124,95 @@ pub(crate) fn estimated_file_size(duration: u32) -> u32 {
 }
 
 /// Retrieves the user data using the Mixcloud API.
-#[cached(
-    key = "String",
-    convert = r#"{ username.to_owned() }"#,
-    time = 3600,
-    result = true
-)]
 pub(crate) async fn user(username: &str) -> Result<User> {
     let mut url = Url::parse(API_BASE_URL).expect("URL can always be parsed");
     url.set_path(username);
 
     println!("⏬ Retrieving user {username} from {url}...");
+    fetch_user(url).await
+}
+
+/// Fetches the user from the URL.
+#[cached(
+    key = "String",
+    convert = r#"{ url.to_string() }"#,
+    time = 86400,
+    result = true
+)]
+///
+/// If the result is [`Ok`], the user will be cached for 24 hours for the given URL.
+async fn fetch_user(url: Url) -> Result<User> {
     let response = reqwest::get(url).await?.error_for_status()?;
     let user = response.json().await?;
 
     Ok(user)
 }
 
-/// Retrieves the cloudcasts of the user using the Mixcloud API.
-#[cached(
-    key = "String",
-    convert = r#"{ username.to_owned() }"#,
-    time = 3600,
-    result = true
-)]
-pub(crate) async fn cloudcasts(username: &str) -> Result<Vec<Cloudcast>> {
+/// Retrieves the cloudcasts data of the user using the Mixcloud API.
+pub(crate) async fn cloudcasts(username: &str, limit: Option<usize>) -> Result<Vec<Cloudcast>> {
+    let mut limit = limit.unwrap_or(DEFAULT_PAGE_SIZE);
+    let mut offset = 0;
     let mut url = Url::parse(API_BASE_URL).expect("URL can always be parsed");
     url.set_path(&format!("{username}/cloudcasts/"));
-    url.query_pairs_mut()
-        .append_pair("limit", DEFAULT_PAGE_SIZE)
-        .append_pair("offset", "0");
-
     println!("⏬ Retrieving cloudcasts of user {username} from {url}...");
+
+    set_paging_query(&mut url, limit, offset);
     let mut cloudcasts = Vec::with_capacity(50); // The initial limit
     loop {
-        let response = reqwest::get(url).await?.error_for_status()?;
-        let cloudcasts_res: CloudcastsResponse = response.json().await?;
+        let cloudcasts_res: CloudcastsResponse = fetch_cloudcasts(url).await?;
+        let count = cloudcasts_res.items.len();
         cloudcasts.extend(cloudcasts_res.items);
 
         // Continue onto the next URL in the paging, if there is one.
+        limit = limit.saturating_sub(count);
+        offset += count;
         match cloudcasts_res.paging.next {
-            Some(next_url) => url = Url::parse(&next_url)?,
+            Some(next_url) => {
+                url = Url::parse(&next_url)?;
+                set_paging_query(&mut url, limit, offset);
+            }
             None => break,
         }
+
+        // We have reached the limit.
+        if limit == 0 {
+            break;
+        }
     }
 
     Ok(cloudcasts)
 }
 
+/// Fetches cloudcasts from the URL.
+///
+/// If the result is [`Ok`], the cloudcasts will be cached for 24 hours for the given URL.
+#[cached(
+    key = "String",
+    convert = r#"{ url.to_string() }"#,
+    time = 86400,
+    result = true
+)]
+async fn fetch_cloudcasts(url: Url) -> Result<CloudcastsResponse> {
+    let response = reqwest::get(url).await?.error_for_status()?;
+    let cloudcasts_res = response.json().await?;
+
+    Ok(cloudcasts_res)
+}
+
+/// Sets the paging query pairs of the URL.
+///
+/// The limit is capped to the default page size. Another request will be necessary to retrieve
+/// more.
+fn set_paging_query(url: &mut Url, limit: usize, offset: usize) {
+    url.query_pairs_mut()
+        .clear()
+        .append_pair(
+            "limit",
+            &format!("{}", std::cmp::min(limit, DEFAULT_PAGE_SIZE)),
+        )
+        .append_pair("offset", &format!("{}", offset));
+}
+
 /// Retrieves the redirect URL for the provided Mixcloud cloudcast key.
 #[cached(
     key = "String",
-- 
2.43.4


From 0701088fbcdf28a05e4031aae287ed9e2c90c31b Mon Sep 17 00:00:00 2001
From: Paul van Tilburg <paul@luon.net>
Date: Fri, 27 May 2022 22:47:36 +0200
Subject: [PATCH 3/3] Update the documentation

---
 README.md  | 9 +++++++++
 src/lib.rs | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/README.md b/README.md
index 7e8ca26..8064bc8 100644
--- a/README.md
+++ b/README.md
@@ -55,6 +55,15 @@ need to use for Podbringer is comprised of the following parts:
    The Podbringer location URL          Service  User @ service
 ```
 
+### Feed item limit
+
+To prevent feeds with a very large number of items, any feed that is returned
+contains at most 50 items by default. If you want to have more (or fewer) items,
+provide the limit in the URL by setting the `limit` parameter.
+
+For example, to get up to 1000 items, the URL becomes:
+`https://my.domain.tld/podbringer/feed/mixcloud/myfavouriteband?limit=1000`
+
 ## License
 
 Podbringer is licensed under the MIT license (see the `LICENSE` file or
diff --git a/src/lib.rs b/src/lib.rs
index 43a7d1a..2af6353 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -92,6 +92,8 @@ pub(crate) async fn download(file: PathBuf, backend: &str) -> Result<Redirect> {
 }
 
 /// Handler for retrieving the RSS feed of user on a certain back-end.
+///
+/// The limit parameter determines the maximum number of items that can be in the feed.
 #[get("/feed/<backend>/<username>?<limit>")]
 async fn feed(
     backend: &str,
-- 
2.43.4