Merge pull request 'Implement paging' (#9) from 2-implement-paging into main

Reviewed-on: #9
2022-05-27 22:50:51 +02:00 · 2022-05-27 22:50:51 +02:00 · dafcdc009b
parent 09ee0b9ba9 0701088fbc
commit dafcdc009b
5 changed files with 117 additions and 27 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1598,6 +1598,7 @@ dependencies = [
 "rocket_dyn_templates",
 "rss",
 "thiserror",
+ "url",
 "youtube_dl",
 ]

--- a/Cargo.toml
+++ b/Cargo.toml
@ -15,6 +15,7 @@ rocket = { version = "0.5.0-rc.2", features = ["json"] }
 rocket_dyn_templates = { version = "0.1.0-rc.2", features = ["tera"] }
 rss = "2.0.1"
 thiserror = "1.0.31"
+url = "2.2.2"
 youtube_dl = { version = "0.7.0", features = ["tokio"] }

 [package.metadata.deb]
--- a/README.md
+++ b/README.md
@ -55,6 +55,15 @@ need to use for Podbringer is comprised of the following parts:
   The Podbringer location URL          Service  User @ service
 ```

+### Feed item limit
+
+To prevent feeds with a very large number of items, any feed that is returned
+contains at most 50 items by default. If you want to have more (or fewer) items,
+provide the limit in the URL by setting the `limit` parameter.
+
+For example, to get up to 1000 items, the URL becomes:
+`https://my.domain.tld/podbringer/feed/mixcloud/myfavouriteband?limit=1000`
+
 ## License

 Podbringer is licensed under the MIT license (see the `LICENSE` file or
--- a/src/lib.rs
+++ b/src/lib.rs
@ -43,6 +43,9 @@ pub(crate) enum Error {
    #[error("Unknown supported back-end: {0}")]
    UnsupportedBackend(String),

+    #[error("URL parse error: {0}")]
+    UrlParse(#[from] url::ParseError),
+
    #[error("Youtube_dl failed: {0}")]
    YoutubeDl(#[from] youtube_dl::Error),
 }
@ -89,10 +92,17 @@ pub(crate) async fn download(file: PathBuf, backend: &str) -> Result<Redirect> {
 }

 /// Handler for retrieving the RSS feed of user on a certain back-end.
-#[get("/feed/<backend>/<username>")]
-async fn feed(backend: &str, username: &str, config: &State<Config>) -> Result<RssFeed> {
+///
+/// The limit parameter determines the maximum number of items that can be in the feed.
+#[get("/feed/<backend>/<username>?<limit>")]
+async fn feed(
+    backend: &str,
+    username: &str,
+    limit: Option<usize>,
+    config: &State<Config>,
+) -> Result<RssFeed> {
    let user = mixcloud::user(username).await?;
-    let cloudcasts = mixcloud::cloudcasts(username).await?;
+    let cloudcasts = mixcloud::cloudcasts(username, limit).await?;
    let mut last_build = DateTime::<Utc>::from_utc(NaiveDateTime::from_timestamp(0, 0), Utc);

    let category = CategoryBuilder::default()
--- a/src/mixcloud.rs
+++ b/src/mixcloud.rs
@ -11,7 +11,7 @@ use youtube_dl::{YoutubeDl, YoutubeDlOutput};

 use super::{Error, Result};

-/// A Mixcloud user.
+/// A Mixcloud user (response).
 #[derive(Clone, Debug, Deserialize)]
 #[serde(crate = "rocket::serde")]
 pub(crate) struct User {
@ -36,12 +36,24 @@ pub(crate) struct Pictures {
    pub(crate) large: String,
 }

-/// The Mixcloud cloudcasts container.
-#[derive(Debug, Deserialize)]
+/// The Mixcloud cloudcasts response.
+#[derive(Clone, Debug, Deserialize)]
 #[serde(crate = "rocket::serde")]
-pub(crate) struct CloudcastData {
-    /// The contained cloudcasts.
-    data: Vec<Cloudcast>,
+pub(crate) struct CloudcastsResponse {
+    /// The contained cloudcast items.
+    #[serde(rename = "data")]
+    items: Vec<Cloudcast>,
+
+    /// The paging information.
+    paging: CloudcastsPaging,
+}
+
+/// The Mixcloud paging info.
+#[derive(Clone, Debug, Deserialize)]
+#[serde(crate = "rocket::serde")]
+pub(crate) struct CloudcastsPaging {
+    /// The API URL of the next page.
+    next: Option<String>,
 }

 /// A Mixcloud cloudcast.
@ -96,6 +108,9 @@ const DEFAULT_BITRATE: u32 = 64 * 1024;
 /// The default file (MIME) type used by Mixcloud.
 const DEFAULT_FILE_TYPE: &str = "audio/mpeg";

+/// The default page size.
+const DEFAULT_PAGE_SIZE: usize = 50;
+
 /// Returns the default file type used by Mixcloud.
 pub(crate) const fn default_file_type() -> &'static str {
    DEFAULT_FILE_TYPE
@ -109,39 +124,93 @@ pub(crate) fn estimated_file_size(duration: u32) -> u32 {
 }

 /// Retrieves the user data using the Mixcloud API.
-#[cached(
-    key = "String",
-    convert = r#"{ username.to_owned() }"#,
-    time = 3600,
-    result = true
-)]
 pub(crate) async fn user(username: &str) -> Result<User> {
    let mut url = Url::parse(API_BASE_URL).expect("URL can always be parsed");
    url.set_path(username);

    println!("⏬ Retrieving user {username} from {url}...");
+    fetch_user(url).await
+}
+
+/// Fetches the user from the URL.
+#[cached(
+    key = "String",
+    convert = r#"{ url.to_string() }"#,
+    time = 86400,
+    result = true
+)]
+///
+/// If the result is [`Ok`], the user will be cached for 24 hours for the given URL.
+async fn fetch_user(url: Url) -> Result<User> {
    let response = reqwest::get(url).await?.error_for_status()?;
    let user = response.json().await?;

    Ok(user)
 }

-/// Retrieves the cloudcasts of the user using the Mixcloud API.
-#[cached(
-    key = "String",
-    convert = r#"{ username.to_owned() }"#,
-    time = 3600,
-    result = true
-)]
-pub(crate) async fn cloudcasts(username: &str) -> Result<Vec<Cloudcast>> {
+/// Retrieves the cloudcasts data of the user using the Mixcloud API.
+pub(crate) async fn cloudcasts(username: &str, limit: Option<usize>) -> Result<Vec<Cloudcast>> {
+    let mut limit = limit.unwrap_or(DEFAULT_PAGE_SIZE);
+    let mut offset = 0;
    let mut url = Url::parse(API_BASE_URL).expect("URL can always be parsed");
    url.set_path(&format!("{username}/cloudcasts/"));
-
    println!("⏬ Retrieving cloudcasts of user {username} from {url}...");
-    let response = reqwest::get(url).await?.error_for_status()?;
-    let cloudcasts: CloudcastData = response.json().await?;

-    Ok(cloudcasts.data)
+    set_paging_query(&mut url, limit, offset);
+    let mut cloudcasts = Vec::with_capacity(50); // The initial limit
+    loop {
+        let cloudcasts_res: CloudcastsResponse = fetch_cloudcasts(url).await?;
+        let count = cloudcasts_res.items.len();
+        cloudcasts.extend(cloudcasts_res.items);
+
+        // Continue onto the next URL in the paging, if there is one.
+        limit = limit.saturating_sub(count);
+        offset += count;
+        match cloudcasts_res.paging.next {
+            Some(next_url) => {
+                url = Url::parse(&next_url)?;
+                set_paging_query(&mut url, limit, offset);
+            }
+            None => break,
+        }
+
+        // We have reached the limit.
+        if limit == 0 {
+            break;
+        }
+    }
+
+    Ok(cloudcasts)
+}
+
+/// Fetches cloudcasts from the URL.
+///
+/// If the result is [`Ok`], the cloudcasts will be cached for 24 hours for the given URL.
+#[cached(
+    key = "String",
+    convert = r#"{ url.to_string() }"#,
+    time = 86400,
+    result = true
+)]
+async fn fetch_cloudcasts(url: Url) -> Result<CloudcastsResponse> {
+    let response = reqwest::get(url).await?.error_for_status()?;
+    let cloudcasts_res = response.json().await?;
+
+    Ok(cloudcasts_res)
+}
+
+/// Set paging query pairs for URL.
+///
+/// The limit is capped to the default page size. Another request will be necessary to retrieve
+/// more.
+fn set_paging_query(url: &mut Url, limit: usize, offset: usize) {
+    url.query_pairs_mut()
+        .clear()
+        .append_pair(
+            "limit",
+            &format!("{}", std::cmp::min(limit, DEFAULT_PAGE_SIZE)),
+        )
+        .append_pair("offset", &format!("{}", offset));
 }

 /// Retrieves the redirect URL for the provided Mixcloud cloudcast key.