Add back-end abstraction; refactor Mixcloud back-end (closes: #10)

* Add a `backends` module with a `Backend` trait and the necessary abstract types
* Refactor handlers to use the back-end abstraction
* Directly serialize to URLs where necessary in Mixcloud back-end
* Require `serde` feature for the url crate
This commit is contained in:
Paul van Tilburg 2022-08-13 15:14:15 +02:00
parent 218e714b03
commit bc9a9e307d
Signed by: paul
GPG Key ID: C6DE073EDA9EEC4D
5 changed files with 307 additions and 122 deletions

2
Cargo.lock generated
View File

@ -1518,6 +1518,7 @@ checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
name = "podbringer"
version = "0.2.0"
dependencies = [
"async-trait",
"cached",
"chrono",
"reqwest",
@ -2448,6 +2449,7 @@ dependencies = [
"idna",
"matches",
"percent-encoding",
"serde",
]
[[package]]

View File

@ -8,6 +8,7 @@ readme = "README.md"
license = "MIT"
[dependencies]
async-trait = "0.1.57"
cached = { version = "0.38.0", features = ["async"] }
chrono = { version = "0.4.19", features = ["serde"] }
reqwest = { version = "0.11.10", features = ["json"] }
@ -15,7 +16,7 @@ rocket = { version = "0.5.0-rc.2", features = ["json"] }
rocket_dyn_templates = { version = "0.1.0-rc.2", features = ["tera"] }
rss = "2.0.1"
thiserror = "1.0.31"
url = "2.2.2"
url = { version = "2.2.2", features = ["serde"] }
youtube_dl = { version = "0.7.0", features = ["tokio"] }
[package.metadata.deb]

105
src/backends.rs Normal file
View File

@ -0,0 +1,105 @@
//! The supported content back-ends.
//!
//! A content back-end should provide two kinds of objects: channels and their (content) items.
//! It must provide a method to retrieve a channel and its items and a method to return the
//! redirect URL for some path that points to media within the context of the back-end.
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use reqwest::Url;
use crate::Result;
pub(crate) mod mixcloud;
/// Functionality of a content back-end.
///
/// A back-end turns a channel identifier into a [`Channel`] (with its items) and resolves the
/// download file path of an item enclosure to a URL that a client can be redirected to.
#[async_trait]
pub(crate) trait Backend {
/// Returns the name of the backend.
///
/// The name is a static string, e.g. the Mixcloud back-end returns `"Mixcloud"`.
fn name(&self) -> &'static str;
/// Returns the channel with its currently contained content items.
///
/// `channel_id` identifies the channel within the back-end and `item_limit` is the requested
/// maximum number of items. What happens for `None` is up to the implementation (the Mixcloud
/// back-end falls back to its default page size).
async fn channel(&self, channel_id: &str, item_limit: Option<usize>) -> Result<Channel>;
/// Returns the redirect URL for the provided download file path.
///
/// `file` is the path that was handed out earlier as [`Enclosure::file`].
async fn redirect_url(&self, file: &Path) -> Result<String>;
}
/// The metadata of a collection of content items.
///
/// This is the back-end independent representation of a feed: back-ends convert their own
/// response types into it and the feed handler renders it.
#[derive(Clone, Debug)]
pub(crate) struct Channel {
    /// The title of the channel.
    pub(crate) title: String,

    /// The link to the channel.
    pub(crate) link: Url,

    /// The description of the channel.
    pub(crate) description: String,

    /// The author/composer/creator of the channel.
    pub(crate) author: Option<String>,

    /// The categories associated with the channel.
    ///
    /// The first category is considered to be the "main" category.
    pub(crate) categories: Vec<String>,

    /// The URL of the image/logo/avatar of a channel.
    pub(crate) image: Url,

    /// The contained content items.
    pub(crate) items: Vec<Item>,
}

/// A content item belonging to a channel.
#[derive(Clone, Debug)]
pub(crate) struct Item {
    /// The title of the item.
    pub(crate) title: String,

    /// The direct link to the item.
    pub(crate) link: Url,

    /// The description of the item.
    pub(crate) description: Option<String>,

    /// The categories of the item, keyed by category name and mapped to their domain URL.
    pub(crate) categories: HashMap<String, Url>,

    /// The enclosed media content of the item.
    pub(crate) enclosure: Enclosure,

    /// The duration of the media content (in seconds).
    pub(crate) duration: Option<u32>,

    /// The global UID of the item.
    ///
    /// This GUID is not considered nor needs to be a permalink.
    pub(crate) guid: String,

    /// The keywords associated with the item.
    pub(crate) keywords: Vec<String>,

    /// The URL of the image of the item.
    pub(crate) image: Url,

    /// The timestamp the item was last updated.
    pub(crate) updated_at: DateTime<Utc>,
}

/// The enclosed media content of an item.
#[derive(Clone, Debug)]
pub(crate) struct Enclosure {
    /// The path of the download file associated with the item enclosure.
    ///
    /// This is used as a part of the enclosure URL of the item and will be passed to
    /// [`Backend::redirect_url`] later when a client wants to download the media content.
    pub(crate) file: PathBuf,

    /// The MIME type of the download file path associated with the item enclosure.
    pub(crate) mime_type: String,

    /// The length of the enclosed media content (in bytes).
    pub(crate) length: u32,
}

View File

@ -3,13 +3,95 @@
//! It uses the Mixcloud API to retrieve the feed (user) and items (cloudcasts).
//! See also: <https://www.mixcloud.com/developers/>
use std::path::{Path, PathBuf};
use async_trait::async_trait;
use cached::proc_macro::cached;
use chrono::{DateTime, Utc};
use reqwest::Url;
use rocket::serde::Deserialize;
use youtube_dl::{YoutubeDl, YoutubeDlOutput};
use super::{Error, Result};
use super::{Channel, Enclosure, Item};
use crate::{Error, Result};
/// The base URL for the Mixcloud API.
const API_BASE_URL: &str = "https://api.mixcloud.com";
/// The base URL for downloading Mixcloud files.
const FILES_BASE_URL: &str = "https://www.mixcloud.com";
/// The default bitrate used by Mixcloud.
const DEFAULT_BITRATE: u32 = 64 * 1024;
/// The default file (MIME) type used by Mixcloud.
const DEFAULT_FILE_TYPE: &str = "audio/mpeg";
/// The default page size.
const DEFAULT_PAGE_SIZE: usize = 50;
/// The Mixcloud back-end.
///
/// A unit type without any state of its own; obtain a value through [`backend`].
pub struct Backend;

/// Creates a Mixcloud back-end.
pub(crate) fn backend() -> Backend {
    Backend {}
}
#[async_trait]
impl super::Backend for Backend {
    /// Returns the name of this back-end: `"Mixcloud"`.
    fn name(&self) -> &'static str {
        "Mixcloud"
    }

    /// Retrieves the Mixcloud user `channel_id` and its cloudcasts as a [`Channel`].
    ///
    /// At most `item_limit` cloudcasts are requested; without a limit, [`DEFAULT_PAGE_SIZE`] is
    /// used. The cloudcasts are collected page by page by following the `next` URL of each
    /// response until the limit is reached or there are no further pages.
    ///
    /// # Errors
    ///
    /// Propagates any error from fetching the user or a page of cloudcasts, and fails if a `next`
    /// URL returned by the API cannot be parsed.
    async fn channel(&self, channel_id: &str, item_limit: Option<usize>) -> Result<Channel> {
        // For Mixcloud a channel ID is some user name.
        let mut user_url = Url::parse(API_BASE_URL).expect("URL can always be parsed");
        user_url.set_path(channel_id);

        println!("⏬ Retrieving user {channel_id} from {user_url}...");
        let user = fetch_user(user_url).await?;

        // The items of a channel are the user's cloudcasts.
        let mut limit = item_limit.unwrap_or(DEFAULT_PAGE_SIZE);
        let mut offset = 0;
        let mut cloudcasts_url = Url::parse(API_BASE_URL).expect("URL can always be parsed");
        cloudcasts_url.set_path(&format!("{channel_id}/cloudcasts/"));

        // The URL is logged before the paging query is attached to it.
        println!("⏬ Retrieving cloudcasts of user {channel_id} from {cloudcasts_url}...");
        set_paging_query(&mut cloudcasts_url, limit, offset);

        // Reserve room for the first page only. The limit comes from the request, so it must not
        // drive an arbitrarily large up-front allocation.
        let mut cloudcasts = Vec::with_capacity(limit.min(DEFAULT_PAGE_SIZE));
        loop {
            let cloudcasts_res: CloudcastsResponse = fetch_cloudcasts(cloudcasts_url).await?;
            let count = cloudcasts_res.items.len();
            cloudcasts.extend(cloudcasts_res.items);

            // Continue onto the next URL in the paging, if there is one and the limit was not
            // reached.
            limit = limit.saturating_sub(count);
            offset += count;
            match (limit, cloudcasts_res.paging.next) {
                // The limit was reached or this was the last page.
                (0, _) | (_, None) => break,
                // An empty page makes no progress: limit and offset stay the same, so following
                // `next` would request the same window over and over.
                (_, Some(_)) if count == 0 => break,
                (_, Some(next_url)) => {
                    cloudcasts_url = Url::parse(&next_url)?;
                    set_paging_query(&mut cloudcasts_url, limit, offset);
                }
            }
        }

        Ok(Channel::from(UserWithCloudcasts(user, cloudcasts)))
    }

    /// Returns the redirect URL for the provided download file path.
    ///
    /// The file extension is stripped and the remainder is wrapped in slashes to form the
    /// download key that is then resolved by [`retrieve_redirect_url`].
    async fn redirect_url(&self, file: &Path) -> Result<String> {
        let key = format!("/{}/", file.with_extension("").to_string_lossy());

        retrieve_redirect_url(&key).await
    }
}
/// A Mixcloud user with its cloudcasts.
///
/// Pairs a fetched [`User`] with the [`Cloudcast`]s collected for it so that both can be
/// converted into a single [`Channel`] via its `From` implementation.
pub(crate) struct UserWithCloudcasts(User, Vec<Cloudcast>);
/// A Mixcloud user (response).
#[derive(Clone, Debug, Deserialize)]
@ -25,15 +107,15 @@ pub(crate) struct User {
pub(crate) pictures: Pictures,
/// The original URL of the user.
pub(crate) url: String,
pub(crate) url: Url,
}
/// A collection of different sizes/variants of a picture.
#[derive(Clone, Debug, Deserialize)]
#[serde(crate = "rocket::serde")]
pub(crate) struct Pictures {
/// The large picture of the user.
pub(crate) large: String,
/// The URL of a large picture of the user.
pub(crate) large: Url,
}
/// The Mixcloud cloudcasts response.
@ -79,7 +161,7 @@ pub(crate) struct Cloudcast {
pub(crate) updated_time: DateTime<Utc>,
/// The original URL of the cloudcast.
pub(crate) url: String,
pub(crate) url: Url,
/// The length of the cloudcast (in seconds).
pub(crate) audio_length: u32,
@ -93,45 +175,69 @@ pub(crate) struct Tag {
pub(crate) name: String,
/// The URL of the tag.
pub(crate) url: String,
pub(crate) url: Url,
}
/// The base URL for the Mixcloud API.
const API_BASE_URL: &str = "https://api.mixcloud.com";
impl From<UserWithCloudcasts> for Channel {
fn from(UserWithCloudcasts(user, cloudcasts): UserWithCloudcasts) -> Self {
// FIXME: Don't hardcode the category!
let categories = Vec::from([String::from("Music")]);
let items = cloudcasts.into_iter().map(From::from).collect();
/// The base URL for downloading Mixcloud files.
const FILES_BASE_URL: &str = "https://www.mixcloud.com";
Channel {
title: format!("{0} (via Mixcloud)", user.name),
link: user.url,
description: user.biog,
author: Some(user.name),
categories,
image: user.pictures.large,
items,
}
}
}
/// The default bitrate used by Mixcloud.
const DEFAULT_BITRATE: u32 = 64 * 1024;
impl From<Cloudcast> for Item {
fn from(cloudcast: Cloudcast) -> Self {
let mut file = PathBuf::from(cloudcast.key.trim_end_matches('/'));
file.set_extension("m4a"); // FIXME: Don't hardcoded the extension!
/// The default file (MIME) type used by Mixcloud.
const DEFAULT_FILE_TYPE: &str = "audio/mpeg";
// FIXME: Don't hardcode the description!
let description = Some(format!("Taken from Mixcloud: {0}", cloudcast.url));
let categories = cloudcast
.tags
.iter()
.cloned()
.map(|tag| (tag.name, tag.url))
.collect();
let enclosure = Enclosure {
file,
mime_type: String::from(DEFAULT_FILE_TYPE),
length: estimated_file_size(cloudcast.audio_length),
};
let keywords = cloudcast.tags.into_iter().map(|tag| tag.name).collect();
/// The default page size.
const DEFAULT_PAGE_SIZE: usize = 50;
/// Returns the default file type used by Mixcloud.
pub(crate) const fn default_file_type() -> &'static str {
DEFAULT_FILE_TYPE
Item {
title: cloudcast.name,
link: cloudcast.url,
description,
categories,
enclosure,
duration: Some(cloudcast.audio_length),
guid: cloudcast.slug,
keywords,
image: cloudcast.pictures.large,
updated_at: cloudcast.updated_time,
}
}
}
/// Returns the estimated file size in bytes for a given duration.
///
/// This uses the default bitrate (see [`DEFAULT_BITRATE`]) which is in bits per second (b/s).
pub(crate) fn estimated_file_size(duration: u32) -> u32 {
fn estimated_file_size(duration: u32) -> u32 {
    // Multiply in 64 bits: `DEFAULT_BITRATE * duration` overflows `u32` for durations of 65536
    // seconds (about 18 hours) or more, which panics in debug builds and wraps in release builds.
    let bytes = u64::from(DEFAULT_BITRATE) * u64::from(duration) / 8;

    // Saturate instead of truncating when the estimate does not fit the `u32` return type; after
    // the clamp the cast is lossless.
    bytes.min(u64::from(u32::MAX)) as u32
}
/// Retrieves the user data using the Mixcloud API.
///
/// The API URL is formed by setting `username` as the path on [`API_BASE_URL`]; the actual
/// request is delegated to [`fetch_user`].
pub(crate) async fn user(username: &str) -> Result<User> {
// The base URL is a constant, hence the `expect`.
let mut url = Url::parse(API_BASE_URL).expect("URL can always be parsed");
url.set_path(username);
println!("⏬ Retrieving user {username} from {url}...");
fetch_user(url).await
}
/// Fetches the user from the URL.
#[cached(
key = "String",
@ -148,37 +254,6 @@ async fn fetch_user(url: Url) -> Result<User> {
Ok(user)
}
/// Retrieves the cloudcasts data of the user using the Mixcloud API.
///
/// At most `limit` cloudcasts are requested; without a limit, [`DEFAULT_PAGE_SIZE`] is used.
/// Pages are fetched one after another by following the `next` URL of each response until the
/// limit is reached or no further page is offered.
pub(crate) async fn cloudcasts(username: &str, limit: Option<usize>) -> Result<Vec<Cloudcast>> {
// `limit` counts down the number of cloudcasts still wanted; `offset` counts those received.
let mut limit = limit.unwrap_or(DEFAULT_PAGE_SIZE);
let mut offset = 0;
let mut url = Url::parse(API_BASE_URL).expect("URL can always be parsed");
url.set_path(&format!("{username}/cloudcasts/"));
// The URL is logged before the paging query is attached to it.
println!("⏬ Retrieving cloudcasts of user {username} from {url}...");
set_paging_query(&mut url, limit, offset);
let mut cloudcasts = Vec::with_capacity(50); // The initial limit
loop {
// `url` is moved into the fetch; it is re-assigned below before the next iteration.
let cloudcasts_res: CloudcastsResponse = fetch_cloudcasts(url).await?;
let count = cloudcasts_res.items.len();
cloudcasts.extend(cloudcasts_res.items);
// Continue onto the next URL in the paging, if there is one and the limit was not reached.
limit = limit.saturating_sub(count);
offset += count;
match (limit, cloudcasts_res.paging.next) {
// The limit was reached although more pages are available.
(0, Some(_)) => break,
(_, Some(next_url)) => {
url = Url::parse(&next_url)?;
set_paging_query(&mut url, limit, offset);
}
// There are no more pages.
(_, None) => break,
}
}
Ok(cloudcasts)
}
/// Fetches cloudcasts from the URL.
///
/// If the result is [`Ok`], the cloudcasts will be cached for 24 hours for the given username.
@ -216,7 +291,7 @@ fn set_paging_query(url: &mut Url, limit: usize, offset: usize) {
time = 86400,
result = true
)]
pub(crate) async fn redirect_url(download_key: &str) -> Result<String> {
async fn retrieve_redirect_url(download_key: &str) -> Result<String> {
let mut url = Url::parse(FILES_BASE_URL).expect("URL can always be parsed");
url.set_path(download_key);

View File

@ -26,7 +26,9 @@ use rss::{
CategoryBuilder, ChannelBuilder, EnclosureBuilder, GuidBuilder, ImageBuilder, ItemBuilder,
};
pub(crate) mod mixcloud;
use crate::backends::{mixcloud, Backend};
pub(crate) mod backends;
/// The possible errors that can occur.
#[derive(Debug, thiserror::Error)]
@ -88,31 +90,38 @@ struct RssFeed(String);
#[get("/download/<backend>/<file..>")]
pub(crate) async fn download(file: PathBuf, backend: &str) -> Result<Redirect> {
match backend {
"mixcloud" => {
let key = format!("/{}/", file.with_extension("").to_string_lossy());
mixcloud::redirect_url(&key).await.map(Redirect::to)
}
"mixcloud" => mixcloud::backend()
.redirect_url(&file)
.await
.map(Redirect::to),
_ => Err(Error::UnsupportedBackend(backend.to_string())),
}
}
/// Handler for retrieving the RSS feed of user on a certain back-end.
/// Handler for retrieving the RSS feed of a channel on a certain back-end.
///
/// The limit parameter determines the maximum of items that can be in the feed.
#[get("/feed/<backend>/<username>?<limit>")]
#[get("/feed/<backend>/<channel_id>?<limit>")]
async fn feed(
backend: &str,
username: &str,
channel_id: &str,
limit: Option<usize>,
config: &State<Config>,
) -> Result<RssFeed> {
let user = mixcloud::user(username).await?;
let cloudcasts = mixcloud::cloudcasts(username, limit).await?;
let mut last_build = DateTime::<Utc>::from_utc(NaiveDateTime::from_timestamp(0, 0), Utc);
let channel = match backend {
"mixcloud" => mixcloud::backend().channel(channel_id, limit).await?,
_ => return Err(Error::UnsupportedBackend(backend.to_string())),
};
let category = CategoryBuilder::default()
.name(String::from("Music")) // FIXME: Don't hardcode the category!
.name(
channel
.categories
.first()
.map(Clone::clone)
.unwrap_or_default(),
)
.build();
let generator = String::from(concat!(
env!("CARGO_PKG_NAME"),
@ -120,85 +129,78 @@ async fn feed(
env!("CARGO_PKG_VERSION")
));
let image = ImageBuilder::default()
.link(user.pictures.large.clone())
.url(user.pictures.large.clone())
.link(channel.image.clone())
.url(channel.image.clone())
.build();
let items = cloudcasts
let items = channel
.items
.into_iter()
.map(|cloudcast| {
let mut file = PathBuf::from(cloudcast.key.trim_end_matches('/'));
file.set_extension("m4a"); // FIXME: Don't hardcode the extension!
let url = uri!(
Absolute::parse(&config.url).expect("valid URL"),
download(backend = backend, file = file)
);
// FIXME: Don't hardcode the description!
let description = format!("Taken from Mixcloud: {}", cloudcast.url);
let keywords = cloudcast
.tags
.iter()
.map(|tag| &tag.name)
.cloned()
.collect::<Vec<_>>()
.join(", ");
let categories = cloudcast
.tags
.map(|item| {
let categories = item
.categories
.into_iter()
.map(|tag| {
.map(|(cat_name, cat_url)| {
CategoryBuilder::default()
.name(tag.name)
.domain(Some(tag.url))
.name(cat_name)
.domain(Some(cat_url.to_string()))
.build()
})
.collect::<Vec<_>>();
let length = mixcloud::estimated_file_size(cloudcast.audio_length);
let url = uri!(
Absolute::parse(&config.url).expect("valid URL"),
download(backend = backend, file = item.enclosure.file)
);
let enclosure = EnclosureBuilder::default()
.url(url.to_string())
.length(format!("{}", length))
.mime_type(String::from(mixcloud::default_file_type()))
.length(item.enclosure.length.to_string())
.mime_type(item.enclosure.mime_type)
.build();
let guid = GuidBuilder::default()
.value(cloudcast.slug)
.value(item.guid)
.permalink(false)
.build();
let keywords = item.keywords.join(", ");
let itunes_ext = ITunesItemExtensionBuilder::default()
.image(Some(cloudcast.pictures.large))
.duration(Some(format!("{}", cloudcast.audio_length)))
.subtitle(Some(description.clone()))
.image(Some(item.image.to_string()))
.duration(item.duration.map(|dur| format!("{dur}")))
.subtitle(item.description.clone())
.keywords(Some(keywords))
.build();
if cloudcast.updated_time > last_build {
last_build = cloudcast.updated_time;
if item.updated_at > last_build {
last_build = item.updated_at;
}
ItemBuilder::default()
.title(Some(cloudcast.name))
.link(Some(cloudcast.url))
.description(Some(description))
.title(Some(item.title))
.link(Some(item.link.to_string()))
.description(item.description)
.categories(categories)
.enclosure(Some(enclosure))
.guid(Some(guid))
.pub_date(Some(cloudcast.updated_time.to_rfc2822()))
.pub_date(Some(item.updated_at.to_rfc2822()))
.itunes_ext(Some(itunes_ext))
.build()
})
.collect::<Vec<_>>();
let itunes_ext = ITunesChannelExtensionBuilder::default()
.author(Some(user.name.clone()))
.categories(Vec::from([ITunesCategoryBuilder::default()
.text(String::from("Music"))
.build()])) // FIXME: Don't hardcode the category!
.image(Some(user.pictures.large))
.author(channel.author)
.categories(
channel
.categories
.into_iter()
.map(|cat| ITunesCategoryBuilder::default().text(cat).build())
.collect::<Vec<_>>(),
)
.image(Some(channel.image.to_string()))
.explicit(Some(String::from("no")))
.summary(Some(user.biog.clone()))
.summary(Some(channel.description.clone()))
.build();
let channel = ChannelBuilder::default()
.title(&format!("{} (via Mixcloud)", user.name))
.link(&user.url)
.description(&user.biog)
.title(channel.title)
.link(channel.link)
.description(channel.description)
.category(category)
.last_build_date(Some(last_build.to_rfc2822()))
.generator(Some(generator))