Merge pull request 'Replace using Firefox/Gecko Driver by using direct HTTP requests' (#1) from webdriverless into master

Reviewed-on: #1
This commit is contained in:
Paul van Tilburg 2021-11-28 13:11:01 +01:00
commit 108d6f06b5
5 changed files with 465 additions and 507 deletions

650
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -7,8 +7,8 @@ edition = "2018"
[dependencies]
color-eyre = "0.5.6"
lazy_static = "1.4.0"
reqwest = { version = "0.11", features = ["cookies", "json"] }
rocket = { version = "0.5.0-rc.1", features = ["json"] }
serde = "1.0.116"
toml = "0.5.6"
thirtyfour = { version = "0.25.0", features = ["tokio-runtime"] }
tokio = { version = "1.6.1", features = ["process"] }
url = "2"

View File

@ -4,7 +4,7 @@
########################## BUILD IMAGE ##########################
# Rust build image to build Autarco Scraper's statically compiled binary
FROM docker.io/rust:1.45 as builder
FROM docker.io/rust:1.54 as builder
# Build the dependencies first
RUN USER=root cargo new --bin autarco-scraper
@ -22,18 +22,13 @@ RUN cargo build --release
########################## RUNTIME IMAGE ##########################
# Create new stage with a minimal image for the actual runtime image/container
FROM docker.io/debian:buster-slim
FROM docker.io/debian:bullseye-slim
# Install cURL, Firefox and the Gecko Driver
# Install CA certificates
RUN apt-get update && \
apt-get upgrade -y && \
apt-get install -y --no-install-recommends ca-certificates curl firefox-esr jq && \
apt-get install -y --no-install-recommends ca-certificates && \
rm -rf /var/lib/apt/lists/*
RUN export VERSION=$(curl -sL https://api.github.com/repos/mozilla/geckodriver/releases/latest | jq -r .tag_name); \
curl -vsL https://api.github.com/repos/mozilla/geckodriver/releases/latest | jq -r .tag_name; \
curl -sL "https://github.com/mozilla/geckodriver/releases/download/$VERSION/geckodriver-$VERSION-linux64.tar.gz" | \
tar -xz -C /usr/local/bin && \
chmod +x /usr/local/bin/geckodriver
# Copy the binary from the "builder" stage to the current stage
RUN adduser --system --disabled-login --home /autarco-scraper --gecos "" --shell /bin/bash autarco-scraper

View File

@ -1,68 +1,46 @@
use std::path::Path;
use std::process::Stdio;
use std::sync::Mutex;
use std::thread;
use std::time::{Duration, SystemTime};
use color_eyre::eyre::eyre;
use color_eyre::Result;
use lazy_static::lazy_static;
use rocket::serde::json::Json;
use rocket::tokio::fs::File;
use rocket::tokio::io::AsyncReadExt;
use rocket::tokio::select;
use rocket::tokio::sync::oneshot::Receiver;
use rocket::tokio::time::sleep;
use rocket::{get, routes};
use serde::{Deserialize, Serialize};
use thirtyfour::prelude::*;
use tokio::process::{Child, Command};
/// The port used by the Gecko Driver
const GECKO_DRIVER_PORT: u16 = 4444;
use self::update::update_loop;
/// The interval between data polls
mod update;
/// The base URL of My Autarco site.
const BASE_URL: &str = "https://my.autarco.com";
/// The interval between data polls.
///
/// This depends on the interval at which Autarco processes new information from the convertor.
/// This depends on the interval at which Autarco processes new information from the invertor.
const POLL_INTERVAL: u64 = 300;
/// The URL to the My Autarco site
const URL: &'static str = "https://my.autarco.com/";
/// The login configuration
/// The configuration for the My Autarco site
///
/// The values are read from the `autarco.toml` configuration file (see `load_config`).
#[derive(Debug, Deserialize)]
struct Config {
/// The username of the account to log in with
username: String,
/// The password of the account to log in with
password: String,
/// The ID of the Autarco site to track
site_id: String,
}
/// Spawns the gecko driver
/// Loads the configuration.
///
/// Note that the function blocks and delays at least a second to ensure everything is up and
/// running.
fn spawn_driver(port: u16) -> Result<Child> {
    // This is taken from the webdriver-client crate.
    let mut command = Command::new("geckodriver");
    command
        .arg("--port")
        .arg(port.to_string())
        // Attach all standard streams to the null device: no input, output is discarded.
        .stdin(Stdio::null())
        .stderr(Stdio::null())
        .stdout(Stdio::null())
        // Kill the driver process when its handle is dropped.
        .kill_on_drop(true);
    let driver = command.spawn()?;

    // Block the current thread for 1 s + 500 ns before handing back the process handle.
    thread::sleep(Duration::new(1, 500));

    Ok(driver)
}
/// The status as last retrieved from the My Autarco site.
#[derive(Clone, Copy, Debug, Serialize)]
struct Status {
/// Current power production (W)
current_w: u32,
/// Total energy production (kWh)
total_kwh: u32,
/// Timestamp of the last update (seconds since the UNIX epoch)
last_updated: u64,
}
/// The configuration file `autarco.toml` should be located in the project path.
///
/// # Errors
///
/// Returns an error if the file could not be found, opened or read and if the contents are
/// not valid TOML or does not contain all the necessary keys (see [`Config`]).
async fn load_config() -> Result<Config> {
let config_file_name = Path::new(env!("CARGO_MANIFEST_DIR")).join("autarco.toml");
let mut file = File::open(config_file_name).await?;
@ -74,122 +52,41 @@ async fn load_config() -> Result<Config> {
Ok(config)
}
/// Logs in on the My Autarco site through the given web driver session.
///
/// The credentials are taken from the loaded configuration.
///
/// # Errors
///
/// Returns an error if the configuration cannot be loaded or if any of the web driver
/// interactions (navigating, finding an element, typing, clicking) fails.
async fn login(driver: &WebDriver) -> Result<()> {
    let config = load_config().await?;

    // Open the My Autarco site.
    driver.get(URL).await?;

    // Fill in the credentials.
    let username_field = driver.find_element(By::Id("username")).await?;
    username_field.send_keys(&config.username).await?;
    let password_field = driver.find_element(By::Id("password")).await?;
    password_field.send_keys(&config.password).await?;

    // Submit the form.
    let submit_button = driver.find_element(By::Css("button[type=submit]")).await?;
    submit_button.click().await?;

    Ok(())
}
async fn element_value(driver: &WebDriver, by: By<'_>) -> Result<u32> {
let element = driver.find_element(by).await?;
let text = element.text().await?;
let value = text.parse()?;
Ok(value)
/// The current photovoltaic invertor status.
///
/// A copy of this struct is returned as JSON by the `status` route.
#[derive(Clone, Copy, Debug, Serialize)]
struct Status {
/// Current power production (W)
current_w: u32,
/// Total energy produced since installation (kWh)
total_kwh: u32,
/// Timestamp of the last update (seconds since the UNIX epoch)
last_updated: u64,
}
lazy_static! {
/// The concurrently accessible current status.
///
/// Holds `None` until the update loop has stored a first status.
static ref STATUS: Mutex<Option<Status>> = Mutex::new(None);
}
/// Main update loop that logs in via the web driver and periodically scrapes the status.
///
/// The loop wakes up every second, stops as soon as a message is received on `rx` and otherwise
/// reads new values from the page once at least `POLL_INTERVAL` seconds have passed since the
/// last successful update. The result is stored in the mutex-guarded `STATUS`.
///
/// # Errors
///
/// Returns an error if the headless capability cannot be set, the web driver session cannot be
/// created or logging in fails.
async fn update_loop(mut rx: Receiver<()>) -> Result<()> {
let mut caps = DesiredCapabilities::firefox();
caps.set_headless()?;
let driver = WebDriver::new(&format!("http://localhost:{}", GECKO_DRIVER_PORT), &caps).await?;
// Go to the My Autarco site and login
println!("⚡ Logging in...");
// FIXME: Just dropping the driver hangs the process?
if let Err(e) = login(&driver).await {
driver.quit().await?;
return Err(e);
}
// Timestamp (seconds since the UNIX epoch) of the last successful update; 0 means "never".
let mut last_updated = 0;
loop {
// Wake up every second; whether an update is due is decided below.
sleep(Duration::from_secs(1)).await;
// Shut down if there is a signal
// NOTE(review): the driver session is not quit after this `break` - confirm this is intended.
if let Ok(()) = rx.try_recv() {
break;
}
let timestamp = SystemTime::now()
.duration_since(SystemTime::UNIX_EPOCH)
.unwrap()
.as_secs();
// Skip this round if the poll interval has not passed yet.
// NOTE(review): this subtraction underflows if the system clock is set back below
// `last_updated`.
if timestamp - last_updated < POLL_INTERVAL {
continue;
}
// Retrieve the data from the elements; on failure, log it and retry on the next wake-up
let current_w = match element_value(&driver, By::Css("h2#pv-now b")).await {
Ok(value) => value,
Err(error) => {
eprintln!("Failed to retrieve current power: {}", error);
continue;
}
};
let total_kwh = match element_value(&driver, By::Css("h2#pv-to-date b")).await {
Ok(value) => value,
Err(error) => {
eprintln!("Failed to retrieve total energy production: {}", error);
continue;
}
};
// Only a fully successful retrieval counts as an update.
last_updated = timestamp;
// Update the status
let mut status_guard = STATUS.lock().expect("Status mutex was poisoned");
let status = Status {
current_w,
total_kwh,
last_updated,
};
println!("⚡ Updated status to: {:#?}", status);
status_guard.replace(status);
}
Ok(())
}
/// Returns the current (last known) status.
#[get("/", format = "application/json")]
async fn status() -> Option<Json<Status>> {
let status_guard = STATUS.lock().expect("Status mutex was poisoined");
status_guard.map(|status| Json(status))
status_guard.map(Json)
}
/// Starts the main update loop and sets up and launches Rocket.
#[rocket::main]
async fn main() -> Result<()> {
color_eyre::install()?;
let mut driver_proc =
spawn_driver(GECKO_DRIVER_PORT).expect("Could not find/start the Gecko Driver");
let (tx, rx) = rocket::tokio::sync::oneshot::channel();
let updater = rocket::tokio::spawn(update_loop(rx));
let rocket = rocket::build().mount("/", routes![status]).ignite().await?;
let shutdown = rocket.shutdown();
let updater = rocket::tokio::spawn(update_loop());
select! {
result = driver_proc.wait() => {
shutdown.notify();
tx.send(()).map_err(|_| eyre!("Could not send shutdown signal"))?;
result?;
},
result = rocket.launch() => {
tx.send(()).map_err(|_| eyre!("Could not send shutdown signal"))?;
result?;
},
result = updater => {

132
src/update.rs Normal file
View File

@ -0,0 +1,132 @@
//! Module for handling the status updating/retrieval via the My Autarco site/API.
use std::time::{Duration, SystemTime};
use reqwest::{Client, ClientBuilder, Error, StatusCode};
use rocket::tokio::time::sleep;
use serde::Deserialize;
use url::{ParseError, Url};
use super::{load_config, Config, Status, BASE_URL, POLL_INTERVAL, STATUS};
/// Builds the URL of the login endpoint (`/auth/login`) of the My Autarco site.
///
/// # Errors
///
/// Returns a [`ParseError`] if the assembled string is not a valid URL.
fn login_url() -> Result<Url, ParseError> {
    let raw = [BASE_URL, "/auth/login"].concat();

    Url::parse(&raw)
}
/// Builds the URL of a KPI API endpoint of the My Autarco site.
///
/// The result has the form `<BASE_URL>/api/site/<site_id>/kpis/<endpoint>`; both arguments are
/// inserted as given.
///
/// # Errors
///
/// Returns a [`ParseError`] if the assembled string is not a valid URL.
fn api_url(site_id: &str, endpoint: &str) -> Result<Url, ParseError> {
    let raw = [BASE_URL, "/api/site/", site_id, "/kpis/", endpoint].concat();

    Url::parse(&raw)
}
/// The energy data returned by the energy API endpoint.
///
/// Only `pv_to_date` ends up in the [`Status`]; the other fields are deserialized as well and
/// must therefore be present in the response.
#[derive(Debug, Deserialize)]
struct ApiEnergy {
/// Total energy produced today (kWh)
pv_today: u32,
/// Total energy produced this month (kWh)
pv_month: u32,
/// Total energy produced since installation (kWh)
pv_to_date: u32,
}
/// The power data returned by the power API endpoint.
#[derive(Debug, Deserialize)]
struct ApiPower {
/// Current power production (W); stored as `current_w` in the [`Status`]
pv_now: u32,
}
/// Performs a login on the My Autarco site.
///
/// It mainly stores the acquired cookie in the client's cookie jar. The login credentials come
/// from the loaded configuration (see [`Config`]).
///
/// # Errors
///
/// Returns an error if the request could not be sent or if the site answered with an HTTP
/// client or server error status (4xx/5xx).
///
/// # Panics
///
/// Panics if the login URL cannot be parsed, which would be a programming error.
async fn login(config: &Config, client: &Client) -> Result<(), Error> {
    let params = [
        ("username", &config.username),
        ("password", &config.password),
    ];
    let login_url = login_url().expect("valid login URL");

    // An HTTP error response must not be mistaken for a successful login, so turn it into an
    // error just like the API requests do.
    client
        .post(login_url)
        .form(&params)
        .send()
        .await?
        .error_for_status()?;

    Ok(())
}
/// Fetches fresh data from the My Autarco API and turns it into a [`Status`].
///
/// The `energy` endpoint is queried first, then the `power` endpoint; both requests rely on the
/// cookie obtained by the login. The given `last_updated` value is copied into the result as is.
///
/// # Errors
///
/// Returns an error if a request fails, if an endpoint answers with an HTTP error status or if
/// a response body cannot be deserialized.
async fn update(config: &Config, client: &Client, last_updated: u64) -> Result<Status, Error> {
    // Energy totals come from the `energy` endpoint.
    let energy_url = api_url(&config.site_id, "energy").expect("valid API energy URL");
    let energy: ApiEnergy = client
        .get(energy_url)
        .send()
        .await?
        .error_for_status()?
        .json()
        .await?;

    // Current power comes from the `power` endpoint.
    let power_url = api_url(&config.site_id, "power").expect("valid API power URL");
    let power: ApiPower = client
        .get(power_url)
        .send()
        .await?
        .error_for_status()?
        .json()
        .await?;

    // Combine both responses into the status.
    Ok(Status {
        current_w: power.pv_now,
        total_kwh: energy.pv_to_date,
        last_updated,
    })
}
/// Main update loop that logs in and periodically acquires updates from the API.
///
/// It updates the mutex-guarded current update [`Status`] struct which can be retrieved via
/// Rocket.
///
/// The loop wakes up every 10 seconds and performs an update once at least [`POLL_INTERVAL`]
/// seconds have passed since the last successful one. The interval is measured on the monotonic
/// clock, so adjustments of the system time cannot make the check underflow or stall; the
/// system time is only used for the `last_updated` timestamp of the status.
///
/// # Errors
///
/// Returns an error if the configuration cannot be loaded, the HTTP client cannot be built or
/// a login attempt (initial or after an unauthorized update) fails. Other update failures are
/// logged and retried on the next wake-up.
pub(super) async fn update_loop() -> color_eyre::Result<()> {
    let config = load_config().await?;
    let client = ClientBuilder::new().cookie_store(true).build()?;

    // Go to the My Autarco site and login.
    println!("⚡ Logging in...");
    login(&config, &client).await?;
    println!("⚡ Logged in successfully!");

    let poll_interval = Duration::from_secs(POLL_INTERVAL);
    // Moment of the last successful update; `None` as long as there has not been one.
    let mut last_poll: Option<std::time::Instant> = None;
    loop {
        // Wake up every 10 seconds and check if an update is due.
        sleep(Duration::from_secs(10)).await;

        let now = std::time::Instant::now();
        if let Some(at) = last_poll {
            if now.saturating_duration_since(at) < poll_interval {
                continue;
            }
        }

        // Wall-clock timestamp (seconds since the UNIX epoch) that is reported in the status.
        let timestamp = SystemTime::now()
            .duration_since(SystemTime::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs();

        let status = match update(&config, &client, timestamp).await {
            Ok(status) => status,
            Err(e) if e.status() == Some(StatusCode::UNAUTHORIZED) => {
                // The session has probably expired: log in again and retry on the next wake-up.
                println!("✨ Update unauthorized, trying to log in again...");
                login(&config, &client).await?;
                println!("⚡ Logged in successfully!");
                continue;
            }
            Err(e) => {
                println!("✨ Failed to update status: {}", e);
                continue;
            }
        };

        // Only a successful update restarts the poll interval.
        last_poll = Some(now);
        println!("⚡ Updated status to: {:#?}", status);

        let mut status_guard = STATUS.lock().expect("Status mutex was poisoned");
        status_guard.replace(status);
    }
}